From 132672f4060d7d7879bd8b240b070ffd6467d6a6 Mon Sep 17 00:00:00 2001 From: Shuotian Cheng Date: Tue, 9 May 2017 13:20:57 -0700 Subject: [PATCH] [orch]: Add SwSS record mechanism that record all tasks from applications (#207) Similar to sairedis record, this swss record provides a log that records all applications tasks according to the sequence of incoming messages from the Redis database queue. The format follows the sairedis record format: TIMESTAMP|TABLE_NAME:KEY|OP|FIELD:VALUE|FIELD:VALUE|... The record file is saved at where the daemon is started from, with the file name swss.TIMESTAMP.rec. -r record_type is added with options: 0/1/2/3 and usage() is added --- orchagent/bufferorch.cpp | 4 -- orchagent/copporch.cpp | 3 -- orchagent/main.cpp | 89 +++++++++++++++++++++++++++++++++------- orchagent/orch.cpp | 30 +++++++++++--- orchagent/orch.h | 2 +- orchagent/qosorch.cpp | 4 -- 6 files changed, 100 insertions(+), 32 deletions(-) diff --git a/orchagent/bufferorch.cpp b/orchagent/bufferorch.cpp index cf613588c09c..d7c464a6b035 100644 --- a/orchagent/bufferorch.cpp +++ b/orchagent/bufferorch.cpp @@ -549,21 +549,17 @@ void BufferOrch::doTask(Consumer &consumer) break; case task_process_status::task_invalid_entry: SWSS_LOG_ERROR("Invalid buffer task item was encountered, removing from queue."); - dumpTuple(consumer, tuple); it = consumer.m_toSync.erase(it); break; case task_process_status::task_failed: SWSS_LOG_ERROR("Processing buffer task item failed, exiting."); - dumpTuple(consumer, tuple); return; case task_process_status::task_need_retry: SWSS_LOG_ERROR("Processing buffer task item failed, will retry."); - dumpTuple(consumer, tuple); it++; break; default: SWSS_LOG_ERROR("Unknown task status: %d", task_status); - dumpTuple(consumer, tuple); it = consumer.m_toSync.erase(it); break; } diff --git a/orchagent/copporch.cpp b/orchagent/copporch.cpp index e58010c44acc..a3c9480bc21a 100644 --- a/orchagent/copporch.cpp +++ b/orchagent/copporch.cpp @@ -577,16 +577,13 @@ void CoppOrch::doTask(Consumer &consumer) break; case task_process_status::task_invalid_entry: SWSS_LOG_ERROR("Invalid copp task item was encountered, removing from queue."); - dumpTuple(consumer, tuple); it = consumer.m_toSync.erase(it); break; case task_process_status::task_failed: SWSS_LOG_ERROR("Processing copp task item failed, exiting. "); - dumpTuple(consumer, tuple); return; case task_process_status::task_need_retry: SWSS_LOG_ERROR("Processing copp task item failed, will retry."); - dumpTuple(consumer, tuple); it++; break; default: diff --git a/orchagent/main.cpp b/orchagent/main.cpp index c69cf35c9235..eabb0e98f97b 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -3,6 +3,7 @@ extern "C" { #include "saistatus.h" } +#include #include #include #include @@ -50,6 +51,11 @@ sai_object_id_t gVirtualRouterId; sai_object_id_t gUnderlayIfId; MacAddress gMacAddress; +bool gSairedisRecord = true; +bool gSwssRecord = true; +ofstream gRecordOfs; +string gRecordFile; + /* Global database mutex */ mutex gDbMutex; @@ -134,6 +140,30 @@ void initSaiApi() sai_log_set(SAI_API_ACL, SAI_LOG_NOTICE); } +string getTimestamp() +{ + char buffer[64]; + struct timeval tv; + gettimeofday(&tv, NULL); + + size_t size = strftime(buffer, 32 ,"%Y-%m-%d.%T.", localtime(&tv.tv_sec)); + snprintf(&buffer[size], 32, "%06ld", tv.tv_usec); + + return string(buffer); +} + +void usage() +{ + cout << "usage: orchagent [-h] [-r record_type] [-m MAC]" << endl; + cout << " -h: display this message" << endl; + cout << " -r record_type: record orchagent logs with type (default 3)" << endl; + cout << " 0: do not record logs" << endl; + cout << " 1: record SAI call sequence as sairedis*.rec" << endl; + cout << " 2: record SwSS task sequence as swss*.rec" << endl; + cout << " 3: enable both above two records" << endl; + cout << " -m MAC: set switch MAC address" << endl; +} + int main(int argc, char **argv) { swss::Logger::linkToDbNative("orchagent"); @@ -143,19 +173,39 @@ int main(int argc, char **argv) int opt; sai_status_t status; - bool disableRecord = false; - - while ((opt = getopt(argc, argv, "m:hR")) != -1) + while ((opt = getopt(argc, argv, "m:r:h")) != -1) { switch (opt) { - case 'R': - disableRecord = true; - break; case 'm': gMacAddress = MacAddress(optarg); break; + case 'r': + if (!strcmp(optarg, "0")) + { + gSairedisRecord = false; + gSwssRecord = false; + } + else if (!strcmp(optarg, "1")) + { + gSwssRecord = false; + } + else if (!strcmp(optarg, "2")) + { + gSairedisRecord = false; + } + else if (!strcmp(optarg, "3")) + { + continue; /* default behavior */ + } + else + { + usage(); + exit(EXIT_FAILURE); + } + break; case 'h': + usage(); exit(EXIT_SUCCESS); default: /* '?' */ exit(EXIT_FAILURE); @@ -174,26 +224,36 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - SWSS_LOG_NOTICE("Enabling sairedis recording"); - + /* Enable SAI Redis recording */ sai_attribute_t attr; attr.id = SAI_REDIS_SWITCH_ATTR_RECORD; - attr.value.booldata = !disableRecord; + attr.value.booldata = gSairedisRecord; status = sai_switch_api->set_switch_attribute(&attr); - if (status != SAI_STATUS_SUCCESS) { - SWSS_LOG_ERROR("Failed to enable recording %d", status); + SWSS_LOG_ERROR("Failed to enable SAI Redis recording %d", status); exit(EXIT_FAILURE); } + /* Enable SwSS recording */ + if (gSwssRecord) + { + gRecordFile = "swss." + getTimestamp() + ".rec"; + gRecordOfs.open(gRecordFile); + if (!gRecordOfs.is_open()) + { + SWSS_LOG_ERROR("Failed to open SwSS recording file %s", gRecordFile.c_str()); + exit(EXIT_FAILURE); + } + } + SWSS_LOG_NOTICE("Notify syncd INIT_VIEW"); attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD; attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW; - status = sai_switch_api->set_switch_attribute(&attr); + status = sai_switch_api->set_switch_attribute(&attr); if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to notify syncd INIT_VIEW %d", status); @@ -205,7 +265,7 @@ int main(int argc, char **argv) attr.id = SAI_REDIS_SWITCH_ATTR_USE_PIPELINE; attr.value.booldata = true; - sai_switch_api->set_switch_attribute(&attr); + status = sai_switch_api->set_switch_attribute(&attr); if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to enable redis pipeline %d", status); @@ -239,6 +299,7 @@ int main(int argc, char **argv) /* Get the default virtual router ID */ attr.id = SAI_SWITCH_ATTR_DEFAULT_VIRTUAL_ROUTER_ID; + status = sai_switch_api->get_switch_attribute(1, &attr); if (status != SAI_STATUS_SUCCESS) { @@ -260,7 +321,7 @@ int main(int argc, char **argv) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to create underlay router interface %d", status); - return false; + exit(EXIT_FAILURE); } SWSS_LOG_NOTICE("Created underlay router interface ID %lx", gUnderlayIfId); diff --git a/orchagent/orch.cpp b/orchagent/orch.cpp index 1348a71ac40b..723e4d2c21bf 100644 --- a/orchagent/orch.cpp +++ b/orchagent/orch.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include "orch.h" #include "portsorch.h" @@ -11,6 +13,11 @@ using namespace swss; extern mutex gDbMutex; extern PortsOrch *gPortsOrch; +extern bool gSwssRecord; +extern ofstream gRecordOfs; +extern string gRecordFile; +extern string getTimestamp(); + Orch::Orch(DBConnector *db, string tableName) : m_db(db) { @@ -33,6 +40,11 @@ Orch::~Orch() delete(m_db); for(auto &it : m_consumerMap) delete it.second.m_consumer; + + if (gRecordOfs.is_open()) + { + gRecordOfs.close(); + } } vector Orch::getSelectables() @@ -73,13 +85,17 @@ bool Orch::execute(string tableName) string key = kfvKey(new_data); string op = kfvOp(new_data); - // Possible nothing popped, ie. the oparation is already merged with other operations + /* Possible nothing popped, ie. the oparation is already merged with other operations */ if (op.empty()) { return true; } - dumpTuple(consumer, new_data); + /* Record incoming tasks */ + if (gSwssRecord) + { + recordTuple(consumer, new_data); + } /* If a new task comes or if a DEL task comes, we directly put it into consumer.m_toSync map */ if (consumer.m_toSync.find(key) == consumer.m_toSync.end() || op == DEL_COMMAND) @@ -214,14 +230,16 @@ void Orch::doTask() } } -void Orch::dumpTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple) +void Orch::recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple) { - string debug_msg = "Full table content: " + consumer.m_consumer->getTableName() + " key : " + kfvKey(tuple) + " op : " + kfvOp(tuple); + string s = consumer.m_consumer->getTableName() + ":" + kfvKey(tuple) + + "|" + kfvOp(tuple); for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) { - debug_msg += " " + fvField(*i) + " : " + fvValue(*i); + s += "|" + fvField(*i) + ":" + fvValue(*i); } - SWSS_LOG_DEBUG("%s\n", debug_msg.c_str()); + + gRecordOfs << getTimestamp() << "|" << s << endl; } ref_resolve_status Orch::resolveFieldRefArray( diff --git a/orchagent/orch.h b/orchagent/orch.h index 296628e79843..314407a92733 100644 --- a/orchagent/orch.h +++ b/orchagent/orch.h @@ -77,7 +77,7 @@ class Orch /* Run doTask against a specific consumer */ virtual void doTask(Consumer &consumer) = 0; - void dumpTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple); + void recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple); ref_resolve_status resolveFieldRefValue(type_map&, const string&, KeyOpFieldsValuesTuple&, sai_object_id_t&); bool parseIndexRange(const string &input, sai_uint32_t &range_low, sai_uint32_t &range_high); bool parseReference(type_map &type_maps, string &ref, string &table_name, string &object_name); diff --git a/orchagent/qosorch.cpp b/orchagent/qosorch.cpp index 35442ee01824..1c6220158bd9 100644 --- a/orchagent/qosorch.cpp +++ b/orchagent/qosorch.cpp @@ -1260,22 +1260,18 @@ void QosOrch::doTask(Consumer &consumer) switch(task_status) { case task_process_status::task_success : - dumpTuple(consumer, tuple); it = consumer.m_toSync.erase(it); SWSS_LOG_DEBUG("Successfully processed item, removing from queue."); break; case task_process_status::task_invalid_entry : SWSS_LOG_ERROR("Invalid QOS task item was encountered, removing from queue."); - dumpTuple(consumer, tuple); it = consumer.m_toSync.erase(it); break; case task_process_status::task_failed : SWSS_LOG_ERROR("Processing QOS task item failed, exiting."); - dumpTuple(consumer, tuple); return; case task_process_status::task_need_retry : SWSS_LOG_INFO("Processing QOS task item failed, will retry."); - dumpTuple(consumer, tuple); it++; break; default: