Skip to content

Commit

Permalink
[orch]: Add SwSS record mechanism that record all tasks from applicat…
Browse files Browse the repository at this point in the history
…ions (sonic-net#207)

Similar to sairedis record, this swss record provides a log that records
all applications tasks according to the sequence of incoming messages from
the Redis database queue. The format follows the sairedis record format:

TIMESTAMP|TABLE_NAME:KEY|OP|FIELD:VALUE|FIELD:VALUE|...

The record file is saved at where the daemon is started from, with the file
name swss.TIMESTAMP.rec.

-r record_type is added with options: 0/1/2/3 and usage() is added
  • Loading branch information
Shuotian Cheng committed May 9, 2017
1 parent 49c556c commit 132672f
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 32 deletions.
4 changes: 0 additions & 4 deletions orchagent/bufferorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,21 +549,17 @@ void BufferOrch::doTask(Consumer &consumer)
break;
case task_process_status::task_invalid_entry:
SWSS_LOG_ERROR("Invalid buffer task item was encountered, removing from queue.");
dumpTuple(consumer, tuple);
it = consumer.m_toSync.erase(it);
break;
case task_process_status::task_failed:
SWSS_LOG_ERROR("Processing buffer task item failed, exiting.");
dumpTuple(consumer, tuple);
return;
case task_process_status::task_need_retry:
SWSS_LOG_ERROR("Processing buffer task item failed, will retry.");
dumpTuple(consumer, tuple);
it++;
break;
default:
SWSS_LOG_ERROR("Unknown task status: %d", task_status);
dumpTuple(consumer, tuple);
it = consumer.m_toSync.erase(it);
break;
}
Expand Down
3 changes: 0 additions & 3 deletions orchagent/copporch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -577,16 +577,13 @@ void CoppOrch::doTask(Consumer &consumer)
break;
case task_process_status::task_invalid_entry:
SWSS_LOG_ERROR("Invalid copp task item was encountered, removing from queue.");
dumpTuple(consumer, tuple);
it = consumer.m_toSync.erase(it);
break;
case task_process_status::task_failed:
SWSS_LOG_ERROR("Processing copp task item failed, exiting. ");
dumpTuple(consumer, tuple);
return;
case task_process_status::task_need_retry:
SWSS_LOG_ERROR("Processing copp task item failed, will retry.");
dumpTuple(consumer, tuple);
it++;
break;
default:
Expand Down
89 changes: 75 additions & 14 deletions orchagent/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ extern "C" {
#include "saistatus.h"
}

#include <fstream>
#include <iostream>
#include <map>
#include <mutex>
Expand Down Expand Up @@ -50,6 +51,11 @@ sai_object_id_t gVirtualRouterId;
sai_object_id_t gUnderlayIfId;
MacAddress gMacAddress;

bool gSairedisRecord = true;
bool gSwssRecord = true;
ofstream gRecordOfs;
string gRecordFile;

/* Global database mutex */
mutex gDbMutex;

Expand Down Expand Up @@ -134,6 +140,30 @@ void initSaiApi()
sai_log_set(SAI_API_ACL, SAI_LOG_NOTICE);
}

string getTimestamp()
{
char buffer[64];
struct timeval tv;
gettimeofday(&tv, NULL);

size_t size = strftime(buffer, 32 ,"%Y-%m-%d.%T.", localtime(&tv.tv_sec));
snprintf(&buffer[size], 32, "%06ld", tv.tv_usec);

return string(buffer);
}

void usage()
{
cout << "usage: orchagent [-h] [-r record_type] [-m MAC]" << endl;
cout << " -h: display this message" << endl;
cout << " -r record_type: record orchagent logs with type (default 3)" << endl;
cout << " 0: do not record logs" << endl;
cout << " 1: record SAI call sequence as sairedis*.rec" << endl;
cout << " 2: record SwSS task sequence as swss*.rec" << endl;
cout << " 3: enable both above two records" << endl;
cout << " -m MAC: set switch MAC address" << endl;
}

int main(int argc, char **argv)
{
swss::Logger::linkToDbNative("orchagent");
Expand All @@ -143,19 +173,39 @@ int main(int argc, char **argv)
int opt;
sai_status_t status;

bool disableRecord = false;

while ((opt = getopt(argc, argv, "m:hR")) != -1)
while ((opt = getopt(argc, argv, "m:r:h")) != -1)
{
switch (opt)
{
case 'R':
disableRecord = true;
break;
case 'm':
gMacAddress = MacAddress(optarg);
break;
case 'r':
if (!strcmp(optarg, "0"))
{
gSairedisRecord = false;
gSwssRecord = false;
}
else if (!strcmp(optarg, "1"))
{
gSwssRecord = false;
}
else if (!strcmp(optarg, "2"))
{
gSairedisRecord = false;
}
else if (!strcmp(optarg, "3"))
{
continue; /* default behavior */
}
else
{
usage();
exit(EXIT_FAILURE);
}
break;
case 'h':
usage();
exit(EXIT_SUCCESS);
default: /* '?' */
exit(EXIT_FAILURE);
Expand All @@ -174,26 +224,36 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}

SWSS_LOG_NOTICE("Enabling sairedis recording");

/* Enable SAI Redis recording */
sai_attribute_t attr;
attr.id = SAI_REDIS_SWITCH_ATTR_RECORD;
attr.value.booldata = !disableRecord;
attr.value.booldata = gSairedisRecord;

status = sai_switch_api->set_switch_attribute(&attr);

if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to enable recording %d", status);
SWSS_LOG_ERROR("Failed to enable SAI Redis recording %d", status);
exit(EXIT_FAILURE);
}

/* Enable SwSS recording */
if (gSwssRecord)
{
gRecordFile = "swss." + getTimestamp() + ".rec";
gRecordOfs.open(gRecordFile);
if (!gRecordOfs.is_open())
{
SWSS_LOG_ERROR("Failed to open SwSS recording file %s", gRecordFile.c_str());
exit(EXIT_FAILURE);
}
}

SWSS_LOG_NOTICE("Notify syncd INIT_VIEW");

attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INIT_VIEW;
status = sai_switch_api->set_switch_attribute(&attr);

status = sai_switch_api->set_switch_attribute(&attr);
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to notify syncd INIT_VIEW %d", status);
Expand All @@ -205,7 +265,7 @@ int main(int argc, char **argv)
attr.id = SAI_REDIS_SWITCH_ATTR_USE_PIPELINE;
attr.value.booldata = true;

sai_switch_api->set_switch_attribute(&attr);
status = sai_switch_api->set_switch_attribute(&attr);
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to enable redis pipeline %d", status);
Expand Down Expand Up @@ -239,6 +299,7 @@ int main(int argc, char **argv)

/* Get the default virtual router ID */
attr.id = SAI_SWITCH_ATTR_DEFAULT_VIRTUAL_ROUTER_ID;

status = sai_switch_api->get_switch_attribute(1, &attr);
if (status != SAI_STATUS_SUCCESS)
{
Expand All @@ -260,7 +321,7 @@ int main(int argc, char **argv)
if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to create underlay router interface %d", status);
return false;
exit(EXIT_FAILURE);
}

SWSS_LOG_NOTICE("Created underlay router interface ID %lx", gUnderlayIfId);
Expand Down
30 changes: 24 additions & 6 deletions orchagent/orch.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <fstream>
#include <iostream>
#include <mutex>
#include <sys/time.h>

#include "orch.h"
#include "portsorch.h"
Expand All @@ -11,6 +13,11 @@ using namespace swss;
extern mutex gDbMutex;
extern PortsOrch *gPortsOrch;

extern bool gSwssRecord;
extern ofstream gRecordOfs;
extern string gRecordFile;
extern string getTimestamp();

Orch::Orch(DBConnector *db, string tableName) :
m_db(db)
{
Expand All @@ -33,6 +40,11 @@ Orch::~Orch()
delete(m_db);
for(auto &it : m_consumerMap)
delete it.second.m_consumer;

if (gRecordOfs.is_open())
{
gRecordOfs.close();
}
}

vector<Selectable *> Orch::getSelectables()
Expand Down Expand Up @@ -73,13 +85,17 @@ bool Orch::execute(string tableName)

string key = kfvKey(new_data);
string op = kfvOp(new_data);
// Possible nothing popped, ie. the oparation is already merged with other operations
/* Possible nothing popped, ie. the oparation is already merged with other operations */
if (op.empty())
{
return true;
}

dumpTuple(consumer, new_data);
/* Record incoming tasks */
if (gSwssRecord)
{
recordTuple(consumer, new_data);
}

/* If a new task comes or if a DEL task comes, we directly put it into consumer.m_toSync map */
if (consumer.m_toSync.find(key) == consumer.m_toSync.end() || op == DEL_COMMAND)
Expand Down Expand Up @@ -214,14 +230,16 @@ void Orch::doTask()
}
}

void Orch::dumpTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple)
void Orch::recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple)
{
string debug_msg = "Full table content: " + consumer.m_consumer->getTableName() + " key : " + kfvKey(tuple) + " op : " + kfvOp(tuple);
string s = consumer.m_consumer->getTableName() + ":" + kfvKey(tuple)
+ "|" + kfvOp(tuple);
for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++)
{
debug_msg += " " + fvField(*i) + " : " + fvValue(*i);
s += "|" + fvField(*i) + ":" + fvValue(*i);
}
SWSS_LOG_DEBUG("%s\n", debug_msg.c_str());

gRecordOfs << getTimestamp() << "|" << s << endl;
}

ref_resolve_status Orch::resolveFieldRefArray(
Expand Down
2 changes: 1 addition & 1 deletion orchagent/orch.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class Orch

/* Run doTask against a specific consumer */
virtual void doTask(Consumer &consumer) = 0;
void dumpTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple);
void recordTuple(Consumer &consumer, KeyOpFieldsValuesTuple &tuple);
ref_resolve_status resolveFieldRefValue(type_map&, const string&, KeyOpFieldsValuesTuple&, sai_object_id_t&);
bool parseIndexRange(const string &input, sai_uint32_t &range_low, sai_uint32_t &range_high);
bool parseReference(type_map &type_maps, string &ref, string &table_name, string &object_name);
Expand Down
4 changes: 0 additions & 4 deletions orchagent/qosorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1260,22 +1260,18 @@ void QosOrch::doTask(Consumer &consumer)
switch(task_status)
{
case task_process_status::task_success :
dumpTuple(consumer, tuple);
it = consumer.m_toSync.erase(it);
SWSS_LOG_DEBUG("Successfully processed item, removing from queue.");
break;
case task_process_status::task_invalid_entry :
SWSS_LOG_ERROR("Invalid QOS task item was encountered, removing from queue.");
dumpTuple(consumer, tuple);
it = consumer.m_toSync.erase(it);
break;
case task_process_status::task_failed :
SWSS_LOG_ERROR("Processing QOS task item failed, exiting.");
dumpTuple(consumer, tuple);
return;
case task_process_status::task_need_retry :
SWSS_LOG_INFO("Processing QOS task item failed, will retry.");
dumpTuple(consumer, tuple);
it++;
break;
default:
Expand Down

0 comments on commit 132672f

Please sign in to comment.