Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Buffer Config Manager #417

Merged
merged 10 commits into from
Jan 26, 2018
8 changes: 7 additions & 1 deletion cfgmgr/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
INCLUDES = -I $(top_srcdir) -I $(top_srcdir)/orchagent
CFLAGS_SAI = -I /usr/include/sai

bin_PROGRAMS = vlanmgrd intfmgrd
bin_PROGRAMS = vlanmgrd intfmgrd buffermgrd

if DEBUG
DBGFLAGS = -ggdb -DDEBUG
Expand All @@ -18,3 +18,9 @@ intfmgrd_SOURCES = intfmgrd.cpp intfmgr.cpp $(top_srcdir)/orchagent/orch.cpp $(t
intfmgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
intfmgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
intfmgrd_LDADD = -lswsscommon

buffermgrd_SOURCES = buffermgrd.cpp buffermgr.cpp $(top_srcdir)/orchagent/orch.cpp $(top_srcdir)/orchagent/request_parser.cpp shellcmd.h
buffermgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
buffermgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_SAI)
buffermgrd_LDADD = -lswsscommon

202 changes: 202 additions & 0 deletions cfgmgr/buffermgr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#include <fstream>
#include <iostream>
#include <string.h>
#include "logger.h"
#include "dbconnector.h"
#include "producerstatetable.h"
#include "tokenize.h"
#include "ipprefix.h"
#include "buffermgr.h"
#include "exec.h"
#include "shellcmd.h"

using namespace std;
using namespace swss;

BufferMgr::BufferMgr(DBConnector *cfgDb, DBConnector *stateDb, string pg_lookup_file, const vector<string> &tableNames) :
Orch(cfgDb, tableNames),
m_statePortTable(stateDb, STATE_PORT_TABLE_NAME, CONFIGDB_TABLE_NAME_SEPARATOR),
m_cfgPortTable(cfgDb, CFG_PORT_TABLE_NAME, CONFIGDB_TABLE_NAME_SEPARATOR),
m_cfgCableLenTable(cfgDb, CFG_PORT_CABLE_LEN_TABLE_NAME, CONFIGDB_TABLE_NAME_SEPARATOR),
m_cfgBufferProfileTable(cfgDb, CFG_BUFFER_PROFILE_TABLE_NAME, CONFIGDB_TABLE_NAME_SEPARATOR),
m_cfgBufferPgTable(cfgDb, CFG_BUFFER_PG_TABLE_NAME, CONFIGDB_TABLE_NAME_SEPARATOR),
m_cfgLosslessPgPoolTable(cfgDb, CFG_BUFFER_POOL_TABLE_NAME, CONFIGDB_TABLE_NAME_SEPARATOR)
{
readPgProfileLookupFile(pg_lookup_file);
}

//# speed, cable, size, xon, xoff, threshold
// 40000 5m 34816 18432 16384 1
void BufferMgr::readPgProfileLookupFile(string file)
{
SWSS_LOG_NOTICE("Read lookup configuration file...");

ifstream infile(file);
if (!infile.is_open())
{
return;
}

string line;
while (getline(infile, line))
{
if (line.empty() || (line.at(0) == '#'))
{
continue;
}

istringstream iss(line);
string speed, cable;

iss >> speed;
iss >> cable;
iss >> m_pgProfileLookup[speed][cable].size;
iss >> m_pgProfileLookup[speed][cable].xon;
iss >> m_pgProfileLookup[speed][cable].xoff;
iss >> m_pgProfileLookup[speed][cable].threshold;

SWSS_LOG_NOTICE("PG profile for speed %s and cable %s is: size:%s, xon:%s xoff:%s th:%s",
speed.c_str(), cable.c_str(),
m_pgProfileLookup[speed][cable].size.c_str(),
m_pgProfileLookup[speed][cable].xon.c_str(),
m_pgProfileLookup[speed][cable].xoff.c_str(),
m_pgProfileLookup[speed][cable].threshold.c_str()
);
}

infile.close();
}

void BufferMgr::doCableTask(string port, string cable_length)
{
m_cableLenLookup[port] = cable_length;
}

string BufferMgr::getPgPoolMode()
{
vector<FieldValueTuple> pool_properties;
m_cfgLosslessPgPoolTable.get(INGRESS_LOSSLESS_PG_POOL_NAME, pool_properties);
for (auto& prop : pool_properties)
{
if (fvField(prop) == "mode")
return fvValue(prop);
}
return "";
}

/*
Create/update two tables: profile (in m_cfgBufferProfileTable) and port buffer (in m_cfgBufferPgTable):

"BUFFER_PROFILE": {
"pg_lossless_100G_300m_profile": {
"pool":"[BUFFER_POOL_TABLE:ingress_lossless_pool]",
"xon":"18432",
"xoff":"165888",
"size":"184320",
"dynamic_th":"1"
}
}
"BUFFER_PG" :{
Ethernet44|3-4": {
"profile" : "[BUFFER_PROFILE:pg_lossless_100000_300m_profile]"
}
}
*/
void BufferMgr::doSpeedUpdateTask(string port, string speed)
{
vector<FieldValueTuple> fvVector;
string cable;

if (m_cableLenLookup.count(port) == 0)
{
SWSS_LOG_ERROR("Unable to create/update PG profile for port %s. Cable length is not set", port.c_str());
return;
}

cable = m_cableLenLookup[port];

if (m_pgProfileLookup.count(speed) == 0 || m_pgProfileLookup[speed].count(cable) == 0)
{
SWSS_LOG_ERROR("Unable to create/update PG profile for port %s. No PG profile configured for speed %s and cable length %s",
port.c_str(), speed.c_str(), cable.c_str());
return;
}

// Crete record in BUFFER_PROFILE table
// key format is pg_lossless_<speed>_<cable>_profile
string buffer_pg_key = port + CONFIGDB_TABLE_NAME_SEPARATOR + LOSSLESS_PGS;
string buffer_profile_key = "pg_lossless_" + speed + "_" + cable + "_profile";

// check if profile already exists - if yes - skip creation
m_cfgBufferProfileTable.get(buffer_profile_key, fvVector);
if (fvVector.size() == 0)
{
SWSS_LOG_NOTICE("Creating new profile '%s'", buffer_profile_key.c_str());

string mode = getPgPoolMode();
if (mode.empty())
{
// this should never happen if switch initialized properly
SWSS_LOG_ERROR("PG lossless pool is not yet created");
return;
}

// profile threshold field name
mode += "_th";
string pg_pool_reference = string(CFG_BUFFER_POOL_TABLE_NAME) + CONFIGDB_TABLE_NAME_SEPARATOR + INGRESS_LOSSLESS_PG_POOL_NAME;
fvVector.push_back(make_pair("pool", "[" + pg_pool_reference + "]"));
fvVector.push_back(make_pair("xon", m_pgProfileLookup[speed][cable].xon));
fvVector.push_back(make_pair("xoff", m_pgProfileLookup[speed][cable].xoff));
fvVector.push_back(make_pair("size", m_pgProfileLookup[speed][cable].size));
fvVector.push_back(make_pair(mode, m_pgProfileLookup[speed][cable].threshold));
m_cfgBufferProfileTable.set(buffer_profile_key, fvVector);
}
else
{
SWSS_LOG_NOTICE("Reusing existing profile '%s'", buffer_profile_key.c_str());
}

fvVector.clear();
string profile_ref = string("[") + CFG_BUFFER_PROFILE_TABLE_NAME + CONFIGDB_TABLE_NAME_SEPARATOR + buffer_profile_key + "]";
fvVector.push_back(make_pair("profile", profile_ref));
m_cfgBufferPgTable.set(buffer_pg_key, fvVector);
}

void BufferMgr::doTask(Consumer &consumer)
{
SWSS_LOG_ENTER();

string table_name = consumer.getTableName();

auto it = consumer.m_toSync.begin();
while (it != consumer.m_toSync.end())
{
KeyOpFieldsValuesTuple t = it->second;

string keySeparator = CONFIGDB_KEY_SEPARATOR;
vector<string> keys = tokenize(kfvKey(t), keySeparator[0]);
string port(keys[0]);

string op = kfvOp(t);
if (op == SET_COMMAND)
{
for (auto i : kfvFieldsValues(t))
{
if (table_name == CFG_PORT_CABLE_LEN_TABLE_NAME)
{
// receive and cache cable length table
doCableTask(fvField(i), fvValue(i));
}
// In case of PORT table update, Buffer Manager is interested in speed update only
if (table_name == CFG_PORT_TABLE_NAME && fvField(i) == "speed")
{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will the field ever != "speed"?

If yes, why ignoring them is correct? Please at least add comment to state reason why it is safe to ignore them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, we receive updates for other fields as well. Actually every time at least one field in the table gets updated, all subscribers receive notifications about all fields. buffer manager is interested in speed only. I'll add a comment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation!

// create/update profile for port
doSpeedUpdateTask(port, fvValue(i));
}
}
}

it = consumer.m_toSync.erase(it);
continue;
}
}
54 changes: 54 additions & 0 deletions cfgmgr/buffermgr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#ifndef __BUFFMGR__
#define __BUFFMGR__

#include "dbconnector.h"
#include "producerstatetable.h"
#include "orch.h"

#include <map>
#include <string>

namespace swss {

#define INGRESS_LOSSLESS_PG_POOL_NAME "ingress_lossless_pool"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is assuming ingress lossless pg pool name the right thing to do?

Can this code support both shared ingress pg pool and separate pg pools for lossless and lossy traffic?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

currently we support buffers update for profiles which use ingress lossless pools only

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it prevent shared pg pool?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just had a discussion with Guohan, this change will affect Broadcom platform in negative way.

On some Broadcom platforms, due to the MMU memory size limitation, we have to allocate a shared buffer pool for all pgs (lossy and lossless), and create 2 buffer profiles on top of the shared pg pool.

I think by hard coding the pg pool name, here is a potential risk for Broadcom platforms that requires shared buffer pool.

There is a buffers.json file checked in for Broadcom TH chip: https://github.com/Azure/sonic-swss/blob/master/swssconfig/sample/th.64ports.buffers.json

Please make sure your code works on this scenario as well.

Thanks,
Ying

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, pool_name is yet another candidate to get to the new table with QOS/BUFFER configuration (besides pg/queue index to generate profile for)
But existing implementation will still work with this platform.
Since the configuration is static no new profiles will be generated on speed change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated config for Force10-S6100 (previously by mistake used td2.32ports.buffers.json for it)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. thanks for the update.

#define LOSSLESS_PGS "3-4"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By defining lossless pgs to 3-4 in code, do we lose the flexibility of choosing any pg as lossless?

I know that in practice we are always have been using 3-4 as lossless pgs, but losing the capability of changing is a bit concerning.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

theoretically PGs numbers can be fetched from the PORT_QOS_MAP:pfc_enable.
@lguohan, can we rely on the fact that queues specified in PORT_QOS_MAP:pfc_enable ("3,4") will always be the ones for which we need to update ingress lossless profile on speed change?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We had a discussion, we decided to move forward with this change for now and revisit this area and remove the hard coded pg numbers in the future.


typedef struct{
string size;
string xon;
string xoff;
string threshold;
} pg_profile_t;

typedef map<string, pg_profile_t> speed_map_t;
typedef map<string, speed_map_t> pg_profile_lookup_t;

typedef map<string, string> port_cable_length_t;

class BufferMgr : public Orch
{
public:
BufferMgr(DBConnector *cfgDb, DBConnector *stateDb, string pg_lookup_file, const vector<string> &tableNames);
using Orch::doTask;

private:
Table m_statePortTable;
Table m_cfgPortTable;
Table m_cfgCableLenTable;
Table m_cfgBufferProfileTable;
Table m_cfgBufferPgTable;
Table m_cfgLosslessPgPoolTable;

pg_profile_lookup_t m_pgProfileLookup;
port_cable_length_t m_cableLenLookup;
std::string getPgPoolMode();
void readPgProfileLookupFile(std::string);
void doCableTask(string port, string cable_length);
void doSpeedUpdateTask(string port, string speed);

void doTask(Consumer &consumer);
};

}

#endif /* __BUFFMGR__ */
122 changes: 122 additions & 0 deletions cfgmgr/buffermgrd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#include <unistd.h>
#include <getopt.h>
#include <vector>
#include <mutex>
#include "dbconnector.h"
#include "select.h"
#include "exec.h"
#include "schema.h"
#include "buffermgr.h"
#include <fstream>
#include <iostream>

using namespace std;
using namespace swss;

/* select() function timeout retry time, in millisecond */
#define SELECT_TIMEOUT 1000

/*
* Following global variables are defined here for the purpose of
* using existing Orch class which is to be refactored soon to
* eliminate the direct exposure of the global variables.
*
* Once Orch class refactoring is done, these global variables
* should be removed from here.
*/
int gBatchSize = 0;
bool gSwssRecord = false;
bool gLogRotate = false;
ofstream gRecordOfs;
string gRecordFile;
/* Global database mutex */
mutex gDbMutex;

void usage()
{
cout << "Usage: buffermgrd -l pg_lookup.ini" << endl;
cout << " -l pg_lookup.ini: PG profile look up table file (mandatory)" << endl;
cout << " format: csv" << endl;
cout << " values: 'speed, cable, size, xon, xoff, dynamic_threshold'" << endl;
}

int main(int argc, char **argv)
{
int opt;
string pg_lookup_file = "";
Logger::linkToDbNative("buffermgrd");
SWSS_LOG_ENTER();

SWSS_LOG_NOTICE("--- Starting buffermgrd ---");

while ((opt = getopt(argc, argv, "l:h")) != -1 )
{
switch (opt)
{
case 'l':
pg_lookup_file = optarg;
break;
case 'h':
usage();
return 1;
default: /* '?' */
usage();
return EXIT_FAILURE;
}
}

if (pg_lookup_file.empty())
{
usage();
return EXIT_FAILURE;
}

try
{
vector<string> cfg_buffer_tables = {
CFG_PORT_TABLE_NAME,
CFG_PORT_CABLE_LEN_TABLE_NAME,
};

DBConnector cfgDb(CONFIG_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);
DBConnector stateDb(STATE_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);

BufferMgr buffmgr(&cfgDb, &stateDb, pg_lookup_file, cfg_buffer_tables);

// TODO: add tables in stateDB which interface depends on to monitor list
std::vector<Orch *> cfgOrchList = {&buffmgr};

swss::Select s;
for (Orch *o : cfgOrchList)
{
s.addSelectables(o->getSelectables());
}

SWSS_LOG_NOTICE("starting main loop");
while (true)
{
Selectable *sel;
int fd, ret;

ret = s.select(&sel, &fd, SELECT_TIMEOUT);
if (ret == Select::ERROR)
{
SWSS_LOG_NOTICE("Error: %s!", strerror(errno));
continue;
}
if (ret == Select::TIMEOUT)
{
buffmgr.doTask();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think calling drain() makes more sense?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, I'll check. Just used the solution similar to initmgrd.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doTask() in base class calls drain:

void Orch::doTask()
{
    for(auto &it : m_consumerMap)
    {
        it.second->drain();
    }
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thanks for digging it up!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think doTask is virtual function and the call will be dispatched to derived class. but calling doTask() is fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

buffermgr does not override doTask(void) so we will go via Orch::doTask() anyway

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thanks :-)

continue;
}

auto *c = (Executor *)sel;
c->execute();
}
}
catch(const std::exception &e)
{
SWSS_LOG_ERROR("Runtime error: %s", e.what());
}
return -1;
}
Loading