Skip to content

Commit

Permalink
Add TCP latency application for the ENA device (#93)
Browse files Browse the repository at this point in the history
* Add ENA version of the latency tool

* Fix documentation URLs

* Reduce log level

* Parse log level from command line

* Fix build with latency monitor

* Add synchronous mode
  • Loading branch information
xguerin authored Feb 8, 2024
1 parent fdfade2 commit da7df83
Show file tree
Hide file tree
Showing 14 changed files with 217 additions and 56 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ The Ultra-Low latency TCP/IP Stack.

#### Architecture

* [Transport](https://github.com/IBM/tulips/blob/master/docs/topics/Transport.md)
* [Network stack](https://github.com/IBM/tulips/blob/master/docs/topics/Network-stack.md)
* [Transport](https://github.com/xenogenics/tulips/blob/master/docs/topics/Transport.md)
* [Network stack](https://github.com/xenogenics/tulips/blob/master/docs/topics/Network-stack.md)

#### Integration

* [User interface](https://github.com/IBM/tulips/blob/master/docs/topics/User-interface.md)
* [OpenSSL](https://github.com/IBM/tulips/blob/master/docs/topics/OpenSSL.md)
* [User interface](https://github.com/xenogenics/tulips/blob/master/docs/topics/User-interface.md)
* [OpenSSL](https://github.com/xenogenics/tulips/blob/master/docs/topics/OpenSSL.md)

#### Performance

* [Configuration](https://github.com/IBM/tulips/blob/master/docs/topics/Configuration.md)
* [Test and Performance](https://github.com/IBM/tulips/blob/master/docs/topics/Test-and-Performance.md)
* [Configuration](https://github.com/xenogenics/tulips/blob/master/docs/topics/Configuration.md)
* [Test and Performance](https://github.com/xenogenics/tulips/blob/master/docs/topics/Test-and-Performance.md)
11 changes: 11 additions & 0 deletions apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
endif ()

# ENA latency tool: only built when DPDK has been found, since the ENA
# transport is linked against it.
if (LibDPDK_FOUND)
# NOTE(review): include_directories() is directory-scoped, so the DPDK include
# paths also apply to every target declared after this point in this file.
include_directories(${LibDPDK_INCLUDE_DIRS})
add_executable(lat_ena lat_ena.cpp)
# Link the shared application code, the network stack, the ENA transport and
# the transport stubs.
target_link_libraries(lat_ena
PRIVATE
tulips_apps_static
tulips_stack_static
tulips_transport_ena_static
tulips_transport_stubs_static)
endif (LibDPDK_FOUND)

if (LibIBVerbs_FOUND)
add_executable(lat_ofed lat_ofed.cpp)
target_link_libraries(lat_ofed
Expand Down
77 changes: 77 additions & 0 deletions apps/lat_ena.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include <tulips/apps/Options.h>
#include <tulips/apps/TCPLatency.h>
#include <tulips/transport/ena/AbstractionLayer.h>
#include <tulips/transport/ena/Device.h>
#include <tulips/transport/ena/Port.h>
#include <atomic>
#include <chrono>
#include <exception>
#include <functional>
#include <iostream>
#include <thread>
#include <tclap/CmdLine.h>

using namespace tulips;
using namespace apps::tcplatency;
using namespace transport;

/*
 * Body of the port thread: call Port::run() and then pause for 10 ms,
 * repeating for as long as the keep_running flag reads true.
 */
void
runPort(transport::ena::Port& port, std::atomic<bool>& keep_running)
{
  const auto pause = std::chrono::milliseconds(10);
  /*
   * The flag is checked before every iteration; the pause happens after each
   * call to run() and before the next check.
   */
  for (; keep_running.load(); std::this_thread::sleep_for(pause)) {
    port.run();
  }
}

/*
 * Entry point of the ENA TCP latency tool.
 *
 * Parses the command line, allocates the ENA abstraction layer and port, runs
 * the port in a background thread and executes either the client or the server
 * side of the latency test depending on the options.
 *
 * Returns the result of Client::run()/Server::run(), a non-zero value if the
 * options are invalid, or -1 if an exception was raised.
 */
int
main(int argc, char** argv)
try {
  TCLAP::CmdLine cmd("TULIPS ENA Test", ' ', "1.0");
  apps::Options opts(cmd);
  cmd.parse(argc, argv);
  /*
   * Make sure the options are sane.
   */
  if (!opts.isSane()) {
    return __LINE__;
  }
  /*
   * Create the console logger.
   */
  auto logger = system::ConsoleLogger(opts.verbosity());
  /*
   * Make sure the interface is set.
   */
  if (!opts.hasInterface()) {
    std::cerr << "--interface must be set" << std::endl;
    return -1;
  }
  /*
   * Allocate the EAL and the port.
   */
  auto eal = transport::ena::AbstractionLayer::allocate(logger);
  auto port = transport::ena::Port(logger, opts.interface(), 2, 1024, 2048);
  /*
   * Get an ENA device.
   */
  auto device = port.next(logger, false);
  /*
   * Start the port thread.
   */
  std::atomic<bool> keep_running = true;
  auto pthr = std::thread(runPort, std::ref(port), std::ref(keep_running));
  /*
   * Stop and join the port thread on every exit path. Without this, an
   * exception thrown by Client::run()/Server::run() would destroy a joinable
   * std::thread during stack unwinding, which calls std::terminate() before
   * the handler below gets a chance to run. The guard is declared after the
   * thread so that it is destroyed first.
   */
  struct PortThreadGuard
  {
    std::atomic<bool>& flag;
    std::thread& thread;

    ~PortThreadGuard()
    {
      flag = false;
      if (thread.joinable()) {
        thread.join();
      }
    }
  } guard{ keep_running, pthr };
  /*
   * Call the main function. The port thread is terminated by the guard when
   * leaving this scope.
   */
  const int res = opts.isSender() ? Client::run(opts, std::move(device))
                                  : Server::run(opts, std::move(device));
  /*
   * Clean-up.
   */
  return res;
} catch (std::exception const& e) {
  std::cerr << e.what() << std::endl;
  return -1;
}
12 changes: 6 additions & 6 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ Topics and relevant pages:

#### Architecture

* [Transport](https://github.com/IBM/tulips/docs/topics/Transport.md)
* [Network stack](https://github.com/IBM/tulips/docs/topics/Network-stack.md)
* [Transport](https://github.com/xenogenics/tulips/blob/master/docs/topics/Transport.md)
* [Network stack](https://github.com/xenogenics/tulips/blob/master/docs/topics/Network-stack.md)

#### Integration

* [User interface](https://github.com/IBM/tulips/docs/topics/User-interface.md)
* [OpenSSL](https://github.com/IBM/tulips/docs/topics/OpenSSL.md)
* [User interface](https://github.com/xenogenics/tulips/blob/master/docs/topics/User-interface.md)
* [OpenSSL](https://github.com/xenogenics/tulips/blob/master/docs/topics/OpenSSL.md)

#### Performance

* [Configuration](https://github.com/IBM/tulips/docs/topics/Configuration.md)
* [Test and Performance](https://github.com/IBM/tulips/docs/topics/Test-and-Performance.md)
* [Configuration](https://github.com/xenogenics/tulips/blob/master/docs/topics/Configuration.md)
* [Test and Performance](https://github.com/xenogenics/tulips/blob/master/docs/topics/Test-and-Performance.md)
6 changes: 3 additions & 3 deletions docs/topics/Network-stack.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ The protocols are implemented using combinations of processors and producers.
With this combination, protocols can be arranged in pipelines like below:

<p align=center>
<img src="https://github.com/IBM/tulips/blob/master/docs/rsrcs/stack_pipeline.svg" width=80%>
<img src="https://github.com/xenogenics/tulips/blob/master/docs/rsrcs/stack_pipeline.svg" width=80%>
</p>

One end of the pipeline is always a hardware device (`ofed`, `shm`, ...). The
Expand Down Expand Up @@ -198,7 +198,7 @@ segmenting those payloads into MTU-sized payloads and sequentially send them.
When implemented in software this feature is very costly.

<p align=center>
<img src="https://github.com/IBM/tulips/blob/master/docs/rsrcs/segmentation.svg" width=70%>
<img src="https://github.com/xenogenics/tulips/blob/master/docs/rsrcs/segmentation.svg" width=70%>
</p>

Modern NICs offer an optimization named TCP Segmentation Offload (TSO) that
Expand All @@ -218,7 +218,7 @@ as the remote peer advertise a large enough receive window. The TCP layer
supports multiple asynchronous segments before an ACK is received.

<p align=center>
<img src="https://github.com/IBM/tulips/blob/master/docs/rsrcs/asyncsegs.svg" width=80%>
<img src="https://github.com/xenogenics/tulips/blob/master/docs/rsrcs/asyncsegs.svg" width=80%>
</p>

The default configuration can have up to 4 asynchronous segments. The stack can
Expand Down
2 changes: 1 addition & 1 deletion docs/topics/OpenSSL.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ circular buffer and minimize as much as possible redundant copies.
The NIC cards used were ConnectX-5.

<p align=center>
<img src="https://github.com/IBM/tulips/blob/master/docs/exps/usr_krn_ssl.svg" width=100%>
<img src="https://github.com/xenogenics/tulips/blob/master/docs/exps/usr_krn_ssl.svg" width=100%>
</p>
4 changes: 2 additions & 2 deletions docs/topics/Test-and-Performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ The round-trip switch overhead is about 500 ns, close to the advertised 250 ns o
## Results

<p align=center>
<img src="https://github.com/IBM/tulips/blob/master/docs/exps/pkt_per_sec.svg" width=100%>
<img src="https://github.com/xenogenics/tulips/blob/master/docs/exps/pkt_per_sec.svg" width=100%>
</p>

<p align=center>
<img src="https://github.com/IBM/tulips/blob/master/docs/exps/bits_per_sec.svg" width=100%>
<img src="https://github.com/xenogenics/tulips/blob/master/docs/exps/bits_per_sec.svg" width=100%>
</p>

29 changes: 27 additions & 2 deletions include/tulips/api/Connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <tulips/api/Interface.h>
#include <tulips/stack/tcpv4/Connection.h>
#include <tulips/system/Clock.h>
#include <functional>
#include <list>
#include <optional>
Expand Down Expand Up @@ -39,7 +40,6 @@ class Connection
, m_host()
#ifdef TULIPS_ENABLE_LATENCY_MONITOR
, m_count(0)
, m_pre(0)
, m_lat(0)
, m_history()
#endif
Expand Down Expand Up @@ -97,6 +97,32 @@ class Connection
m_opts = 0;
}

/*
* Latency monitoring.
*/

#ifdef TULIPS_ENABLE_LATENCY_MONITOR
/*
 * Record the timestamp of a sent segment at the back of the history, to be
 * consumed by a later call to markOnAcked().
 */
void markOnSent(const system::Clock::Value ts)
{
  m_history.push_back(ts);
}

/*
 * Account for an acknowledgment received at time ts: add the time elapsed
 * since the oldest recorded send to the accumulated latency, bump the sample
 * count and drop that send from the history.
 */
void markOnAcked(const system::Clock::Value ts)
{
  /*
   * Ignore acknowledgments that do not match a recorded send: calling front()
   * or pop_front() on an empty list is undefined behavior.
   */
  if (m_history.empty()) {
    return;
  }
  /*
   * Update the counters and consume the oldest send timestamp.
   */
  m_count += 1;
  m_lat += ts - m_history.front();
  m_history.pop_front();
}

/*
 * Return the average latency accumulated since the previous call (0 if no
 * sample was recorded) and reset the accumulators.
 *
 * NOTE(review): the value is expressed in the unit of the timestamps handed to
 * markOnSent()/markOnAcked(); no conversion is applied here. Confirm that
 * callers expecting nanoseconds get what they need.
 */
uint64_t latency()
{
  /*
   * Snapshot the accumulators, then reset them.
   */
  const size_t samples = m_count;
  const auto total = m_lat;
  m_count = 0;
  m_lat = 0;
  /*
   * No sample, no latency.
   */
  if (samples == 0) {
    return 0;
  }
  return total / samples;
}
#endif

private:
#ifdef TULIPS_ENABLE_LATENCY_MONITOR
using History = std::list<system::Clock::Value>;
Expand All @@ -108,7 +134,6 @@ class Connection
std::optional<std::string> m_host;
#ifdef TULIPS_ENABLE_LATENCY_MONITOR
size_t m_count;
system::Clock::Value m_pre;
system::Clock::Value m_lat;
History m_history;
#endif
Expand Down
9 changes: 7 additions & 2 deletions include/tulips/apps/Options.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <tulips/system/Logger.h>
#include <cstdint>
#include <tclap/CmdLine.h>

Expand Down Expand Up @@ -30,10 +31,12 @@ class Options
bool wait() const { return wai.isSet(); }
size_t length() const { return len.getValue(); }
size_t count() const { return cnt.getValue(); }
long cpuId() const { return cpu.getValue(); }
system::Logger::Level verbosity() const { return vrb.getValue(); }
bool isSynchronous() const { return syn.isSet(); }
bool withSSL() const { return ssl.isSet(); }
std::string_view sslCert() const { return crt.getValue(); }
std::string_view sslKey() const { return key.getValue(); }
long cpuId() const { return cpu.getValue(); }

private:
TCLAP::ValueArg<int> usd;
Expand All @@ -52,10 +55,12 @@ class Options
TCLAP::SwitchArg wai;
TCLAP::ValueArg<size_t> len;
TCLAP::ValueArg<size_t> cnt;
TCLAP::ValueArg<long> cpu;
TCLAP::ValueArg<system::Logger::Level> vrb;
TCLAP::SwitchArg syn;
TCLAP::SwitchArg ssl;
TCLAP::ValueArg<std::string> crt;
TCLAP::ValueArg<std::string> key;
TCLAP::ValueArg<long> cpu;
};

}
36 changes: 36 additions & 0 deletions include/tulips/system/Logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,39 @@ class BufferedLogger final : public Logger
};

}

/*
* Converters.
*/

namespace std {

inline istream&
operator>>(istream& is, tulips::system::Logger::Level& level)
{
/*
* Parse the input as a string.
*/
std::string value;
is >> value;
/*
* Convert the value.
*/
if (value == "error" || value == "ERROR") {
level = tulips::system::Logger::Level::Error;
} else if (value == "warning" || value == "WARNING") {
level = tulips::system::Logger::Level::Warning;
} else if (value == "info" || value == "INFO") {
level = tulips::system::Logger::Level::Info;
} else if (value == "debug" || value == "DEBUG") {
level = tulips::system::Logger::Level::Debug;
} else {
level = tulips::system::Logger::Level::Trace;
}
/*
* Done.
*/
return is;
}

}
20 changes: 3 additions & 17 deletions src/api/Client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,9 +361,6 @@ Client::send(const ID id, const uint32_t len, const uint8_t* const data,
/*
* Send the payload.
*/
#ifdef TULIPS_ENABLE_LATENCY_MONITOR
c.pre = c.pre ?: system::Clock::read();
#endif
return m_tcp.send(id, len, data, off);
}

Expand All @@ -381,13 +378,7 @@ Client::averageLatency(UNUSED const ID id)
/*
* Compute the latency.
*/
uint64_t res = 0;
if (c.count > 0) {
res = system::Clock::nanosecondsOf(c.lat / c.count);
}
c.lat = 0;
c.count = 0;
return res;
return c.latency();
#else
return 0;
#endif
Expand Down Expand Up @@ -498,9 +489,7 @@ void
Client::onSent(UNUSED tcpv4::Connection& c, UNUSED const Timestamp ts)
{
#ifdef TULIPS_ENABLE_LATENCY_MONITOR
Connection& d = m_cns[c.id()];
d.history.push_back(d.pre);
d.pre = 0;
m_cns[c.id()].markOnSent(ts);
#endif
}

Expand All @@ -512,10 +501,7 @@ Client::onAcked(stack::tcpv4::Connection& c, const Timestamp ts,
* Update the latency monitor.
*/
#ifdef TULIPS_ENABLE_LATENCY_MONITOR
Connection& d = m_cns[c.id()];
d.count += 1;
d.lat += system::Clock::read() - d.history.front();
d.history.pop_front();
m_cns[c.id()].markOnAcked(ts);
#endif
/*
* Call the delegate.
Expand Down
7 changes: 6 additions & 1 deletion src/apps/Options.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#include "tulips/system/Logger.h"
#include <tulips/apps/Options.h>
#include <cstdint>

namespace tulips::apps {

using system::Logger;

Options::Options(TCLAP::CmdLine& cmd)
: usd("u", "us", "uS delay between sends", false, 1000, "DELAY", cmd)
, nag("N", "nodelay", "Disable Nagle's algorithm", cmd)
Expand All @@ -20,10 +23,12 @@ Options::Options(TCLAP::CmdLine& cmd)
, wai("w", "wait", "Wait instead of poll", cmd)
, len("l", "length", "Payload length", false, 8, "LEN", cmd)
, cnt("c", "count", "Send count", false, 0, "COUNT", cmd)
, cpu("a", "affinity", "CPU affinity", false, -1, "CPUID", cmd)
, vrb("v", "verbose", "Verbosity", false, Logger::Level::Info, "LEVEL", cmd)
, syn("y", "synchronous", "Synchronous requests", cmd)
, ssl("", "ssl", "Use OpenSSL", cmd)
, crt("", "cert", "SSL certificate", false, "", "PEM", cmd)
, key("", "key", "SSL private key", false, "", "PEM", cmd)
, cpu("", "cpu", "CPU affinity", false, -1, "CPUID", cmd)
{}

bool
Expand Down
Loading

0 comments on commit da7df83

Please sign in to comment.