From bea6f4c199a762deee389f22f98120513c23e963 Mon Sep 17 00:00:00 2001 From: jiangrujie Date: Mon, 18 Sep 2017 19:14:48 +0800 Subject: [PATCH] Add documentation in english --- docs/cn/http_client.md | 15 +-- docs/cn/streaming_log.md | 74 ++-------- docs/en/http_client.md | 231 ++++++++++++++++++++++++++++++++ docs/en/iobuf.md | 97 ++++++++++++++ docs/en/streaming_log.md | 283 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 632 insertions(+), 68 deletions(-) create mode 100644 docs/en/http_client.md create mode 100644 docs/en/iobuf.md create mode 100644 docs/en/streaming_log.md diff --git a/docs/cn/http_client.md b/docs/cn/http_client.md index bbe34d5a4f..b689ca5216 100644 --- a/docs/cn/http_client.md +++ b/docs/cn/http_client.md @@ -83,7 +83,7 @@ URL的一般形式如下图: 确实,在简单使用场景下,这两者有所重复,但在复杂场景中,两者差别很大,比如: -- 访问挂在bns下的多个http server。此时Channel.Init传入的是bns节点名称,对uri()的赋值则是包含Host的完整URL(比如"www.foo.com/index.html?name=value"),BNS下所有的http server都会看到"Host: www.foo.com";uri()也可以是只包含路径的URL,比如"/index.html?name=value",框架会以目标server的ip和port为Host,地址为10.46.188.39:8989的http server将会看到"Host: 10.46.188.39:8989"。 +- 访问挂在bns下的多个http server。此时Channel.Init传入的是bns节点名称,对uri()的赋值则是包含Host的完整URL(比如"www.foo.com/index.html?name=value"),BNS下所有的http server都会看到"Host: [www.foo.com](http://www.foo.com/)";uri()也可以是只包含路径的URL,比如"/index.html?name=value",框架会以目标server的ip和port为Host,地址为10.46.188.39:8989的http server将会看到"Host: 10.46.188.39:8989"。 - 通过http proxy访问目标server。此时Channel.Init传入的是proxy server的地址,但uri()填入的是目标server的URL。 # 常见设置 @@ -132,7 +132,7 @@ os.move_to(cntl->request_attachment()); Notes on http header: -- 根据 HTTP 协议[规定](http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2), header 的 field_name部分不区分大小写。在r33861前,field_name都转为了小写,在r33861后,大小写能保持不变(仍然支持大小写不敏感)。 +- 根据 HTTP 协议[规定](http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2), header 的 field_name部分不区分大小写。brpc对于field_name大小写保持不变,且仍然支持大小写不敏感。 - 如果 HTTP 头中出现了相同的 field_name, 
根据协议[规定](http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2),value将被合并到一起, 中间用逗号(,) 分隔, 具体value如何理解,需要用户自己确定. - query之间用"&"分隔, key和value之间用"="分隔, value可以省略,比如key1=value1&key2&key3=value3中key2是合理的query,值为空字符串。 @@ -144,12 +144,11 @@ Notes on http header: 当Server返回的http status code不是2xx时,该次http访问即视为失败,client端会设置对应的ErrorCode: -- 在r31923前,1xx和3xx错误对应EPROTONOSUPPORT,4xx错误对应EREQUEST,其余错误对应EINTERNAL。http body不会置入`cntl->response_attachment()`。 -- 在r31923后,所有错误被统一为EHTTP。如果用户发现`cntl->ErrorCode()`为EHTTP,那么可以检查`cntl->http_response().status_code()`以获得具体的http错误。同时http body会置入`cntl->response_attachment()`,用户可以把代表错误的html或json传递回来。 +- 所有错误被统一为EHTTP。如果用户发现`cntl->ErrorCode()`为EHTTP,那么可以检查`cntl->http_response().status_code()`以获得具体的http错误。同时http body会置入`cntl->response_attachment()`,用户可以把代表错误的html或json传递回来。 # 压缩request body -在r33877后,调用Controller::set_request_compress_type(brpc::COMPRESS_TYPE_GZIP)可将http body用gzip压缩,并设置"Content-Encoding"为"gzip"。 +调用Controller::set_request_compress_type(brpc::COMPRESS_TYPE_GZIP)可将http body用gzip压缩,并设置"Content-Encoding"为"gzip"。 # 解压response body @@ -172,9 +171,9 @@ if (encoding != NULL && *encoding == "gzip") { # 持续下载 -r33796前brpc client在下载一个超长的body时,需要一直等待直到body完整才会视作RPC结束,这个过程中超长body都会存在内存中,如果body是无限长的(比如直播用的flv文件),那么内存会持续增长,直到超时。换句话说,r33796前的brpc client不适合下载大文件。 +通常下载一个超长的body时,需要一直等待直到body完整才会视作RPC结束,这个过程中超长body都会存在内存中,如果body是无限长的(比如直播用的flv文件),那么内存会持续增长,直到超时。这样的http client不适合下载大文件。 -r33796后brpc client支持在读取完body前就结束RPC,让用户在RPC结束后再读取持续增长的body。注意这个功能不等同于“支持http chunked mode”,brpc的http实现一直支持解析chunked mode,这里的问题是如何让用户处理超长或无限长的body,和body是否以chunked mode传输无关。 +brpc client支持在读取完body前就结束RPC,让用户在RPC结束后再读取持续增长的body。注意这个功能不等同于“支持http chunked mode”,brpc的http实现一直支持解析chunked mode,这里的问题是如何让用户处理超长或无限长的body,和body是否以chunked mode传输无关。 使用方法如下: @@ -215,4 +214,4 @@ r33796后brpc client支持在读取完body前就结束RPC,让用户在RPC结 # 访问带认证的Server -根据Server的认证方式生成对应的auth_data,并设置为http header "Authorization"的值。比如用的是curl,那就加上选项`-H "Authorization : 
"。`查询[giano文档](http://doc.noah.baidu.com/new/baas/base_tool.md)了解如何在Shell中生成auth_data。 +根据Server的认证方式生成对应的auth_data,并设置为http header "Authorization"的值。比如用的是curl,那就加上选项`-H "Authorization : "。` diff --git a/docs/cn/streaming_log.md b/docs/cn/streaming_log.md index a65dd3be34..b951b438ab 100644 --- a/docs/cn/streaming_log.md +++ b/docs/cn/streaming_log.md @@ -16,52 +16,12 @@ PLOG(FATAL) << "Fail to call function setting errno"; VLOG(1) << "verbose log tier 1"; CHECK_GT(1, 2) << "1 can't be greater than 2"; -// public/common >= r32401支持限制打印频率。 LOG_EVERY_SECOND(INFO) << "High-frequent logs"; LOG_EVERY_N(ERROR, 10) << "High-frequent logs"; LOG_FIRST_N(INFO, 20) << "Logs that prints for at most 20 times"; LOG_ONCE(WARNING) << "Logs that only prints once"; ``` -## 配置comlog - -```c++ -// logging默认重定向至comlog,要配置comlog的话,要额外include comlog_sink.h -#include - -// 从./conf/log.conf读取comlog的配置。SetupFromConfig是我们提供的封装函数,不用像com_loadlog那样区分path和file。 -if (logging::ComlogSink::GetInstance()->SetupFromConfig("conf/log.conf") != 0) { - LOG(ERROR) << "Fail to setup comlog from conf/log.conf"; - return -1; -} - -OR - -// 直接调用com_loadlog从./conf/log.conf读取comlog的配置。 -if (com_loadlog("./conf", "log.conf") != 0) { - LOG(ERROR) << "Fail to com_loadlog"; - return -1; -} - -OR - -// 把日志打入./my_log/.log中,comlog选项取默认值。 -logging::ComlogSinkOptions options; -options.log_dir = "my_log"; -if (logging::ComlogSink::GetInstance()->Setup(&options) != 0) { - LOG(ERROR) << "Fail to setup comlog from options"; - return -1; -} - -OR - -// 把日志打入./log/.log中,comlog选项取默认值。 -if (logging::ComlogSink::GetInstance()->Setup(NULL) != 0) { - LOG(ERROR) << "Fail to setup comlog by default options"; - return -1; -} -``` - # DESCRIPTION 流式日志是打印复杂对象或模板对象的不二之选。大部分业务对象都很复杂,如果用printf形式的函数打印,你需要先把对象转成string,才能以%s输出。但string组合起来既不方便(比如没法append数字),还得分配大量的临时内存(string导致的)。C++中解决这个问题的方法便是“把日志流式地送入std::ostream对象”。比如为了打印对象A,那么我们得实现如下的函数: @@ -122,17 +82,17 @@ LOG(WARNING) << "Unusual thing happened ..." 
<< ...; LOG(TRACE) << "Something just took place..." << ...; ``` -streaming log的日志等级是comlog和glog的合集,具体的来说,下表是日志等级的映射关系: +streaming log的日志等级与glog映射关系如下: -| streaming log | comlog | glog | 使用场景 | -| ------------- | ---------------------------- | -------------------- | ---------------------------------------- | -| FATAL | COMLOG_FATAL | FATAL (coredump) | 致命错误。但由于百度内大部分FATAL实际上非致命,所以streaming log的FATAL默认不像glog那样直接coredump,除非打开了[-crash_on_fatal_log](http://brpc.baidu.com:8765/flags/crash_on_fatal_log) | -| ERROR | COMLOG_FATAL | ERROR | 不致命的错误。 | -| WARNING | COMLOG_WARNING | WARNING | 不常见的分支。 | -| NOTICE | COMLOG_NOTICE | - | 一般来说你不应该使用NOTICE,它用于打印重要的业务日志,若要使用务必和检索端同学确认。glog没有NOTICE。 | -| INFO, TRACE | COMLOG_TRACE | INFO | 打印重要的副作用。比如打开关闭了某某资源之类的。 | -| VLOG(n) | COMLOG_TRACE | INFO | 打印分层的详细日志。 | -| DEBUG | COMLOG_TRACEVLOG(1) (NDEBUG) | INFOVLOG(1) (NDEBUG) | 仅为代码兼容性,基本没有用。若要使日志仅在未定义NDEBUG时才打印,用DLOG/DPLOG/DVLOG等即可。 | +| streaming log | glog | 使用场景 | +| ------------- | -------------------- | ---------------------------------------- | +| FATAL | FATAL (coredump) | 致命错误。但由于百度内大部分FATAL实际上非致命,所以streaming log的FATAL默认不像glog那样直接coredump,除非打开了[-crash_on_fatal_log](http://brpc.baidu.com:8765/flags/crash_on_fatal_log) | +| ERROR | ERROR | 不致命的错误。 | +| WARNING | WARNING | 不常见的分支。 | +| NOTICE | - | 一般来说你不应该使用NOTICE,它用于打印重要的业务日志,若要使用务必和检索端同学确认。glog没有NOTICE。 | +| INFO, TRACE | INFO | 打印重要的副作用。比如打开关闭了某某资源之类的。 | +| VLOG(n) | INFO | 打印分层的详细日志。 | +| DEBUG | INFOVLOG(1) (NDEBUG) | 仅为代码兼容性,基本没有用。若要使日志仅在未定义NDEBUG时才打印,用DLOG/DPLOG/DVLOG等即可。 | ## PLOG @@ -148,7 +108,7 @@ if (fd < 0) { ## noflush -如果你暂时不希望刷入comlog,加上noflush。这一般会用在打印循环中: +如果你暂时不希望刷到屏幕,加上noflush。这一般会用在打印循环中: ```c++ LOG(TRACE) << "Items:" << noflush; @@ -158,7 +118,7 @@ for (iterator it = items.begin(); it != items.end(); ++it) { LOG(TRACE); ``` -前两次TRACE日志都没有刷到comlog,而是还记录在thread-local缓冲中,第三次TRACE日志则把缓冲都刷入了comlog。如果items里面有三个元素,不加noflush的打印结果可能是这样的: 
+前两次TRACE日志都没有刷到屏幕,而是还记录在thread-local缓冲中,第三次TRACE日志则把缓冲都刷入了屏幕。如果items里面有三个元素,不加noflush的打印结果可能是这样的: ``` TRACE: ... Items: @@ -173,7 +133,7 @@ TRACE: ... item3 TRACE: ... Items: item1 item2 item3 ``` -r34694前noflush和调用处的pthread绑定,如果在noflush后发送了RPC(可能跨越pthread),那么日志输出可能不符合预期。r34694后noflush支持bthread,可以实现类似于UB的pushnotice的效果,即检索线程一路打印都暂不刷出(加上noflush),直到最后检索结束时再一次性刷出。注意,如果检索过程是异步的,就不应该使用noflush,因为异步显然会跨越bthread,使noflush仍然失效。 +noflush支持bthread,可以实现类似于UB的pushnotice的效果,即检索线程一路打印都暂不刷出(加上noflush),直到最后检索结束时再一次性刷出。注意,如果检索过程是异步的,就不应该使用noflush,因为异步显然会跨越bthread,使noflush仍然失效。 ## LOG_IF @@ -299,7 +259,7 @@ CHECK(x > y); // Check failed: x > y. ## LogSink -streaming log通过logging::SetLogSink修改日志刷入的目标,默认是屏幕。用户可以继承LogSink,实现自己的日志打印逻辑。我们默认提供了两个LogSink实现: +streaming log通过logging::SetLogSink修改日志刷入的目标,默认是屏幕。用户可以继承LogSink,实现自己的日志打印逻辑。我们默认提供了个LogSink实现: ### StringSink @@ -316,9 +276,3 @@ TEST_F(StreamingLogTest, log_at) { ::logging::SetLogSink(old_sink); } ``` - -### ComlogSink - -定义在butil/comlog_sink.h中,把日志打印入comlog,主要用于线上系统,用法见[SYNOPSIS](#SYNOPSIS)一段。 - -使用ComlogSink的streaming log可以和com_writelog, ul_writelog混用。你并不需要把程序中所有日志都换成streaming log。 diff --git a/docs/en/http_client.md b/docs/en/http_client.md new file mode 100644 index 0000000000..9abeda967e --- /dev/null +++ b/docs/en/http_client.md @@ -0,0 +1,231 @@ +Examples for Http Client: [example/http_c++](https://github.com/brpc/brpc/blob/master/example/http_c++/http_client.cpp) + +# Create Channel + +In order to use`brpc::Channel` to access the HTTP service, `ChannelOptions.protocol` must be specified as `PROTOCOL_HTTP`. + +After setting the HTTP protocol, the first parameter of `Channel::Init` can be any valid URL. *Note*: We only use the host and port part inside the URL here in order to save the user from additional parsing work. Other parts of the URL in `Channel::Init` will be discarded. 
+ +```c++ +brpc::ChannelOptions options; +options.protocol = brpc::PROTOCOL_HTTP; +if (channel.Init("www.baidu.com" /*any url*/, &options) != 0) { + LOG(ERROR) << "Fail to initialize channel"; + return -1; +} +``` + +The HTTP channel also supports BNS addresses. + +# GET + +```c++ +brpc::Controller cntl; +cntl.http_request().uri() = "www.baidu.com/index.html"; // Request URL +channel.CallMethod(NULL, &cntl, NULL, NULL, NULL/*done*/); +``` + +HTTP has nothing to do with protobuf, so every parameter of `CallMethod` is NULL except `Controller` and `done`, the latter of which can be used to issue the RPC asynchronously. + +`cntl.response_attachment()` is the response body, whose type is `butil::IOBuf`. Note that converting `IOBuf` to `std::string` using `to_string()` needs to allocate memory and copy all the content. As a result, if performance comes first, you should use `IOBuf` directly rather than contiguous memory. + +# POST + +The default HTTP Method is GET. You can set the method to POST if needed, and you should append the POST data into `request_attachment()`, which ([butil::IOBuf](https://github.com/brpc/brpc/blob/master/src/butil/iobuf.h)) supports appending `std::string` or `char*`. + +```c++ +brpc::Controller cntl; +cntl.http_request().uri() = "..."; // Request URL +cntl.http_request().set_method(brpc::HTTP_METHOD_POST); +cntl.request_attachment().append("{\"message\":\"hello world!\"}"); +channel.CallMethod(NULL, &cntl, NULL, NULL, NULL/*done*/); +``` + +If you need to do a lot of printing, we suggest using `butil::IOBufBuilder`, which has the same interface as `std::ostringstream`. It's much simpler and more efficient to print lots of objects using `butil::IOBufBuilder`. 
+ +```c++ +brpc::Controller cntl; +cntl.http_request().uri() = "..."; // Request URL +cntl.http_request().set_method(brpc::HTTP_METHOD_POST); +butil::IOBufBuilder os; +os << "A lot of printing" << printable_objects << ...; +os.move_to(cntl.request_attachment()); +channel.CallMethod(NULL, &cntl, NULL, NULL, NULL/*done*/); +``` + +# URL + +Below is the normal form of an URL: + +``` +// URI scheme : http://en.wikipedia.org/wiki/URI_scheme +// +// foo://username:password@example.com:8042/over/there/index.dtb?type=animal&name=narwhal#nose +// \_/ \_______________/ \_________/ \__/ \___/ \_/ \______________________/ \__/ +// | | | | | | | | +// | userinfo host port | | query fragment +// | \________________________________/\_____________|____|/ \__/ \__/ +// schema | | | | | | +// authority | | | | | +// path | | interpretable as keys +// | | +// \_______________________________________________|____|/ \____/ \_____/ +// | | | | | +// hierarchical part | | interpretable as values +// | | +// interpretable as filename | +// | +// | +// interpretable as extension +``` + +Here's the question, why to pass URL parameter twice (via `set_uri`) instead of using the URL inside `Channel::Init()` ? + +For most simple cases, it's a repeat work. But in complex scenes, they are very different in: + +- Access multiple servers under a BNS node. At this time `Channel::Init` accepts the BNS node name, the value of `set_uri()` is the whole URL including Host (such as `www.foo.com/index.html?name=value`). As a result, all servers under BNS will see `Host: www.foo.com`. `set_uri()` also takes URL with the path only, such as `/index.html?name=value`. RPC framework will automatically fill the `Host` header using of the target server's ip and port. For example, http server at 10.46.188.39: 8989 will see `Host: 10.46.188.39: 8989`. +- Access the target server via http proxy. At this point `Channel::Init` takes the address of the proxy server, while `set_uri()` takes the URL of the target server. 
+ +# Basic Usage + +We use `http request` as example (which is the same to `http response`). Here's some basic operations: + +Access an HTTP header named `Foo` + +```c++ +const std::string* value = cntl->http_request().GetHeader("Foo"); // NULL when not exist +``` + +Set an HTTP header named `Foo` + +```c++ +cntl->http_request().SetHeader("Foo", "value"); +``` + +Access a query named `Foo` + +```c++ +const std::string* value = cntl->http_request().uri().GetQuery("Foo"); // NULL when not exist +``` + +Set a query named `Foo` + +```c++ +cntl->http_request().uri().SetQuery("Foo", "value"); +``` + +Set HTTP method + +```c++ +cntl->http_request().set_method(brpc::HTTP_METHOD_POST); +``` + +Set the URL + +```c++ +cntl->http_request().uri() = "http://www.baidu.com"; +``` + +Set the `content-type` + +```c++ +cntl->http_request().set_content_type("text/plain"); +``` + +Access HTTP body + +```c++ +butil::IOBuf& buf = cntl->request_attachment(); +std::string str = cntl->request_attachment().to_string(); // trigger copy underlying +``` + +Set HTTP body + +```c++ +cntl->request_attachment().append("...."); +butil::IOBufBuilder os; os << "...."; +os.move_to(cntl->request_attachment()); +``` + +Notes on http header: + +- The field_name of the header is case-insensitive according to [standard](http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2). The framework supports that while leaving the case unchanged. +- If we have multiple headers with the same field_name, according to [standard](http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2), values will be merged together separating by comma (,). Users should figure out how to use this value according to own needs. +- Queries are separated by "&", while key and value are partitioned by "=". Value may be omitted. For example, `key1=value1&key2&key3=value3` is a valid query string, and the value for `key2` is an empty string. 
+ +# Debug for HTTP client + +Turn on [-http_verbose](http://brpc.baidu.com:8765/flags/http_verbose) so that the framework will print each request and response to stderr. Note that this should only be used for testing and debugging rather than in production. + +# Error Handling for HTTP + +When the server returns a non-2xx HTTP status code, the HTTP request is considered failed and the client sets the corresponding ErrorCode: + +- All errors are unified as `EHTTP`. If you find `cntl->ErrorCode()` is `EHTTP`, you can check `cntl->http_response().status_code()` to get a more specific HTTP error. Meanwhile, the HTTP body will be placed inside `cntl->response_attachment()`, where you can check for an error body such as html or json. + +# Compress Request Body + +Call `Controller::set_request_compress_type(brpc::COMPRESS_TYPE_GZIP)` and the framework will use gzip to compress the HTTP body and set `Content-Encoding` to gzip. + +# Decompress Response Body + +For generality, brpc will not decompress the response body automatically. You can do it yourself, as the code is not complicated: + +```c++ +#include <brpc/policy/gzip_compress.h> +... +const std::string* encoding = cntl->http_response().GetHeader("Content-Encoding"); +if (encoding != NULL && *encoding == "gzip") { + butil::IOBuf uncompressed; + if (!brpc::policy::GzipDecompress(cntl->response_attachment(), &uncompressed)) { + LOG(ERROR) << "Fail to un-gzip response body"; + return; + } + cntl->response_attachment().swap(uncompressed); +} +// Now cntl->response_attachment() contains the decompressed data +``` + +# Continuous Download + +When downloading a large file, normally the client needs to wait until the whole file has been loaded into its memory to finish this RPC. In order to alleviate the problem of memory growth and occupied RPC resources, in brpc the client can end its RPC first and then continuously read the rest of the file. Note that this is not the same as supporting HTTP chunked mode, as brpc has always supported parsing chunked-mode bodies. 
This is the solution that allows users to deal with extremely large bodies. + +Basic usage: + +1. Implement ProgressiveReader: + + ```c++ + #include <brpc/progressive_reader.h> + ... + class ProgressiveReader { + public: + // Called when one part was read. + // Error returned is treated as *permanent* and the socket where the + // data was read will be closed. + // A temporary error may be handled by blocking this function, which + // may block the HTTP parsing on the socket. + virtual butil::Status OnReadOnePart(const void* data, size_t length) = 0; + + // Called when there's nothing to read anymore. The `status' is a hint for + // why this method is called. + // - status.ok(): the message is complete and successfully consumed. + // - otherwise: socket was broken or OnReadOnePart() failed. + // This method will be called once and only once. No other methods will + // be called after. User can release the memory of this object inside. + virtual void OnEndOfMessage(const butil::Status& status) = 0; + }; + ``` + + `OnReadOnePart` is called each time a piece of data is read. `OnEndOfMessage` is called once, when the data has been fully read or the connection has broken. Please refer to the comments before implementing. + +2. Call `cntl.response_will_be_read_progressively();` before the RPC so that brpc knows to end the RPC after reading the header part. + +3. Call `cntl.ReadProgressiveAttachmentBy(new MyProgressiveReader);` after the RPC so that the body is read by your own `MyProgressiveReader` object. You may delete this object inside `OnEndOfMessage`. + +# Continuous Upload + +Currently the POST data must be complete before the RPC is issued, so large POST bodies are not supported. + +# Access Server with Authentication + +Generate `auth_data` according to the server's authentication method and then set it as the value of the `Authorization` header. This is the same as adding the option `-H "Authorization : <auth_data>"` when using curl. 
\ No newline at end of file diff --git a/docs/en/iobuf.md b/docs/en/iobuf.md new file mode 100644 index 0000000000..1992c1c421 --- /dev/null +++ b/docs/en/iobuf.md @@ -0,0 +1,97 @@ +brpc uses [butil::IOBuf](https://github.com/brpc/brpc/blob/master/src/butil/iobuf.h) as the data structure for attachment storage and HTTP bodies. It is a non-contiguous zero-copy buffer, which has proven its excellent performance in other projects. The interface of `IOBuf` is similar to `std::string`, but not the same. + +If you used the `BufHandle` in Kylin before, you should notice how much more convenient `IOBuf` is: the former hardly had any encapsulation, leaving the internal structure directly in front of the user. The user had to carefully handle the reference count, which is very error prone and led to lots of bugs. + +# What IOBuf can do: + +- Default constructor doesn't involve copying. +- Explicit copy doesn't change the source IOBuf. Only the management structure of IOBuf is copied, not the data. +- Append another IOBuf without copy. +- Append string involves copy. +- Read from/Write into fd. +- Convert to protobuf and vice versa. +- IOBufBuilder lets you use an IOBuf as a std::ostream. + +# What IOBuf can't do: + +- Serve as a general storage structure. An IOBuf should not be kept alive for long, to prevent multiple memory blocks (8K each) from being locked by one IOBuf object. 
+ +# Slice + +Slice 16 bytes from IOBuf: + +```c++ +source_buf.cut(&heading_iobuf, 16); // cut all bytes of source_buf when its length < 16 +``` + +Remove 16 bytes: + +```c++ +source_buf.pop_front(16); // Empty source_buf when its length < 16 +``` + +# Concatenate + +Append to another IOBuf: + +```c++ +buf.append(another_buf); // no data copy +``` + +Append std::string + +```c++ +buf.append(str); // copy data of str into buf +``` + +# Parse + +Parse protobuf from IOBuf + +```c++ +IOBufAsZeroCopyInputStream wrapper(&iobuf); +pb_message.ParseFromZeroCopyStream(&wrapper); +``` + +Parse IOBuf as user-defined structure + +```c++ +IOBufAsZeroCopyInputStream wrapper(&iobuf); +CodedInputStream coded_stream(&wrapper); +coded_stream.ReadLittleEndian32(&value); +... +``` + +# Serialize + +Serialize protobuf into IOBuf + +```c++ +IOBufAsZeroCopyOutputStream wrapper(&iobuf); +pb_message.SerializeToZeroCopyStream(&wrapper); +``` + +Append printable data into IOBuf + +```c++ +IOBufBuilder os; +os << "anything can be sent to std::ostream"; +os.buf(); // IOBuf +``` + +# Print + +```c++ +std::cout << iobuf; +std::string str = iobuf.to_string(); +``` + +# Performance + +IOBuf has excellent performance in general aspects: + +| Action | Throughput | QPS | +| ---------------------------------------- | ----------- | ------- | +| Read from file -> Slice 12+16 bytes -> Copy -> Merge into another buffer ->Write to /dev/null | 240.423MB/s | 8586535 | +| Read from file -> Slice 12+128 bytes -> Copy-> Merge into another buffer ->Write to /dev/null | 790.022MB/s | 5643014 | +| Read from file -> Slice 12+1024 bytes -> Copy-> Merge into another buffer ->Write to /dev/null | 1519.99MB/s | 1467171 | \ No newline at end of file diff --git a/docs/en/streaming_log.md b/docs/en/streaming_log.md new file mode 100644 index 0000000000..0412118df7 --- /dev/null +++ b/docs/en/streaming_log.md @@ -0,0 +1,283 @@ +# Name + +streaming_log - Print log to std::ostreams + +# SYNOPSIS + +```c++ +#include + 
+LOG(FATAL) << "Fatal error occurred! contexts=" << ...; +LOG(WARNING) << "Unusual thing happened ..." << ...; +LOG(TRACE) << "Something just took place..." << ...; +LOG(TRACE) << "Items:" << noflush; +LOG_IF(NOTICE, n > 10) << "This log will only be printed when n > 10"; +PLOG(FATAL) << "Fail to call function setting errno"; +VLOG(1) << "verbose log tier 1"; +CHECK_GT(1, 2) << "1 can't be greater than 2"; + +LOG_EVERY_SECOND(INFO) << "High-frequent logs"; +LOG_EVERY_N(ERROR, 10) << "High-frequent logs"; +LOG_FIRST_N(INFO, 20) << "Logs that prints for at most 20 times"; +LOG_ONCE(WARNING) << "Logs that only prints once"; +``` + +# DESCRIPTION + +Streaming log is the best choice for printing complex objects or template objects. As most objects are complicate, user needs to convert all the fields to string first in order to use `printf` with `%s`. However it's very inconvenient (can't append numbers) and needs lots of temporary memory (caused by string). The solution in C++ is to send the log as a stream to the `std::ostream` object. For example, in order to print object A, we need to implement the following interface: + +```c++ +std::ostream& operator<<(std::ostream& os, const A& a); +``` + +The signature of the function means to print object `a` to `os` and then return `os`. The return value of `os` enables us to combine binary operator `<<` (left-combine). As a result, `os << a << b << c;` means `operator<<(operator<<(operator<<(os, a), b), c);`. Apparently `operator<<` needs a returning reference to complete this process, which is also called chaining. In languages that don't support operator overloading, you will see a more tedious form, such as `os.print(a).print(b).print(c)`. + +You should also use chaining in your own implementation of `operator<<`. In fact, printing a complex object is like DFS a tree: Call `operator<<` on each child node, and then each child node invokes the function on the grandchild node, and so forth. 
For example, object A has two member variables: B and C. Printing A becomes the process of putting B and C ostream: + +```c++ +struct A { + B b; + C c; +}; +std::ostream& operator<<(std::ostream& os, const A& a) { + return os << "A{b=" << a.b << ", c=" << a.c << "}"; +} +``` + +Data structure of B and C along with the print function: + +```c++ +struct B { + int value; +}; +std::ostream& operator<<(std::ostream& os, const B& b) { + return os << "B{value=" << b.value << "}"; +} + +struct C { + string name; +}; +std::ostream& operator<<(std::ostream& os, const C& c) { + return os << "C{name=" << c.name << "}"; +} +``` + +Finally the result of printing object A is: + +``` +A{b=B{value=10}, c=C{name=tom}} +``` + +This way we don't need to allocate temporary memory since objects are directly passed into the ostream object. Of course, the memory management of ostream itself is another topic. + +OK, now we connect the whole printing process by ostream. The most common ostream objects are `std::cout` and `std::cerr`, so objects implement the above function can be directly sent to `std::cout` and `std::cerr`. In other words, if a log stream also inherits ostream, then these objects can be written into log. Streaming log is such a log stream that inherits `std::ostream` to send the object into the log. In the current implementation, the logs are recorded in a thread-local buffer, which will be flushed into screen or ` logging::LogSink` after a complete log record. Of course, the implementation is thread safe. + +## LOG + +If you have ever used glog before, you should find it easy to start. The log macro is the same as glog. For example, to print a FATAL log (Note that there is no `std::endl`): + +```c++ +LOG(FATAL) << "Fatal error occurred! contexts=" << ...; +LOG(WARNING) << "Unusual thing happened ..." << ...; +LOG(TRACE) << "Something just took place..." 
<< ...; +``` + +The log levels of streaming log map to glog as follows: + +| streaming log | glog | Use Cases | +| ------------- | -------------------- | ---------------------------------------- | +| FATAL | FATAL (coredump) | Fatal error. Since most FATAL logs inside baidu are not actually fatal, FATAL in streaming log doesn't trigger a coredump by default as glog does, unless you turn on [-crash_on_fatal_log](http://brpc.baidu.com:8765/flags/crash_on_fatal_log) | +| ERROR | ERROR | Non-fatal error. | +| WARNING | WARNING | Unusual branches. | +| NOTICE | - | Generally you should not use NOTICE as it's intended for important business logs. Make sure to check with other developers. glog doesn't have NOTICE. | +| INFO, TRACE | INFO | Important side effects such as opening/closing some resources. | +| VLOG(n) | INFO | Detailed log that supports multiple layers. | +| DEBUG | INFO<br>VLOG(1) (NDEBUG) | Just for compatibility and rarely useful. To print logs only when `NDEBUG` is not defined, use DLOG/DPLOG/DVLOG instead. | + +## PLOG + +The difference between PLOG and LOG is that PLOG appends error information at the end of the log. It's kind of like `%m` in `printf`. In a POSIX environment, the error code is `errno`. + +```c++ +int fd = open("foo.conf", O_RDONLY); // foo.conf does not exist, errno was set to ENOENT +if (fd < 0) { + PLOG(FATAL) << "Fail to open foo.conf"; // "Fail to open foo.conf: No such file or directory" + return -1; +} +``` + +## noflush + +If you don't want to flush the log at once, append `noflush`. It's commonly used inside a loop: + +```c++ +LOG(TRACE) << "Items:" << noflush; +for (iterator it = items.begin(); it != items.end(); ++it) { + LOG(TRACE) << ' ' << *it << noflush; +} +LOG(TRACE); +``` + +The first two LOG(TRACE) statements don't flush the log to the screen; their content is recorded inside the thread-local buffer. The third LOG(TRACE) flushes all buffered logs to the screen. If there are 3 elements inside items and we don't append `noflush`, the result would be: + +``` +TRACE: ... Items: +TRACE: ... 
item2 +TRACE: ... item3 +``` + +After we add `noflush`: + +``` +TRACE: ... Items: item1 item2 item3 +``` + +The `noflush` feature also supports bthread, so that we can push lots of logs from the server's bthreads without actually printing them (using `noflush`), and flush the whole log at the end of the RPC. Note that you should not use `noflush` when implementing an asynchronous method since it will change the underlying bthread, leaving `noflush` ineffective. + +## LOG_IF + +`LOG_IF(log_level, condition)` prints only when condition is true. It's the same as `if (condition) { LOG() << ...; }` with shorter code: + +```c++ +LOG_IF(NOTICE, n > 10) << "This log will only be printed when n > 10"; +``` + +## XXX_EVERY_SECOND + +XXX stands for LOG, LOG_IF, PLOG, SYSLOG, VLOG, DLOG, and so on. These logging macros print a log at most once per second. You can use them to check running status inside hot code paths. The first call to this macro prints the log immediately. Compared to normal LOG, it costs an additional 30ns (caused by gettimeofday). + +```c++ +LOG_EVERY_SECOND(INFO) << "High-frequent logs"; +``` + +## XXX_EVERY_N + +XXX stands for LOG, LOG_IF, PLOG, SYSLOG, VLOG, DLOG, and so on. These logging macros print a log once every N calls. You can use them to check running status inside hot code paths. The first call to this macro prints the log immediately. Compared to normal LOG, it costs an additional atomic operation (relaxed order). This macro is thread safe, which means counting from multiple threads is also accurate, while glog's is not. + +```c++ +LOG_EVERY_N(ERROR, 10) << "High-frequent logs"; +``` + +## XXX_FIRST_N + +XXX stands for LOG, LOG_IF, PLOG, SYSLOG, VLOG, DLOG, and so on. These logging macros print a log at most N times. Compared to normal LOG, it costs an additional atomic operation (relaxed order) for the first N calls, and nothing extra afterwards. 
+ +```c++ +LOG_FIRST_N(ERROR, 20) << "Logs that prints for at most 20 times"; +``` + +## XXX_ONCE + +XXX stands for LOG, LOG_IF, PLOG, SYSLOG, VLOG, DLOG, and so on. These logging macros print a log at most once. It's the same as `XXX_FIRST_N(..., 1)`. + +```c++ +LOG_ONCE(ERROR) << "Logs that only prints once"; +``` + +## VLOG + +VLOG(verbose_level) is a detailed log that supports multiple layers. It uses 2 gflags: *--verbose* and *--verbose_module* to control the logging layer you want (Note that glog uses *--v* and *--vmodule*). The log will be printed only when `--verbose` >= `verbose_level`: + +```c++ +VLOG(0) << "verbose log tier 0"; +VLOG(1) << "verbose log tier 1"; +VLOG(2) << "verbose log tier 2"; +``` + +When `--verbose=1`, the first 2 logs will be printed while the last won't. A module means a file or file path without the extension name, and the value of `--verbose_module` overrides `--verbose`. For example: + +```bash +--verbose=1 --verbose_module="channel=2,server=3" # print VLOG of those with verbose value: + # channel.cpp <= 2 + # server.cpp <= 3 + # other files <= 1 +--verbose=1 --verbose_module="src/brpc/channel=2,server=3" + # For files with same names, add paths +``` + +You can set `--verbose` and `--verbose_module` dynamically through `google::SetCommandLineOption`. + +VLOG has another form, VLOG2, which allows users to specify a virtual path: + +```c++ +// public/foo/bar.cpp +VLOG2("a/b/c", 2) << "being filtered by a/b/c rather than public/foo/bar"; +``` + +> VLOG and VLOG2 also have corresponding VLOG_IF and VLOG2_IF. + +## DLOG + +All log macros have debug versions, starting with D, such as DLOG, DVLOG. When NDEBUG is defined, these logs will not be printed. + +**Do not put important side effects inside the log streams beginning with D.** + +*No printing* means that even the parameters are not evaluated. If your parameters have side effects, they won't happen when NDEBUG is defined. 
For example, consider `DLOG(FATAL) << foo();` where foo is a function with an essential side effect, such as modifying a dictionary. foo won't be evaluated when NDEBUG is defined. + +## CHECK + +Another important variation of logging is `CHECK(expression)`. When expression evaluates to false, it will print a fatal log. It's kind of like `ASSERT` in gtest, and has other forms such as CHECK_EQ, CHECK_GT, and so on. When a check fails, the message streamed after it will be printed. + +```c++ +CHECK_LT(1, 2) << "This is definitely true, this log will never be seen"; +CHECK_GT(1, 2) << "1 can't be greater than 2"; +``` + +Run the above code and you should see a fatal log and the call stack: + +``` +FATAL: ... Check failed: 1 > 2 (1 vs 2). 1 can't be greater than 2 +#0 0x000000afaa23 butil::debug::StackTrace::StackTrace() +#1 0x000000c29fec logging::LogStream::FlushWithoutReset() +#2 0x000000c2b8e6 logging::LogStream::Flush() +#3 0x000000c2bd63 logging::DestroyLogStream() +#4 0x000000c2a52d logging::LogMessage::~LogMessage() +#5 0x000000a716b2 (anonymous namespace)::StreamingLogTest_check_Test::TestBody() +#6 0x000000d16d04 testing::internal::HandleSehExceptionsInMethodIfSupported<>() +#7 0x000000d19e96 testing::internal::HandleExceptionsInMethodIfSupported<>() +#8 0x000000d08cd4 testing::Test::Run() +#9 0x000000d08dfe testing::TestInfo::Run() +#10 0x000000d08ec4 testing::TestCase::Run() +#11 0x000000d123c7 testing::internal::UnitTestImpl::RunAllTests() +#12 0x000000d16d94 testing::internal::HandleSehExceptionsInMethodIfSupported<>() +``` + +The second column of the call stack is the address in the code segment. You can use `addr2line` to find the corresponding file and line: + +``` +$ addr2line -e ./test_base 0x000000a716b2 +/home/gejun/latest_baidu_rpc/public/common/test/test_streaming_log.cpp:223 +``` + +You **should** use `CHECK_XX` for arithmetic conditions so that you can see more detailed information when a check fails. 
+ +```c++ +int x = 1; +int y = 2; +CHECK_GT(x, y); // Check failed: x > y (1 vs 2). +CHECK(x > y); // Check failed: x > y. +``` + +As with DLOG, you should NOT put important side effects inside DCHECK. + +## LogSink + +The default destination of streaming log is the screen. You can change it through `logging::SetLogSink`. Users can inherit LogSink and implement their own output logic. We provide one built-in LogSink implementation: + +### StringSink + +StringSink inherits both LogSink and string. It stores log content inside the string and is mainly intended for unit tests. The following case shows a typical usage of StringSink: + +```c++ +TEST_F(StreamingLogTest, log_at) { + ::logging::StringSink log_str; + ::logging::LogSink* old_sink = ::logging::SetLogSink(&log_str); + LOG_AT(FATAL, "specified_file.cc", 12345) << "file/line is specified"; + // the file:line part should be using the argument given by us. + ASSERT_NE(std::string::npos, log_str.find("specified_file.cc:12345")); + // restore the old sink. + ::logging::SetLogSink(old_sink); +} +``` +