diff --git a/.gitignore b/.gitignore index e1fd4bc7492..b4a263156f0 100644 --- a/.gitignore +++ b/.gitignore @@ -183,7 +183,6 @@ ETAGS CTAGS tools/http_load/http_load -tools/jtest/jtest tools/trafficserver.pc tools/escape_mapper/escape_mapper diff --git a/src/iocore/cache/CacheVC.cc b/src/iocore/cache/CacheVC.cc index 12ef88ef327..12d3db4e039 100644 --- a/src/iocore/cache/CacheVC.cc +++ b/src/iocore/cache/CacheVC.cc @@ -87,7 +87,6 @@ DbgCtl dbg_ctl_cache_reenable{"cache_reenable"}; #define SCAN_WRITER_LOCK_MAX_RETRY 5 #define STORE_COLLISION 1 #define USELESS_REENABLES // allow them for now -// #define VERIFY_JTEST_DATA extern int64_t cache_config_ram_cache_cutoff; @@ -380,18 +379,6 @@ CacheVC::handleReadDone(int event, Event *e) goto Ldone; } -#ifdef VERIFY_JTEST_DATA - char xx[500]; - if (read_key && *read_key == doc->key && request.valid() && !dir_head(&dir) && !vio.ndone) { - int ib = 0, xd = 0; - request.url_get()->print(xx, 500, &ib, &xd); - char *x = xx; - for (int q = 0; q < 3; q++) - x = strchr(x + 1, '/'); - ink_assert(!memcmp(doc->data(), x, ib - (x - xx))); - } -#endif - if (dbg_ctl_cache_read.on()) { char xt[CRYPTO_HEX_SIZE]; Dbg(dbg_ctl_cache_read, diff --git a/src/iocore/cache/CacheWrite.cc b/src/iocore/cache/CacheWrite.cc index dec0da46872..087361b7c5d 100644 --- a/src/iocore/cache/CacheWrite.cc +++ b/src/iocore/cache/CacheWrite.cc @@ -745,17 +745,6 @@ Stripe::_agg_copy(CacheVC *vc) } else { iobufferblock_memcpy(doc->data(), vc->write_len, vc->blocks.get(), vc->offset); } -#ifdef VERIFY_JTEST_DATA - if (f.use_first_key && header_len) { - int ib = 0, xd = 0; - char xx[500]; - new_info.request_get().url_get().print(xx, 500, &ib, &xd); - char *x = xx; - for (int q = 0; q < 3; q++) - x = strchr(x + 1, '/'); - ink_assert(!memcmp(doc->hdr(), x, ib - (x - xx))); - } -#endif } if (cache_config_enable_checksum) { doc->checksum = 0; diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc index bbb682028b4..1c2f606a880 100644 --- a/src/proxy/http/HttpSM.cc +++ b/src/proxy/http/HttpSM.cc @@ -7555,14 +7555,6 @@ HttpSM::update_stats() } } - if (is_action_tag_set("assert_jtest_length")) { - if (t_state.hdr_info.client_response.valid() && t_state.hdr_info.client_response.status_get() == HTTP_STATUS_OK) { - int64_t p_resp_cl = t_state.hdr_info.client_response.get_content_length(); - int64_t resp_size = client_response_body_bytes; - ink_release_assert(p_resp_cl == -1 || p_resp_cl == resp_size || resp_size == 0); - } - } - ink_hrtime total_time = milestones.elapsed(TS_MILESTONE_SM_START, TS_MILESTONE_SM_FINISH); // ua_close will not be assigned properly in some exceptional situation. diff --git a/tools/check-unused-dependencies b/tools/check-unused-dependencies index a4376961d5d..a0f470bf635 100755 --- a/tools/check-unused-dependencies +++ b/tools/check-unused-dependencies @@ -57,7 +57,6 @@ def get_dependencies(program): # clang+asan pulls in dependencies for these specific tools: if any(map(progbase.__eq__, [ - 'jtest', 'http_load', 'escape_mapper', ])): diff --git a/tools/jtest/README b/tools/jtest/README deleted file mode 100644 index faa3b90e748..00000000000 --- a/tools/jtest/README +++ /dev/null @@ -1,78 +0,0 @@ -jtest is a performance stress tool for high performance proxy server -Apache Traffic Server. - -jtest works at both UA and OS side when benchmarking the proxy, sure -you can run client or server only. - - -Here is a step-by-step guide for benchmark Apache Traffic Server with jtest: -On localhost: -1, setup the remap rules in Apache Traffic Server: - add the following line into remap.config: - map http://localhost:9080/ http://127.0.0.1:9080/ -2, run jtest: - jtest - which is equal to "jtest -s 9080 -S localhost -p 8080 -P localhost", - will setup 100 connections (-c), push a hit ratio at 40% (-z), run as - fast as possible. - -The output of jtest: - con new ops 1byte lat bytes/per svrs new ops total time err - 100 468 2329.6 39 39 36323315/363233 617 617 617 46131904 136980.9 0 - 100 471 2361.5 39 40 35993941/359939 619 619 619 45466393 136981.9 0 - 100 465 2327.0 40 41 35385495/353854 607 607 607 45095273 136982.9 0 - con: number of the con-current connections - new: the number of created connections - ops: the request handled per second - lat: response time - -On separate hosts: -1, determine the roles: - we take 'ts.cn' as the domain we are benchmarking, and 192.168.0.1 as - the server which is running Apache Traffic Server, 192.168.0.2 as the - host we run jtest. -2, setup the remap rules in Apache Traffic Server: - in our case, we should add the following lines into remap.config: - map http://ts.cn:9080/ http://192.168.0.2:9080/ -3, we run jtest from 192.168.0.2: - jtest -S ts.cn -P 192.168.0.1 - -If you have many hosts running jtest: -1, setup the remap rules in Apache Traffic Server: - in our case, we should add the following lines into remap.config: - map http://ts.cn:9080/ http://192.168.0.2:9080/ - map http://ts.cn:9081/ http://192.168.0.2:9081/ - map http://ts.cn:9082/ http://192.168.0.3:9082/ - map http://ts.cn:9083/ http://192.168.0.3:9083/ - map http://ts.cn:9084/ http://192.168.0.4:9084/ -2, run test: - on 192.168.0.2: - jtest -S ts.cn -s 9080 -P 192.168.0.1 - jtest -S ts.cn -s 9081 -P 192.168.0.1 - on 192.168.0.3: - jtest -S ts.cn -s 9082 -P 192.168.0.1 - jtest -S ts.cn -s 9083 -P 192.168.0.1 - on 192.168.0.4: - jtest -S ts.cn -s 9084 -P 192.168.0.1 - - -Some common used options: --c, --clients int 100 Clients - compare to the -c in ab - by default, jtest will make 100 connections. - --k, --keepalive int 4 Keep-Alive Length --K, --keepalive_cons int 4 # Keep-Alive Connections (0:unlimit) - compare to -k in ab - how many Keep-Alive connections and how many requests for each Keep-Alive - connections - --z, --hitrate dbl 0.400 Hit Rate - by default, the hit ratio is 40% - --u, --urls str (null) URLs from File - you can provide your own URLs, if you do not use jtest on the OS side. - --y, --only_clients on false Only Clients --Y, --only_server on false Only Server - in-case of you do not use both the server and client diff --git a/tools/jtest/README.zh.md b/tools/jtest/README.zh.md deleted file mode 100644 index 241c0d5861f..00000000000 --- a/tools/jtest/README.zh.md +++ /dev/null @@ -1,450 +0,0 @@ -# jtest与ATS压力测试 # - -作为一个高性能的proxy代理服务器,Apache Traffic Server是很难用常用工具进行细致的性能压测的,本文尝试对性能压测进行定义并介绍如何在这种高性能、高并发、大规模的系统中,进行破坏级别的压力测试。 - -## 压力测试的定义 ## - -很多情况下,大家都希望在服务器上线前、业务上线前,对业务的支撑能力做一个测试,希望知道自己的改动是不是在进步,是不是能够比较平稳的抗住预期的流量压力,等等,总结下来压力测试的主要用途有: - -1. 确定新版本的改进不会引起性能问题 -2. 找出业务的单机qps数据,并定义好安全的水位线 -3. 使用性能数据作为硬件采购以及预算的参数 -4. 更好的理解业务的波动对线上系统的压力 - -日常开发中,最最有意义的是,找出新代码是否在性能上有回退;找出新的性能改进到底提高了多少。 - -根据压力的来源,压力测试又会被分为: - -1. 实验室仿真压力测试 - - 实验室仿真,在http proxy服务器测试场景下,客户端和服务器端的数据和请求,都是由工具生成的。 -2. 业务copy仿真压力测试 - - 显然,如果希望服务器程序真正的能够在线上跑,简单的实验室测试与业务需求肯定是差异非常大,因此就有了为什么不把业务流量复制到系统中来的想法。这也就是所谓的流量复制的压测方式。 - - -## jtest工具初步介绍 ## - -jtest是一个专门用于proxy/cache系统的实验室性能压力测试工具,具有极高的性能。能够同时担当后台服务器和客户端。ATS系统专用的性能压测工具。 - -商业的压力测试工具,一般也是一个所谓的盒子,自带客户端、服务器端,能够自己生成模拟流量、copy客户提交流量等,很像目前jtest的模式 - -最早的jtest被设计为可以分布式的集群上运行,用上层脚本系统起停,来压测一个ATS集群系统,早先的系统并没有考虑像现在这样的多核CPU普遍性,设计为单进程工具,我们只在后续高级用法中介绍单机如何跑多个进程等。 - -### jtest简单说明 ### - -### 基本测试 -jtest作为专门针对ATS的测试,已经就ATS的最简单配置下,做了很多简化的默认参数,以便于用户快速的上手,我们以最简单的本机jtest压测本机的默认配置ATS为例子,介绍最基本的jtest用法: - -1. 设置ATS的remap规则: - - 在默认的空remap.config中添加一条规则 - - > map http://localhost:9080/ http://127.0.0.1:9080/ - -2. 运行jtest: - > jtest - - 这个命令默认的参数,即相当于 "jtest -s 9080 -S localhost -p 8080 -P localhost -c 100 -z 0.4",将会起100个连接使用127.0.0.1的9080端口作为jtest源服务器(jtest监听),对本机(localhost)的8080端口上跑的ATS进行测试,并控制整体命中率在40%。 - -输出结果: - - con new ops 1byte lat bytes/per svrs new ops total time err - 100 468 2329.6 39 39 36323315/363233 617 617 617 46131904 136980.9 0 - 100 471 2361.5 39 40 35993941/359939 619 619 619 45466393 136981.9 0 - 100 465 2327.0 40 41 35385495/353854 607 607 607 45095273 136982.9 0 - -其中: - -* con: 并发连接数。并发连接数,单进程单cpu处理能力取决于CPU与测试场景,请酌情设置,推荐小于9999 -* new: 每秒新建连接数。这个参数取决于并发连接数量与长连接效率。 -* ops: 每秒请求数。也作qps,是比较体现服务器性能的关键指标。 -* 1byte:首字节平均响应时间。这个是体现整体转发效率的关键指标。 -* lat: 完成请求整体响应时间(收到最后一个字节)。cache系统性能关键指标。 -* bytes/per:每秒字节流量/每秒每连接流量 -* svrs:服务器端请求数 -* new:服务器端新建连接数 -* ops:服务器端每秒请求数 -* total:服务器端总请求的字节数 -* time:测试时间(秒) -* err:出错数量(连接数)。稳定性测试中,这个数据可以作为一个关键指标。 - -### jtest命令详解 -jtest有非常多的参数,这些参数的组合又会产生很多特殊的效果,我们将从完整的使用说明开始,详细说明jtest测试的命令参数: - - - localhost:tools zym$ ./jtest/jtest -h - JTest Version 1.94 - Dec 9 2013 17:11:24 (zym@zymMBPr.local) - Usage: /Users/zym/git/traffic.git/tools/jtest/.libs/jtest [--SWITCH [ARG]] - -#### 参数格式: -参数格式是 短参数_长参数_类型_默认值_参数说明,多数参数能做到自我说明并且比较详细,更多说明参见下面的说明以及后续高级用法示例。 - - switch__________________type__default___description - -#### 测试机器IP与端口设置: -这几个设置是最最常用的几个参数 - - -p, --proxy_port int 8080 Proxy Port - -P, --proxy_host str localhost Proxy Host - -s, --server_port int 0 Server Port (0:auto select) - -S, --server_host str (null) Server Host (null:localhost) - -* -p -P是用来指定要测试的ATS服务器地址、端口信息 -* -s -S是用来指定要测试的ATS服务器,用来作为源的jtest监听域名(IP)和端口信息。 - -#### 服务器压力控制: -服务器的压力仿真,主要是主动生成的随机流量控制 - - -r, --server_speed int 0 Server Bytes Per Second (0:unlimit) - -w, --server_delay int 0 Server Initial Delay (msec) - -c, --clients int 100 Clients - -R, --client_speed int 0 Client Bytes Per Second (0:unlimit) - -b, --sbuffersize int 4096 Server Buffer Size - -B, --cbuffersize int 2048 Client Buffer Size - -a, --average_over int 5 Seconds to Average Over - -z, --hitrate dbl 0.400 Hit Rate - -Z, --hotset int 1000 Hotset Size - -i, --interval int 1 Reporting Interval (seconds) - -k, --keepalive int 4 Keep-Alive Length - -K, --keepalive_cons int 4 # Keep-Alive Connections (0:unlimit) - -L, --docsize int -1 Document Size (-1:varied) - -j, --skeepalive int 4 Server Keep-Alive (0:unlimit) - -* -r -w -R,控制客户端、服务器端的速度,多数压测的情况下,不会做特殊限制,在需要仿真大并发、大延迟等情况下,可以做控制。 -* -b -B,模拟客户端和服务器端的buffer大小设置,buffer的大小可以极大的影响IO的能力,也会影响内存的占用。 -* -z -Z,这是用来控制命中率和热点数据。命中率是由热点数据的命中,加miss的请求。热点数据的多少,也会影响服务器的内存使用。 -* -k -K -j,控制客户端和服务器的长连接。 -* -i,用来控制jtest结果统计汇报间隔时间。 -* -L,用来控制jtest生成的随机url的返回body大小,默认-1表示完全随机,没有限制。 - -#### 控制输入输出的配置: - - -x, --show_urls on false Show URLs before they are accessed - -X, --show_headers on false Show Headers - -f, --ftp on false FTP Requests - - , --ftp_mdtm_err_rate dbl 0.000 FTP MDTM 550 Error Rate - - , --ftp_mdtm_rate int 0 FTP MDTM Update Rate (sec, 0:never) - -* -x -X,用来debug,显示url以及所有header头,是个排查利器。 -* ftp相关的是用来压测ftp的,不过ATS对ftp的支持已经删除啦。 - -#### 测试的流程处理: -jtest测试,是可以进行复杂的处理,比如对一个网站进行深度抓取测试,对反向、正向、透明模式测试 - - -l, --fullpage on false Full Page (Images) - -F, --follow on false Follow Links - -J, --same_host on false Only follow URLs on same host - -t, --test_time int 0 run for N seconds (0:unlimited) - -u, --urls str (null) URLs from File - -U, --urlsdump str (null) URLs to File - -H, --hostrequest int 0 Host Request(1=yes,2=transparent) - -C, --check_content on false Check returned content - - , --nocheck_length on false Don't check returned length - -m, --obey_redirects off true Obey Redirects - -M, --embed URL off true Embed URL in synth docs - -* -l -F -J,用来对html文件进行解析,并提前其中的所有图片元素等进行深度抓取的控制。 -* -t,控制测试运行时间,默认一直跑 -* -u -U,给jtest指定url,纪录jtest跑的url(如过存在解析html的方式,则纪录的可能会多于指定的) -* -H,控制服务器测试模式,是否带host头,决定了服务器是跑在反向代理、正向代理、透明代理模式 -* -C --nocheck_length,是否检查返回的内容、长度 -* -m,是否跳转 -* -M,控制是否把uri放到返回结果的Body开头,这个一般用来做数据校验使用 - - -#### 请求的分散度与热点: -hash是jtest、ats里无处不在的,如何让hash互相影响,甚至测试hash碰撞等情况?? - - -q, --url_hash_entries int 1000000 URL Hash Table Size (-1:use file size) - -Q, --url_hash_filename str (null) URL Hash Table Filename - --q -Q,hash控制 -#### 服务器的控制: -服务器的使用类型控制,可以让jtest跑在不同的模式下 - - -y, --only_clients on false Only Clients - -Y, --only_server on false Only Server - -A, --bandwidth_test int 0 Bandwidth Test - -T, --drop_after_CL on false Drop after Content-Length - -* -y -Y,可以将jtest单独跑为服务器和客户端分离的服务。 -* -A,-T,可以做更快的流量压测工具。 - -#### 其他控制信息机制: - - -V, --version on false Version - -v, --verbose on false Verbose Flag - -E, --verbose_errors off true Verbose Errors Flag - -* -v -E,可以用于debug错误等 - -#### 请求的随机机制: -本类参数,主要控制服务器和请求的随机程度、复杂度,构建一个复杂的测试用例,将会对服务器的稳定性测试起到很好的效果。 - - -D, --drand int 0 Random Number Seed - -I, --ims_rate dbl 0.500 IMS Not-Changed Rate - -g, --client_abort_rate dbl 0.000 Client Abort Rate - -G, --server_abort_rate dbl 0.000 Server Abort Rate - -n, --extra_headers int 0 Number of Extra Headers - -N, --alternates int 0 Number of Alternates - -e, --client_rate int 0 Clients Per Sec - -o, --abort_retry_speed int 0 Abort/Retry Speed - - , --abort_retry_bytes int 0 Abort/Retry Threshold (bytes) - - , --abort_retry_secs int 5 Abort/Retry Threshold (secs) - -W, --reload_rate dbl 0.000 Reload Rate - -* -D,用于生成url的随机数,如果有多个jtest并发运行,可以对这个随机的seed进行区分以控制cache的多小等 -* -I,请求的内容中,带的IMS比例 -* -g -G,客户端和服务器的Abort比例 -* -n -N,控制客户端发送的header数量,服务器的内容的副本数量 -* -e,每秒的客户端数量 -* -o --abort_retry_bytes --abort_retry_secs,控制重试的速度 -* -W 控制内容的重复度?? - -#### 服务的仿真程度: - - -O, --compd_port int 0 Compd port - -1, --compd_suite on false Compd Suite - -2, --vary_user_agent int 0 Vary on User-Agent (use w/ alternates) - -3, --content_type int 0 Server Content-Type (1 html, 2 jpeg) - -4, --request_extension int 0 Request Extn (1".html" 2".jpeg" 3"/") - -5, --no_cache int 0 Send Server no-cache - -7, --zipf_bucket int 1 Bucket size (of 1M buckets) for Zipf - -8, --zipf dbl 0.000 Use a Zipf distribution with this alpha (say 1.2) - -9, --evo_rate dbl 0.000 Evolving Hotset Rate (evolutions/hour) - -* -0 -1,compress服务 -* -2,控制是否对不同的UA启用多副本 -* -3,服务器返回的内容的类型 -* -4,请求的内容类型 -* -5,是否发送给服务器no-cache控制 -* -7 -8,zipf服务 -* -9,热点的偏移调整 - -#### 其他信息: - - -d, --debug on false Debug Flag - -h, --help Help - - -============================================================================= -#### 上面参数中一些经常用的参数 - -* -c, --clients int 100 Clients - - 跟ab的-c参数类似,默认jtest会启用100并发 - -* -k, --keepalive int 4 Keep-Alive Length -* -K, --keepalive_cons int 4 # Keep-Alive Connections (0:unlimit) - - 跟ab的-k参数功能类似,控制长连接的数量以及长连接的效率 - -* -z, --hitrate dbl 0.400 Hit Rate - - 命中率40%,在反向代理里太低了 - -* -u, --urls str (null) URLs from File - - 提供自己的urls,可以像http_load一样使用jtest - -* -y, --only_clients on false Only Clients -* -Y, --only_server on false Only Server - - 如果你需要独立运行jtest客户端和服务器端,以提高性能和吞吐量等 - -## jtest进阶用法 -本段将会介绍如何更好的使用jtest压榨ATS性能,避免瓶颈问题 -### 独立测试机器 -进阶用法里,我们先尝试用用2个机器,一个跑jtest,一个跑ATS,做压力测试: - -1. 定义机器角色: - - 我们使用 'ts.cn' 作为压测的URL的域名,192.168.0.1作为我们的ATS服务器,192.168.0.2作为我们的测试端,跑jtest。 - -2. 设置ATS的map规则: - - 在我们的例子里,我们应该设置如下规则: - - `map http://ts.cn:9080/ http://192.168.0.2:9080/` -3. 在192.168.0.2运行jtest命令: - - `jtest -S ts.cn -P 192.168.0.1` - -在这个例子里,我们设置了服务器端的域名为`ts.cn`,同时给了`-P`参数指定了要测试的服务器是192.168.0.1,这样我们的jtest将会使用192.168.0.1:8080作为代理服务器,使用ts.cn作为要压测的域名来进行压力测试。 - -### 一个机器多个jtest -由于jtest是单进程模式,进程压测一个当前多大16个core以上的ATS系统,肯定是压不动的,如何才能更好的进行压测呢?接上面的例子,我们可以在测试端多跑几个jtest进程,我们暂跑6个: - -1. 定义机器角色: - - 我们使用 'ts.cn' 作为压测的URL的域名,192.168.0.1作为我们的ATS服务器,192.168.0.2作为我们的测试端,跑jtest。 - -2. 设置ATS的map规则: - - 在我们的例子里,我们应该设置如下规则: - - map http://ts.cn:9080/ http://192.168.0.2:9080/ - map http://ts.cn:9081/ http://192.168.0.2:9081/ - map http://ts.cn:9082/ http://192.168.0.2:9082/ - map http://ts.cn:9083/ http://192.168.0.2:9083/ - map http://ts.cn:9084/ http://192.168.0.2:9084/ - map http://ts.cn:9085/ http://192.168.0.2:9085/ - - -3. 在192.168.0.2运行对应的6个jtest命令: - - jtest -S ts.cn -s 9080 -P 192.168.0.1 & - jtest -S ts.cn -s 9081 -P 192.168.0.1 & - jtest -S ts.cn -s 9082 -P 192.168.0.1 & - jtest -S ts.cn -s 9083 -P 192.168.0.1 & - jtest -S ts.cn -s 9084 -P 192.168.0.1 & - jtest -S ts.cn -s 9085 -P 192.168.0.1 & - -这样就可以啦,问题是一个窗口里会持续打印很多很多结果信息,不太容易分辨问题。如何更优雅的跑多个机器多个jtest能?我们下面进行详细介绍 - -## jtest集群用法 -在服务器集群中,一对一的压测是很见的,通常我们为减少jtest性能瓶颈,会采用多个机器,多个jtest一起跑的情况,我们将需要引入其他控制机制、数据统计机制才好。 - -### screen的并行jtest管理 -首先,我们必须有一个并发测试的机制,单机多进程、多机并行,并且可以统计各个进程返回的结果。当初ATS有一个测试框架,可以执行多个机器的并行测试,并能够汇总多个jtest的返回结果,现今我们虽然没有这个工具,但是我们在服务器端的统计工具tsar能够帮我们补足类似的统计数据,测试客户端方面我们这里采用简单的模式,一个screen脚本: - -screen的`-c`参数,可以很方便的启动一个screen脚本,在这个脚本里,可以启动多个screen窗口,我们采用这个机制来并发的启动多个jtest命令,由于screen同时有后台驻留的功能,可以确保我们在需要的时候回来看看各个jtest的测试结果,下面是一个screen的脚本: - - screen jtest -P 192.168.0.1 -S ts.cn -s 192.168.0.2 -z 1.0 -D 9080 -k 2 -c 30 -Z 1000 -q 10000 -L 50000 - screen jtest -P 192.168.0.1 -S ts.cn -s 192.168.0.2 -z 1.0 -D 9081 -k 2 -c 30 -Z 1000 -q 10000 - screen top - detach - -我们采用screen的`-X quit`命令,来停止jtest压测。同时,为了表示各个目标的机器,我们可以给各个screen打上标识(-R参数),以方便区分一个机器上的多个screen。 - -### N:N的测试机器 -为了测试,我们假定有10台测试客户端机器192.168.0.{10..19},10台测试目标服务器192.168.0.{20..29}(组成cluster或孤立服务器),我们可以在所有的测试客户端机器上对任何目标服务器进行压测,下面是我用来生成测试的一些小脚本: - -修改测试方法jtest参数 - - # c为服务器ip 10..19 - # s为客户端ip 20..29 - # i为每个机器最多对一个服务器起10个进程压测 0..9 - # 我们的jtest监听端口为 $c$s$i,如10200这样的形式 - - for c in {10..19} - do - for s in {20..29} - do - for i in {0..9} - do echo map http://ts.cn:$c$s$i/ http://192.168.0.$c:$c$s$i/ - done - done - done - -这会生成一个很长的map规则,你需要把这个map规则添加到所有的ATS服务器的remap.config配置里: - - map http://ts.cn:10200/ http://192.168.0.10:10200/ - map http://ts.cn:10201/ http://192.168.0.10:10201/ - map http://ts.cn:10202/ http://192.168.0.10:10202/ - . - . - . - map http://ts.cn:19297/ http://192.168.0.19:19297/ - map http://ts.cn:19298/ http://192.168.0.19:19298/ - map http://ts.cn:19299/ http://192.168.0.19:19299/ - -然后我们就可以用来做一些测试的screen脚本,下面的脚本会让我们在每个测试客户端机器上,生成一些针对每个测试目标服务器启动jtest的screen命令脚本: - - for c in {10..19};do for s in {20..29};do echo " - screen jtest -P 192.168.0.${s} -S ts.cn -s ${c}${s}0 -z 1.0 -D ${c}${s} -k 2 -c 30 -Z 1000 -q 10000 -L 5000 - screen jtest -P 192.168.0.${s} -S ts.cn -s ${c}${s}1 -z 1.0 -D ${c}${s} -k 2 -c 30 -Z 1000 -q 10000 -L 10000 - screen jtest -P 192.168.0.${s} -S ts.cn -s ${c}${s}2 -z 1.0 -D ${c}${s} -k 2 -c 30 -Z 1000 -q 10000 -L 10000 - screen jtest -P 192.168.0.${s} -S ts.cn -s ${c}${s}3 -z 1.0 -D ${c}${s} -k 2 -c 30 -Z 1000 -q 10000 -L 20000 - screen jtest -P 192.168.0.${s} -S ts.cn -s ${c}${s}4 -z 1.0 -D ${c}${s} -k 2 -c 30 -Z 1000 -q 10000 -L 50000 - screen jtest -P 192.168.0.${s} -S ts.cn -s ${c}${s}5 -z 1.0 -D ${c}${s} -k 2 -c 30 -Z 1000 -q 10000 - detach - " | ssh root@192.168.0.$c tee jtest.screen.$s;done;done -这里生产的每个脚本会对每个测试服务器,启动多个jtest测试,我们这里定义了文件大小(-L)分别为5KB、10KB、10KB、20KB、50KB、随机大小的6个jtest进程,每个进程并发(-c)30,命中率(-z)100%,并通过随机参数(-D)把hash散开。 - -然后,测试脚本的启动将会非常简单: - - for c in {10..19} - do - for s in {20..29} - do - ssh -t 192.168.0.$c screen -R jtest$s -c jtest.screen.$s - done - done -这些screen进程被命名为jtest{20..29},你可以用screen -R jtest20来进入后台运行的screen,并用标准的CTRL+a n键盘指令在各个jtest脚本切换以查看测试情况。 - -停止测试的命令也很简单: - - for c in {10..19} - do - for s in {20..29} - do - ssh -t 192.168.0.$c screen -Rx jtest$s -X quit - done - done -以上命令都可以根据需要调整c和s的数量,例如可以实现多个客户端机器压测一个ATS服务器的效果等。 - -## 如何更好的仿真出线上的复杂情况 -通过上面的screen脚本的定义,我们可以随时修改其中的重要设置,如-c -z等等,甚至增加一些客户端仿真的随机因素等等。 - -#### 如何指定内存占用 -ATS的内存是一个历来敏感的话题,内存泄漏也是最最难排查的问题之一,如何在jtest测试中针对性的对内存缓存进行控制就是一个比较切实的需求。 - -默认jtest的-L参数是-1,大小全随机,不限制返回body大小的情况下,1000个hot object仅占用24MB左右内存缓存,平均文件大小差不多24KB,从这里我们就明白了,如何调整参数以让ATS占用更多内存: - -1. 用-Z调整hot objects数量,这是热点数据,是可缓存数据,是有机会进内存的数据; -2. 用-L调整返回的文档的Body大小,hotset * docsize 就是我们需要的内存缓存大小; - -这样我们就能够构建一个良好的内存占用模型,通过ATS的dump mem参数 *proxy.config.dump_mem_info_frequency* 可以让内存缓存占用也dump出来如: - - ----------------------------------------------------------------------------------------- - Allocated | In-Use | Type Size | Free List Name - --------------------|--------------------|------------|---------------------------------- - 0 | 0 | 2097152 | memory/ioBufAllocator[14] - 3355443200 | 3215982592 | 1048576 | memory/ioBufAllocator[13] - 67108864 | 55574528 | 524288 | memory/ioBufAllocator[12] - 0 | 0 | 262144 | memory/ioBufAllocator[11] - 0 | 0 | 131072 | memory/ioBufAllocator[10] - 0 | 0 | 65536 | memory/ioBufAllocator[9] - -如上所示中结果中,ram在records.yaml中的相关配置为: - -```yaml - ts: - cache: - ram_cache: - size: 3221225470 - ram_cache_cutoff: 41943040 -``` - - -remap.config配置为: - - map http://tsdirect.cn:9080/ http://127.0.0.1:9080/ - -开启mem dump,我仅使用两个命令先后测试: - - jtest -P localhost -S tsdirect.cn -L 950000 -z 1 -Z 3000 - jtest -P localhost -S tsdirect.cn -L 480000 -z 1 -Z 6000 -经过第一个jtest命令执行后,内存读入接近3G数据,使用的是1048576(1M)大小的ioBufAllocator分配的,而执行第二个jtest,除了填满后续空闲的600M内存,其他近3G数据再也没有机会进入内存了,无论第二个jtest跑多久都是这样。 - -这个结果显示我们当前master版本在内存缓存管理方面的热点替换方面的问题仍然没有改进。 - -## 如何使用jtest来跑stress测试 -TBD - -## 如何分析jtest爆出的一些ATS问题 -TBD - -## jtest的待改进问题以及后续计划 -* 支持https?? -* 支持spdy?? -* 单进程太弱?? -* 集群压测上层调度工具??? -* 循环(随机)压测urls提供的列表,类似http_load一样 - - - - - diff --git a/tools/jtest/jtest.cc b/tools/jtest/jtest.cc deleted file mode 100644 index b2943115cf7..00000000000 --- a/tools/jtest/jtest.cc +++ /dev/null @@ -1,4818 +0,0 @@ -/** @file - - A brief file description - - @section license License - - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include "tscore/ink_defs.h" -#include "tscore/ink_error.h" -#include "tscore/ink_memory.h" -#include "tscore/ink_assert.h" -#include "tscore/MD5.h" -#include "tscore/ParseRules.h" -#include "tscore/ink_time.h" -#include "tscore/ink_args.h" -#include "tscore/Version.h" -#include "swoc/TextView.h" -#include "tscore/Random.h" - -/* - FTP - Traffic Server Template - 220 i5 FTP server (Version wu-2.4(3) Mon Jul 8 14:39:48 PDT 1996) ready. - USER anonymous - 331 Guest login ok, send your complete e-mail address as password. - PASS traffic_server@inktomi.com - 230 Guest login ok, access restrictions apply. - CWD . - 250 CWD command successful. - TYPE I - 200 Type set to I. - PASV - 227 Entering Passive Mode (128,174,5,14,16,238) - RETR foo - LIST - 150 Opening ASCII mode data connection for /bin/ls. -*/ - -#define MAX_URL_LEN 1024 - -// -// Compilation Options -// - -#define SERVER_BUFSIZE 4096 -#define CLIENT_BUFSIZE 2048 -#define MAX_BUFSIZE (65536 + 4096) - -// -// Constants -// -#define MAXFDS 65536 -#define HEADER_DONE -1 -#define POLL_GROUP_SIZE 800 -#define MAX_RESPONSE_LENGTH 1000000 -#define HEADER_SIZE 10000 -#define POLL_TIMEOUT 10 -#define STATE_FTP_DATA_READY 0xFAD -#define MAX_DEFERED_URLS 10000 -#define DEFERED_URLS_BLOCK 2000 - -#define MAX_REQUEST_BODY_LENGTH MAX_RESPONSE_LENGTH - -#define JTEST_DONE 0 -#define JTEST_CONT 1 - -static const char *hexdigits = "0123456789ABCDEFabcdef"; -static const char *dontunescapify = "#;/?+=&:@%"; -static const char *dontescapify = "#;/?+=&:@~.-_%"; - -enum FTP_MODE { - FTP_NULL, - FTP_PORT, - FTP_PASV, -}; - -using accept_fn_t = int (*)(int); -using poll_cb = int (*)(int); - -static int read_request(int sock); -static int write_request(int sock); -static int make_client(unsigned int addr, int port); -static void make_bfc_client(unsigned int addr, int port); -static int make_url_client(const char *url, const char *base_url = 0, bool seen = false, bool unthrottled = false); -static int write_ftp_response(int sock); -static void interval_report(); -static void undefer_url(bool unthrottled = false); -static void done(); -static int is_done(); -static int open_server(unsigned short int port, accept_fn_t accept_fn); -static int accept_ftp_data(int sock); - -static char **defered_urls = nullptr; -static int n_defered_urls = 0; -static int server_fd = 0; -static int server_port = 0; -static int proxy_port = 8080; -static unsigned int proxy_addr = 0; -static unsigned int local_addr = 0; -static char proxy_host[81] = "localhost"; -static char local_host[255 + 1]; -static int verbose = 0; -static int verbose_errors = 1; -static int debug = 0; -static int nclients = 100; -static int current_clients = 0; -static int client_speed = 0; -static int check_content = 0; -static int nocheck_length = 0; -static int obey_redirects = 1; -static int only_clients = 0; -static int only_server = 0; -static int drop_after_CL = 0; -static int server_speed = 0; -static int server_delay = 0; -static int interval = 1; -static int sbuffersize = SERVER_BUFSIZE; -static int cbuffersize = CLIENT_BUFSIZE; -static int test_time = 0; -static int last_fd = -1; -static char *response_buffer = nullptr; -static int errors = 0; -static int clients = 0, running_clients = 0, new_clients = 0, total_clients = 0; -static int servers = 0, running_servers = 0, new_servers = 0, total_servers = 0; -static float running_ops = 0; -static int new_ops = 0; -static float total_ops = 0; -static int running_sops = 0, new_sops = 0, total_sops = 0; -static int running_latency = 0, latency = 0; -static int lat_ops = 0, b1_ops = 0, running_b1latency = 0, b1latency = 0; -static uint64_t running_cbytes = 0, new_cbytes = 0, total_cbytes = 0; -static uint64_t running_tbytes = 0, new_tbytes = 0, total_tbytes = 0; -static int average_over = 5; -static double hitrate = 0.4; -static int hotset = 1000; -static int keepalive = 4; -static int keepalive_cons = 4; -static int follow_arg = 0; -static int follow = 0; -static int follow_same_arg = 0; -static int follow_same = 0; -static char current_host[512]; -static int fullpage = 0; -static int show_before = 0; -static int show_headers = 0; -static int server_keepalive = 4; -static int urls_mode = 0; -static int pipeline = 1; -static int hostrequest = 0; -static int ftp = 0; -static double ftp_mdtm_err_rate = 0.0; -static int ftp_mdtm_rate = 0; -static time_t ftp_mdtm_last_update = 0; -static char ftp_mdtm_str[64]; -static int embed_url = 1; -static double ims_rate = 0.5; -static double client_abort_rate = 0.0; -static double server_abort_rate = 0.0; -static int compd_port = 0; -static int compd_suite = 0; -static int ka_cache_head[500]; -static int ka_cache_tail[500]; -static int n_ka_cache = 0; -static char urls_file[256] = ""; -static FILE *urls_fp = nullptr; -static char urlsdump_file[256] = ""; -static FILE *urlsdump_fp = nullptr; -static int drand_seed = 0; -static int docsize = -1; -static int url_hash_entries = 1000000; -static char url_hash_filename[256] = ""; -static int bandwidth_test = 0; -static int bandwidth_test_to_go = 0; -static uint64_t total_client_request_bytes = 0; -static uint64_t total_proxy_request_bytes = 0; -static uint64_t total_server_response_body_bytes = 0; -static uint64_t total_server_response_header_bytes = 0; -static uint64_t total_proxy_response_body_bytes = 0; -static uint64_t total_proxy_response_header_bytes = 0; -static ink_hrtime now = 0, start_time = 0; -static int extra_headers = 0; -static int alternates = 0; -static int abort_retry_speed = 0; -static int abort_retry_bytes = 0; -static int abort_retry_secs = 5; -static int client_rate = 0; -static double reload_rate = 0; -static int vary_user_agent = 0; -static int server_content_type = 0; -static int request_extension = 0; -static int no_cache = 0; -static double evo_rate = 0.0; -static double zipf = 0.0; -static int zipf_bucket_size = 1; -static int range_mode = 0; -static int post_support = 0; -static int post_size = 0; - -static const ArgumentDescription argument_descriptions[] = { - {"proxy_port", 'p', "Proxy Port", "I", &proxy_port, "JTEST_PROXY_PORT", nullptr}, - {"proxy_host", 'P', "Proxy Host", "S80", &proxy_host, "JTEST_PROXY_HOST", nullptr}, - {"server_port", 's', "Server Port (0:auto select)", "I", &server_port, "JTEST_SERVER_PORT", nullptr}, - {"server_host", 'S', "Server Host (null:localhost)", "S80", &local_host, "JTEST_SERVER_HOST", nullptr}, - {"server_speed", 'r', "Server Bytes Per Second (0:unlimit)", "I", &server_speed, "JTEST_SERVER_SPEED", nullptr}, - {"server_delay", 'w', "Server Initial Delay (msec)", "I", &server_delay, "JTEST_SERVER_INITIAL_DELAY", nullptr}, - {"clients", 'c', "Clients", "I", &nclients, "JTEST_CLIENTS", nullptr}, - {"client_speed", 'R', "Client Bytes Per Second (0:unlimit)", "I", &client_speed, "JTEST_CLIENT_SPEED", nullptr}, - {"sbuffersize", 'b', "Server Buffer Size", "I", &sbuffersize, "JTEST_SERVER_BUFSIZE", nullptr}, - {"cbuffersize", 'B', "Client Buffer Size", "I", &cbuffersize, "JTEST_CLIENT_BUFSIZE", nullptr}, - {"average_over", 'a', "Seconds to Average Over", "I", &average_over, "JTEST_AVERAGE_OVER", nullptr}, - {"hitrate", 'z', "Hit Rate", "D", &hitrate, "JTEST_HITRATE", nullptr}, - {"hotset", 'Z', "Hotset Size", "I", &hotset, "JTEST_HOTSET", nullptr}, - {"interval", 'i', "Reporting Interval (seconds)", "I", &interval, "JTEST_INTERVAL", nullptr}, - {"keepalive", 'k', "Keep-Alive Length", "I", &keepalive, "JTEST_KEEPALIVE", nullptr}, - {"keepalive_cons", 'K', "# Keep-Alive Connections (0:unlimit)", "I", &keepalive_cons, "JTEST_KEEPALIVE_CONNECTIONS", nullptr}, - {"docsize", 'L', "Document Size (-1:varied)", "I", &docsize, "JTEST_DOCSIZE", nullptr}, - {"skeepalive", 'j', "Server Keep-Alive (0:unlimit)", "I", &server_keepalive, "JTEST_SERVER_KEEPALIVE", nullptr}, - {"show_urls", 'x', "Show URLs before they are accessed", "F", &show_before, "JTEST_SHOW_URLS", nullptr}, - {"show_headers", 'X', "Show Headers", "F", &show_headers, "JTEST_SHOW_HEADERS", nullptr}, - {"ftp", 'f', "FTP Requests", "F", &ftp, "JTEST_FTP", nullptr}, - {"ftp_mdtm_err_rate", ' ', "FTP MDTM 550 Error Rate", "D", &ftp_mdtm_err_rate, "JTEST_FTP_MDTM_ERR_RATE", nullptr}, - {"ftp_mdtm_rate", ' ', "FTP MDTM Update Rate (sec, 0:never)", "I", &ftp_mdtm_rate, "JTEST_FTP_MDTM_RATE", nullptr}, - {"fullpage", 'l', "Full Page (Images)", "F", &fullpage, "JTEST_FULLPAGE", nullptr}, - {"follow", 'F', "Follow Links", "F", &follow_arg, "JTEST_FOLLOW", nullptr}, - {"same_host", 'J', "Only follow URLs on same host", "F", &follow_same_arg, "JTEST_FOLLOW_SAME", nullptr}, - {"test_time", 't', "run for N seconds (0:unlimited)", "I", &test_time, "TEST_TIME", nullptr}, - {"urls", 'u', "URLs from File", "S256", urls_file, "JTEST_URLS", nullptr}, - {"urlsdump", 'U', "URLs to File", "S256", urlsdump_file, "JTEST_URLS_DUMP", nullptr}, - {"hostrequest", 'H', "Host Request(1=yes,2=transparent)", "I", &hostrequest, "JTEST_HOST_REQUEST", nullptr}, - {"check_content", 'C', "Check returned content", "F", &check_content, "JTEST_CHECK_CONTENT", nullptr}, - {"nocheck_length", ' ', "Don't check returned length", "F", &nocheck_length, "JTEST_NOCHECK_LENGTH", nullptr}, - {"obey_redirects", 'm', "Obey Redirects", "f", &obey_redirects, "JTEST_OBEY_REDIRECTS", nullptr}, - {"embed URL", 'M', "Embed URL in synth docs", "f", &embed_url, "JTEST_EMBED_URL", nullptr}, - {"url_hash_entries", 'q', "URL Hash Table Size (-1:use file size)", "I", &url_hash_entries, "JTEST_URL_HASH_ENTRIES", nullptr}, - {"url_hash_filename", 'Q', "URL Hash Table Filename", "S256", url_hash_filename, "JTEST_URL_HASH_FILENAME", nullptr}, - {"only_clients", 'y', "Only Clients", "F", &only_clients, "JTEST_ONLY_CLIENTS", nullptr}, - {"only_server", 'Y', "Only Server", "F", &only_server, "JTEST_ONLY_SERVER", nullptr}, - {"bandwidth_test", 'A', "Bandwidth Test", "I", &bandwidth_test, "JTEST_BANDWIDTH_TEST", nullptr}, - {"drop_after_CL", 'T', "Drop after Content-Length", "F", &drop_after_CL, "JTEST_DROP", nullptr}, - {"verbose", 'v', "Verbose Flag", "F", &verbose, "JTEST_VERBOSE", nullptr}, - {"verbose_errors", 'E', "Verbose Errors Flag", "f", &verbose_errors, "JTEST_VERBOSE_ERRORS", nullptr}, - {"drand", 'D', "Random Number Seed", "I", &drand_seed, "JTEST_DRAND", nullptr}, - {"ims_rate", 'I', "IMS Not-Changed Rate", "D", &ims_rate, "JTEST_IMS_RATE", nullptr}, - {"client_abort_rate", 'g', "Client Abort Rate", "D", &client_abort_rate, "JTEST_CLIENT_ABORT_RATE", nullptr}, - {"server_abort_rate", 'G', "Server Abort Rate", "D", &server_abort_rate, "JTEST_SERVER_ABORT_RATE", nullptr}, - {"extra_headers", 'n', "Number of Extra Headers", "I", &extra_headers, "JTEST_EXTRA_HEADERS", nullptr}, - {"alternates", 'N', "Number of Alternates", "I", &alternates, "JTEST_ALTERNATES", nullptr}, - {"client_rate", 'e', "Clients Per Sec", "I", &client_rate, "JTEST_CLIENT_RATE", nullptr}, - {"abort_retry_speed", 'o', "Abort/Retry Speed", "I", &abort_retry_speed, "JTEST_ABORT_RETRY_SPEED", nullptr}, - {"abort_retry_bytes", ' ', "Abort/Retry Threshold (bytes)", "I", &abort_retry_bytes, "JTEST_ABORT_RETRY_THRESHHOLD_BYTES", - nullptr }, - {"abort_retry_secs", ' ', "Abort/Retry Threshold (secs)", "I", &abort_retry_secs, "JTEST_ABORT_RETRY_THRESHHOLD_SECS", nullptr}, - {"reload_rate", 'W', "Reload Rate", "D", &reload_rate, "JTEST_RELOAD_RATE", nullptr}, - {"compd_port", 'O', "Compd port", "I", &compd_port, "JTEST_COMPD_PORT", nullptr}, - {"compd_suite", '1', "Compd Suite", "F", &compd_suite, "JTEST_COMPD_SUITE", nullptr}, - {"vary_user_agent", '2', "Vary on User-Agent (use w/ alternates)", "I", &vary_user_agent, "JTEST_VARY_ON_USER_AGENT", nullptr}, - {"content_type", '3', "Server Content-Type (1 html, 2 jpeg)", "I", &server_content_type, "JTEST_CONTENT_TYPE", nullptr}, - {"request_extension", '4', "Request Extn (1\".html\" 2\".jpeg\" 3\"/\")", "I", &request_extension, "JTEST_REQUEST_EXTENSION", - nullptr }, - {"no_cache", '5', "Send Server no-cache", "I", &no_cache, "JTEST_NO_CACHE", nullptr}, - {"zipf_bucket", '7', "Bucket size (of 1M buckets) for Zipf", "I", &zipf_bucket_size, "JTEST_ZIPF_BUCKET_SIZE", nullptr}, - {"zipf", '8', "Use a Zipf distribution with this alpha (say 1.2)", "D", &zipf, "JTEST_ZIPF", nullptr}, - {"evo_rate", '9', "Evolving Hotset Rate (evolutions/hour)", "D", &evo_rate, "JTEST_EVOLVING_HOTSET_RATE", nullptr}, - {"debug", 'd', "Debug Flag", "F", &debug, "JTEST_DEBUG", nullptr}, - {"range_mode", ' ', "Range Mode", "I", &range_mode, "JTEST_RANGE_MODE", nullptr}, - {"post_support", ' ', "POST Mode (0 disable(default), 1 random, 2 specified size by post_size)", "I", &post_support, - "JTEST_POST_MODE", nullptr}, - {"post_size", ' ', "POST SIZE", "I", &post_size, "JTEST_POST_SIZE", nullptr}, - HELP_ARGUMENT_DESCRIPTION(), - VERSION_ARGUMENT_DESCRIPTION() -}; -int n_argument_descriptions = countof(argument_descriptions); - -struct FD { - int fd; - poll_cb read_cb; - poll_cb write_cb; - ink_hrtime start; - ink_hrtime active; - ink_hrtime ready; - - double doc; - int doc_length; - struct sockaddr_in name; - - int state; // request parsing state - int req_pos; // request read position - char *base_url = nullptr; - char *req_header = nullptr; - char *response = nullptr; - char *response_header = nullptr; - int max_req_header_size = 0; - int length; - int response_length; - int response_remaining; - int keepalive = 0; - int next; - int nalternate = 0; - unsigned int ip = 0; - unsigned int binary : 1; - unsigned int ims : 1; - unsigned int range : 1; - unsigned int drop_after_CL : 1; - unsigned int client_abort : 1; - unsigned int jg_compressed : 1; - int *count; - int bytes; - int ftp_data_fd = 0; - FTP_MODE ftp_mode; - unsigned int ftp_peer_addr; - unsigned short ftp_peer_port; - unsigned long range_bytes; - unsigned long range_end; - unsigned long range_start; - int post_size; - int total_length; - int post_cl; - int send_header; - int header_size; - - void - reset() - { - next = 0; - fd = -1; - read_cb = nullptr; - write_cb = nullptr; - state = 0; - start = 0; - active = 0; - ready = 0; - req_pos = 0; - length = 0; - range = 0; - range_bytes = 0; - range_start = 0; - range_end = 0; - post_size = 0; - send_header = 0; - - if (!urls_mode) { - response = nullptr; - } - - if (response_header) { - response_header[0] = 0; - } - - response_length = 0; - response_remaining = 0; - count = nullptr; - bytes = 0; - doc = 0.0; - doc_length = 0; - ims = 0; - drop_after_CL = ::drop_after_CL; - client_abort = 0; - jg_compressed = 0; - ftp_mode = FTP_NULL; - ftp_peer_addr = 0; - ftp_peer_port = 0; - total_length = 0; - post_cl = 0; - header_size = 0; - } - - void close(); - FD() : binary(0) - { - ink_zero(name); - reset(); - } -}; - -FD *fd = nullptr; - -void -FD::close() -{ - if (verbose) { - printf("close: %d\n", fd); - } - ::close(fd); - if (is_done()) { - done(); - } - keepalive = 0; - ip = 0; - if (count) { - (*count)--; - } - if (count == &clients) { - current_clients--; - } - reset(); - if (urls_mode) { - undefer_url(); - } - ftp_data_fd = 0; -} - -#define MAX_FILE_ARGUMENTS 100 - -struct InkWebURLComponents { - char sche[MAX_URL_LEN + 1]; - char host[MAX_URL_LEN + 1]; - char port[MAX_URL_LEN + 1]; - char path[MAX_URL_LEN + 1]; - char frag[MAX_URL_LEN + 1]; - char quer[MAX_URL_LEN + 1]; - char para[MAX_URL_LEN + 1]; - - int sche_exists; - int host_exists; - int port_exists; - int path_exists; - int frag_exists; - int quer_exists; - int para_exists; - - int rel_url; - int leading_slash; - int is_path_name; -}; - -static int ink_web_remove_dots(char *src, char *dest, int *leadingslash, int max_dest_len); - -static int ink_web_unescapify_string(char *dest_in, char *src_in, int max_dest_len); - -static int ink_web_escapify_string(char *dest_in, char *src_in, int max_dest_len); - -static void ink_web_decompose_url(const char *src_url, char *sche, char *host, char *port, char *path, char *frag, char *quer, - char *para, int *real_sche_exists, int *real_host_exists, int *real_port_exists, - int *real_path_exists, int *real_frag_exists, int *real_quer_exists, int *real_para_exists, - int *real_relative_url, int *real_leading_slash); - -static void ink_web_canonicalize_url(const char *base_url, const char *emb_url, char *dest_url, int max_dest_url_len); - -static void ink_web_decompose_url_into_structure(const char *url, InkWebURLComponents *c); - -static void -remove_last_seg(char *src, char *dest) -{ - char *ptr; - for (ptr = src + strlen(src) - 1; ptr >= src; ptr--) { - if (*ptr == '/') { - break; - } - } - while (src <= ptr) { - *dest++ = *src++; - } - *dest = '\0'; -} - -static inline void -remove_multiple_slash(char *src, char *dest) -{ - char *ptr = nullptr; - - for (ptr = src; *ptr;) { - *(dest++) = *ptr; - if (*ptr == '/') { - while ((*ptr == '/') && *ptr) { - ptr++; - } - } else { - ptr++; - } - } - *dest = '\0'; -} - -static inline void -append_string(char *dest, const char *src, int *offset_ptr, int max_len) -{ - int num = strlen(src); - if (*offset_ptr + num >= max_len) { - num = max_len - (*offset_ptr + 1); - if (num <= 1) { - return; - } - } - memcpy(dest + *offset_ptr, src, num); - dest[*offset_ptr + num] = '\0'; - (*offset_ptr) += num; -} - -// End Library functions - -static void -panic(const char *s) -{ - fputs(s, stderr); - exit(1); -} - -static void -panic_perror(const char *s) -{ - perror(s); - exit(1); -} - -static int -max_limit_fd() -{ - struct rlimit rl; - if (getrlimit(RLIMIT_NOFILE, &rl) >= 0) { -#ifdef OPEN_MAX - // Darwin - rl.rlim_cur = std::min(static_cast(OPEN_MAX), rl.rlim_max); -#else - rl.rlim_cur = rl.rlim_max; -#endif - if (setrlimit(RLIMIT_NOFILE, &rl) >= 0) { - if (getrlimit(RLIMIT_NOFILE, &rl) >= 0) { - return rl.rlim_cur; - } - } - } - panic_perror("couldn't set RLIMIT_NOFILE\n"); - return -1; -} - -static int -read_ready(int fd_in) -{ - struct pollfd p; - p.events = POLLIN; - p.fd = fd_in; - int r = poll(&p, 1, 0); - if (r <= 0) { - return r; - } - if (p.revents & (POLLERR | POLLNVAL)) { - return -1; - } - if (p.revents & (POLLIN | POLLHUP)) { - return 1; - } - return 0; -} - -static void -poll_init(int sock) -{ - if (!fd[sock].req_header) { - fd[sock].max_req_header_size = HEADER_SIZE * pipeline + MAX_REQUEST_BODY_LENGTH; - fd[sock].req_header = (char *)malloc(fd[sock].max_req_header_size); - } - if (!fd[sock].response_header) { - fd[sock].response_header = (char *)malloc(HEADER_SIZE); - } - if (!fd[sock].base_url) { - fd[sock].base_url = (char *)malloc(HEADER_SIZE); - } - fd[sock].reset(); -} - -static void -poll_set(int sock, poll_cb read_cb, poll_cb write_cb = nullptr) -{ - if (verbose) { - printf("adding poll %d %s %s\n", sock, read_cb ? "READ" : "-", write_cb ? "WRITE" : "-"); - } - fd[sock].fd = sock; - fd[sock].read_cb = read_cb; - fd[sock].write_cb = write_cb; - if (last_fd < sock) { - last_fd = sock; - } -} - -static void -poll_init_set(int sock, poll_cb read_cb, poll_cb write_cb = nullptr) -{ - poll_init(sock); - poll_set(sock, read_cb, write_cb); -} - -static int -fast(int sock, int speed, int d) -{ - if (!speed) { - return 0; - } - int64_t t = now - fd[sock].start + 1; - int target = (int)(((t / HRTIME_MSECOND) * speed) / 1000); - int delta = d - target; - if (delta > 0) { - int mwait = (delta * 1000) / speed; - fd[sock].ready = now + (mwait * HRTIME_MSECOND); - return 1; - } else { - fd[sock].ready = now; - } - return 0; -} - -// Return the number of milliseconds elapsed since the start of the request. -static ink_hrtime -elapsed_from_start(int sock) -{ - ink_hrtime timenow = ink_get_hrtime(); - return ink_hrtime_diff_msec(timenow, fd[sock].start); -} - -static int -faster_than(int sock, int speed, int d) -{ - if (!speed) { - return 1; - } - int64_t t = now - fd[sock].start + 1; - int target = (int)(((t / HRTIME_MSECOND) * speed) / 1000); - int delta = d - target; - if (delta > 0) { - return 1; - } - return 0; -} - -static void -get_path_from_req(char *buf, char **purl_start, char **purl_end) -{ - char *url_start = buf; - char *url_end = nullptr; - if (!strncasecmp(url_start, "GET ", sizeof("GET ") - 1)) { - url_start += sizeof("GET ") - 1; - url_end = (char *)memchr(url_start, ' ', 70); - } else if (!strncasecmp(url_start, "POST ", sizeof("POST ") - 1)) { - url_start += sizeof("POST ") - 1; - url_end = (char *)memchr(url_start, ' ', 70); - } else { - url_end = (char *)memchr(url_start, 0, 70); - } - if (!url_end) { - panic("malformed request\n"); - } - if (url_end - url_start > 10) { - if (!strncasecmp(url_start, "http://", sizeof("http://") - 1)) { - url_start += sizeof("http://") - 1; - url_start = (char *)memchr(url_start, '/', 70); - } - } - *purl_start = url_start; - *purl_end = url_end; -} - -static int -make_response_header(int sock, char *url_start, char *url_end, int *url_len, char *header, int header_limit) -{ - const char *content_type; - switch (server_content_type) { - case 1: - content_type = "text/html"; - break; - case 2: - content_type = "image/jpeg"; - break; - default: - content_type = ((compd_suite || alternates) ? "image/jpeg" : "text/html"); - if (only_server && strstr(fd[sock].req_header, "Cookie:")) { - content_type = "image/jpeg"; - } - } - if (!ftp && embed_url && fd[sock].response_length > 16) { - get_path_from_req(fd[sock].req_header, &url_start, &url_end); - *url_end = 0; - *url_len = url_end - url_start; - } - int print_len = 0; - if (!ftp) { - if (fd[sock].range) { - char buff[1024]; - memset(buff, 0, 1024); - if (fd[sock].range_end > fd[sock].range_start) { - snprintf(buff, 1024, "Content-Range: bytes %lu-%lu/%d", fd[sock].range_start, fd[sock].range_end, fd[sock].total_length); - } else { - snprintf(buff, 1024, "Content-Range: bytes %lu-%d/%d", fd[sock].range_start, fd[sock].total_length, fd[sock].total_length); - } - print_len = snprintf(header, header_limit, - "HTTP/1.1 206 Partial-Content\r\n" - "Content-Type: %s\r\n" - "Cache-Control: max-age=630720000\r\n" - "Last-Modified: Mon, 05 Oct 2010 01:00:00 GMT\r\n" - "%s" - "Content-Length: %d\r\n" - "%s\r\n" - "%s" - "\r\n%s", - content_type, fd[sock].keepalive > 0 ? "Connection: Keep-Alive\r\n" : "Connection: close\r\n", - fd[sock].response_length, buff, no_cache ? "Pragma: no-cache\r\nCache-Control: no-cache\r\n" : "", - url_start ? url_start : ""); - } else if (fd[sock].ims) { - print_len = snprintf(header, header_limit, - "HTTP/1.0 304 Not-Modified\r\n" - "Content-Type: %s\r\n" - "Last-Modified: Mon, 05 Oct 2010 01:00:00 GMT\r\n" - "%s" - "\r\n", - content_type, fd[sock].keepalive > 0 ? "Connection: Keep-Alive\r\n" : ""); - *url_len = 0; - } else { - print_len = snprintf(header, header_limit, - "HTTP/1.0 200 OK\r\n" - "Content-Type: %s\r\n" - "Cache-Control: max-age=630720000\r\n" - "Last-Modified: Mon, 05 Oct 2010 01:00:00 GMT\r\n" - "%s" - "Content-Length: %d\r\n" - "%s" - "\r\n%s", - content_type, fd[sock].keepalive > 0 ? "Connection: Keep-Alive\r\n" : "", fd[sock].response_length, - no_cache ? "Pragma: no-cache\r\nCache-Control: no-cache\r\n" : "", url_start ? url_start : ""); - } - } else { - *url_len = print_len = - snprintf(header, header_limit, "ftp://%s:%d/%12.10f/%d", local_host, server_port, fd[sock].doc, fd[sock].length); - } - - if (show_headers) { - printf("Response to Proxy: {\n%s}\n", header); - } - - return print_len; -} - -static int -send_response(int sock) -{ - char *url_start = nullptr; - char *url_end = nullptr; - int err = 0, towrite; - int url_len = 0; - - if (fd[sock].req_pos >= 0) { - char header[1024]; - - int print_len = make_response_header(sock, url_start, url_end, &url_len, header, 1024); - - int len = print_len - fd[sock].req_pos; - ink_assert(len > 0); - do { - err = write(sock, header + fd[sock].req_pos, len); - } while ((err == -1) && (errno == EINTR)); - if (err <= 0) { - if (!err) { - return -1; - } - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - return -1; - } - if (verbose) { - printf("wrote %d %d\n", sock, err); - } - new_tbytes += err; - fd[sock].req_pos += err; - fd[sock].bytes += err; - if (fd[sock].req_pos >= len) { - fd[sock].req_pos = -1; - } else { - return 0; - } - fd[sock].response += url_len; - fd[sock].length -= url_len; - if (fd[sock].range) { - fd[sock].range_bytes -= url_len; - } - total_server_response_header_bytes += print_len - url_len; - total_server_response_body_bytes += url_len; - } - - /* then the response */ - towrite = server_speed ? server_speed : MAX_RESPONSE_LENGTH; - if (!fd[sock].range) { - if (fd[sock].length < towrite) { - towrite = fd[sock].length; - } - } else { - if (fd[sock].range_bytes < (unsigned long)towrite) { - towrite = fd[sock].range_bytes; - } - } - - if (towrite > 0) { - if (fast(sock, server_speed, fd[sock].bytes)) { - return 0; - } - do { - err = write(sock, fd[sock].response, towrite); - } while ((err == -1) && (errno == EINTR)); - if (err < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - fprintf(stderr, "write errno %d length %d sock %d\n", errno, towrite, sock); - errors++; - return -1; - } - if (verbose) { - printf("wrote %d %d\n", sock, err); - } - - if (fd[sock].range) { - ink_assert(err <= (int)(fd[sock].range_end - fd[sock].range_start + 1)); - } - - new_tbytes += err; - total_server_response_body_bytes += err; - fd[sock].response += err; - fd[sock].length -= err; - fd[sock].bytes += err; - } - - if (fast(sock, server_speed, fd[sock].bytes)) { - return 0; - } - if (fd[sock].length <= 0 || !err) { - if (fd[sock].response) { - new_sops++; - } - if (verbose) { - printf("write %d done\n", sock); - } - if (fd[sock].keepalive > 0 && !ftp) { - poll_init_set(sock, read_request); - fd[sock].start = now; - fd[sock].ready = now + server_delay * HRTIME_MSECOND; - return 0; - } - return 1; - } - - return 0; -} - -static char * -strncasestr(char *s, const char *find, int len) -{ - int findlen = strlen(find); - char *e = s + len; - while (1) { - char *x = (char *)memchr(s, *find, e - s); - if (!x) { - if (ParseRules::is_upalpha(*find)) { - x = (char *)memchr(s, ParseRules::ink_tolower(*find), e - s); - } else { - x = (char *)memchr(s, ParseRules::ink_toupper(*find), e - s); - } - if (!x) { - break; - } - } - if (!strncasecmp(find, x, findlen)) { - return x; - } - s = x + 1; - } - return nullptr; -} - -static char * -check_keepalive(char *r, int length) -{ - char *ka = strncasestr(r, "Connection:", length); - char *http_1_1 = strncasestr(r, "HTTP/1.1", length); - if (http_1_1 && !ka) { - return http_1_1; - } - if (ka) { - int l = length - (ka - r); - char *e = (char *)memchr(ka, '\n', l); - if (!e) { - e = (char *)memchr(ka, '\r', l); - } - if (strncasestr(ka, "close", e - ka)) { - return nullptr; - } - } - return ka; -} - -static int -check_alt(char *r, int length) -{ - char *s = strncasestr(r, "Cookie:", length); - if (!s) { - s = strncasestr(r, "User-Agent:", length); - if (s) { - s += sizeof("User-Agent:"); - } - } else { - s += sizeof("Cookie:"); - } - if (s) { - int l = length - (s - r); - char *e = (char *)memchr(s, '\n', l); - if (!e) { - e = (char *)memchr(s, '\r', l); - } - if (!(s = strncasestr(s, "jtest", e - s))) { - return 0; - } - s = (char *)memchr(s, '-', l); - if (!s) { - return 0; - } - s = (char *)memchr(s + 1, '-', l); - if (!s) { - return 0; - } - return ink_atoi(s + 1); - } - return 0; -} - -static void -make_response(int sock, int code) -{ - fd[sock].response = fd[sock].req_header; - fd[sock].length = snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "%d\r\n", code); - fd[sock].req_pos = 0; - fd[sock].response_length = strlen(fd[sock].req_header); - poll_set(sock, nullptr, write_ftp_response); -} - -static void -make_long_response(int sock) -{ - fd[sock].response = fd[sock].req_header; - fd[sock].req_pos = 0; - fd[sock].response_length = strlen(fd[sock].req_header); - poll_set(sock, nullptr, write_ftp_response); -} - -static int -send_ftp_data_when_ready(int sock) -{ - if (fd[sock].state == STATE_FTP_DATA_READY && fd[sock].doc_length) { - fd[sock].response = fd[sock].req_header; - fd[sock].response_length = fd[sock].length = fd[sock].doc_length; - if (verbose) { - printf("ftp data %d >-< %d\n", sock, fd[sock].ftp_data_fd); - } - fd[sock].response = response_buffer + fd[sock].doc_length % 256; - fd[sock].req_pos = 0; - poll_set(sock, nullptr, send_response); - } - return 0; -} - -static int -send_ftp_data(int sock, char *start /*, char * end */) -{ - int data_fd = fd[sock].ftp_data_fd; - if (sscanf(start, "%d", &fd[data_fd].doc_length) != 1) { - return -1; - } - fd[data_fd].doc = fd[sock].doc; - send_ftp_data_when_ready(data_fd); - return 0; -} - -static int -process_header(int sock, char *buffer, int offset) -{ - char host[80]; - int port, length; - float r; - int post_request = 0; - if (sscanf(buffer, "GET http://%[^:]:%d/%f/%d", host, &port, &r, &length) == 4) { - } else if (sscanf(buffer, "GET /%f/%d", &r, &length) == 2) { - } else if (sscanf(buffer, "POST http://%[^:]:%d/%f/%d", host, &port, &r, &length) == 4) { - post_request = 1; - } else if (sscanf(buffer, "POST /%f/%d", &r, &length) == 2) { - post_request = 1; - } else { - if (verbose) { - printf("misscan: %s\n", buffer); - } - fd[sock].close(); - return -1; - } - - if (verbose) { - printf("read_request %d got request %d\n", sock, length); - } - char *ims = strncasestr(buffer, "If-Modified-Since:", offset); - char *range = strncasestr(buffer, "Range:", offset); - char *post_cl = nullptr; - if (post_support) { - post_cl = strncasestr(buffer, "Content-Length:", offset); - fd[sock].post_cl = atoi(post_cl + strlen("Content-Length: ")); - ink_assert(post_cl && post_request && fd[sock].post_cl); - } - if (ts::Random::drandom() > ims_rate) { - ims = nullptr; - } - if (range) { - fd[sock].range = 1; - if (sscanf(range, "Range: bytes=%lu-%lu", &fd[sock].range_start, &fd[sock].range_end) == 2) { - fd[sock].range_bytes = fd[sock].range_end - fd[sock].range_start + 1; - } else if (sscanf(range, "Range: bytes=%lu-", &fd[sock].range_start) == 1) { - fd[sock].range_bytes = length - fd[sock].range_start + 1; - } else { - if (verbose) - printf("invalid 206"); - } - ims = nullptr; - if (verbose) { - printf("sending Range: 206 Partial %lu-%lu\n", fd[sock].range_start, fd[sock].range_end); - } - } - - fd[sock].ims = ims ? 1 : 0; - if (!ims) { - if (range) { - fd[sock].total_length = length; - fd[sock].response_length = fd[sock].length = fd[sock].range_bytes; - } else { - fd[sock].response_length = fd[sock].length = length; - } - fd[sock].nalternate = check_alt(fd[sock].req_header, strlen(fd[sock].req_header)); - fd[sock].response = response_buffer + length % 256 + fd[sock].nalternate; - } else { - fd[sock].nalternate = 0; - if (verbose) { - printf("sending IMS 304: Not-Modified\n"); - } - fd[sock].response = nullptr; - fd[sock].response_length = fd[sock].length = 0; - } - fd[sock].header_size = offset; - - return post_request; -} - -static int -parse_header(int sock, int err) -{ - int i; - int post_request = 0; - - if (verbose) { - printf("read %d got %d\n", sock, err); - } - total_proxy_request_bytes += err; - new_tbytes += err; - fd[sock].req_pos += err; - fd[sock].req_header[fd[sock].req_pos] = 0; - char *buffer = fd[sock].req_header; - for (i = fd[sock].req_pos - err; i < fd[sock].req_pos; i++) { - switch (fd[sock].state) { - case 0: - if (buffer[i] == '\r') { - fd[sock].state = 1; - } else if (buffer[i] == '\n') { - fd[sock].state = 2; - } - break; - case 1: - if (buffer[i] == '\n') { - fd[sock].state = 2; - } else { - fd[sock].state = 0; - } - break; - case 2: - if (buffer[i] == '\r') { - fd[sock].state = 3; - } else if (buffer[i] == '\n') { - fd[sock].state = 3; - goto L3; - } else { - fd[sock].state = 0; - } - break; - L3: - case 3: - if (buffer[i] == '\n') { - if (show_headers) { - printf("Request from Proxy: {\n%s}\n", buffer); - } - - post_request = process_header(sock, buffer, i); - if (post_request < 0) { - return JTEST_DONE; - } - - if (post_request) { - fd[sock].state = 4; - break; - } - - fd[sock].req_pos = 0; - if (!check_keepalive(fd[sock].req_header, strlen(fd[sock].req_header))) { - fd[sock].keepalive = 0; - } else { - fd[sock].keepalive--; - } - if (fd[sock].length && ts::Random::drandom() < server_abort_rate) { - fd[sock].length = (int)(ts::Random::drandom() * (fd[sock].length - 1)); - fd[sock].keepalive = 0; - } - poll_set(sock, nullptr, send_response); - return JTEST_DONE; - } else { - fd[sock].state = 0; - } - break; - case 4: - if (fd[sock].req_pos - fd[sock].header_size - 1 >= fd[sock].post_cl) { - fd[sock].req_pos = 0; - if (!check_keepalive(fd[sock].req_header, strlen(fd[sock].req_header))) { - fd[sock].keepalive = 0; - } else { - fd[sock].keepalive--; - } - if (fd[sock].length && ts::Random::drandom() < server_abort_rate) { - fd[sock].length = (int)(ts::Random::drandom() * (fd[sock].length - 1)); - fd[sock].keepalive = 0; - } - poll_set(sock, nullptr, send_response); - fd[sock].state = 0; - return JTEST_DONE; - } - return JTEST_CONT; - } - } - return JTEST_CONT; -} - -static int -read_request(int sock) -{ - if (verbose) { - printf("read_request %d\n", sock); - } - int err = 0; - int maxleft = 0; - - if (!post_support) - maxleft = HEADER_SIZE - fd[sock].req_pos - 1; - else - maxleft = HEADER_SIZE + MAX_REQUEST_BODY_LENGTH - fd[sock].req_pos - 1; - - while (true) { - do { - err = read(sock, &fd[sock].req_header[fd[sock].req_pos], maxleft); - } while ((err < 0) && (errno == EINTR)); - - if (err < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - if (fd[sock].req_pos || errno != ECONNRESET) { - perror("read"); - } - return -1; - } else if (err == 0) { - if (verbose) { - printf("eof\n"); - } - return -1; - } else { - if (verbose) { - printf("read %d got %d\n", sock, err); - } - - if (parse_header(sock, err) == JTEST_DONE) - return 0; - } - } - return 0; -} - -static int -send_compd_response(int sock) -{ - int err = 0; - - struct { - unsigned int code; - unsigned int len; - } compd_header; - if (fd[sock].req_pos < (int)sizeof(compd_header)) { - compd_header.code = 0; - compd_header.len = htonl((fd[sock].length * 2) / 3); - do { - err = write(sock, (char *)&compd_header + fd[sock].req_pos, sizeof(compd_header) - fd[sock].req_pos); - } while ((err == -1) && (errno == EINTR)); - if (err <= 0) { - if (!err) { - if (verbose_errors) { - printf("write %d closed early\n", sock); - } - goto Lerror; - } - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - perror("write"); - goto Lerror; - } - if (verbose) { - printf("write %d %d\n", sock, err); - } - - new_tbytes += err; - fd[sock].req_pos += err; - fd[sock].bytes += err; - fd[sock].response = response_buffer + (((fd[sock].length * 2) / 3) % 256); - } - - if (fd[sock].req_pos < ((fd[sock].length * 2) / 3) + (int)sizeof(compd_header)) { - int towrite = cbuffersize; - int desired = ((fd[sock].length * 2) / 3) + sizeof(compd_header) - fd[sock].req_pos; - if (towrite > desired) { - towrite = desired; - } - if (fast(sock, client_speed, fd[sock].bytes)) { - return 0; - } - do { - err = write(sock, fd[sock].response + fd[sock].req_pos - sizeof(compd_header), towrite); - } while ((err == -1) && (errno == EINTR)); - if (err < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - fprintf(stderr, "write errno %d length %d sock %d\n", errno, towrite, sock); - errors++; - return -1; - } - if (verbose) { - printf("wrote %d %d\n", sock, err); - } - - new_tbytes += err; - total_server_response_body_bytes += err; - fd[sock].req_pos += err; - fd[sock].bytes += err; - } - - if (fd[sock].req_pos >= ((fd[sock].length * 2) / 3) + 4) { - return -1; - } - - return 0; -Lerror: - errors++; - return 1; -} - -static int -read_compd_request(int sock) -{ - if (verbose) { - printf("read_compd_request %d\n", sock); - } - int err = 0; - - if (fd[sock].req_pos < 4) { - int maxleft = HEADER_SIZE - fd[sock].req_pos - 1; - do { - err = read(sock, &fd[sock].req_header[fd[sock].req_pos], maxleft); - } while ((err < 0) && (errno == EINTR)); - - if (err < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - perror("read"); - return -1; - } else if (err == 0) { - if (verbose) { - printf("eof\n"); - } - return -1; - } else { - if (verbose) { - printf("read %d got %d\n", sock, err); - } - total_proxy_request_bytes += err; - new_tbytes += err; - fd[sock].req_pos += err; - if (fd[sock].req_pos < 4) { - return 0; - } - fd[sock].length = ntohl(*(unsigned int *)fd[sock].req_header); - } - } - - if (fd[sock].req_pos >= fd[sock].length + 4) { - goto Lcont; - } - - { - char buf[MAX_BUFSIZE]; - int toread = cbuffersize; - if (fast(sock, client_speed, fd[sock].bytes)) { - return 0; - } - do { - err = read(sock, buf, toread); - } while ((err == -1) && (errno == EINTR)); - if (err < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - if (errno == ECONNRESET) { - if (verbose || verbose_errors) { - perror("read"); - } - errors++; - return -1; - } - panic_perror("read"); - } - if (!err) { - if (verbose || verbose_errors) { - perror("read"); - } - errors++; - return -1; - } - total_proxy_request_bytes += err; - new_tbytes += err; - fd[sock].req_pos += err; - } - - if (fd[sock].req_pos >= fd[sock].length + 4) { - goto Lcont; - } - - return 0; - -Lcont: - fd[sock].req_pos = 0; - fd[sock].keepalive = 0; - poll_set(sock, nullptr, send_compd_response); - return 0; -} - -static int -read_ftp_request(int sock) -{ - if (verbose) { - printf("read_ftp_request %d\n", sock); - } - int err = 0; - int i; - - int maxleft = HEADER_SIZE - fd[sock].req_pos - 1; - - do { - err = read(sock, &fd[sock].req_header[fd[sock].req_pos], maxleft); - } while ((err < 0) && (errno == EINTR)); - - if (err < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - perror("read"); - return -1; - } else if (err == 0) { - if (verbose) { - printf("eof\n"); - } - return -1; - } else { - if (verbose) { - printf("read %d got %d\n", sock, err); - } - new_tbytes += err; - fd[sock].req_pos += err; - fd[sock].req_header[fd[sock].req_pos] = 0; - char *buffer = fd[sock].req_header, *n; - int res = 0; - buffer[fd[sock].req_pos] = 0; - if (verbose) { - printf("buffer [%s]\n", buffer); - } -#define STREQ(_x, _s) (!strncasecmp(_x, _s, sizeof(_s) - 1)) - if (STREQ(buffer, "USER")) { - res = 331; - goto Lhere; - } else if (STREQ(buffer, "PASS")) { - res = 230; - goto Lhere; - } else if (STREQ(buffer, "CWD")) { - // TS used to send "CWD 1.2110000000..." - // TS now sends "CWD /1.2110000000^M\n", so skip 5 instead of 4 - fd[sock].doc = (buffer[4] == '/') ? atof(buffer + 5) : atof(buffer + 4); - res = 250; - goto Lhere; - } else if (STREQ(buffer, "TYPE")) { - res = 200; - Lhere: - n = (char *)memchr(buffer, '\n', fd[sock].req_pos); - if (!n) { - return 0; - } - make_response(sock, res); - return 0; - } else if (STREQ(buffer, "SIZE")) { - fd[sock].length = snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "213 %d\r\n", atoi(buffer + 5)); - make_long_response(sock); - return 0; - } else if (STREQ(buffer, "MDTM")) { - double err_rand = 1.0; - if (ftp_mdtm_err_rate != 0.0) { - err_rand = ts::Random::drandom(); - } - if (err_rand < ftp_mdtm_err_rate) { - fd[sock].length = snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "550 mdtm file not found\r\n"); - } else { - if (ftp_mdtm_rate == 0) { - fd[sock].length = snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "213 19900615100045\r\n"); - } else { - time_t mdtm_now; - time(&mdtm_now); - if (mdtm_now - ftp_mdtm_last_update > ftp_mdtm_rate) { - struct tm *mdtm_tm; - ftp_mdtm_last_update = mdtm_now; - mdtm_tm = localtime(&ftp_mdtm_last_update); - snprintf(ftp_mdtm_str, sizeof(ftp_mdtm_str), "213 %.4d%.2d%.2d%.2d%.2d%.2d", mdtm_tm->tm_year + 1900, - mdtm_tm->tm_mon + 1, mdtm_tm->tm_mday, mdtm_tm->tm_hour, mdtm_tm->tm_min, mdtm_tm->tm_sec); - } - fd[sock].length = snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "%s\r\n", ftp_mdtm_str); - } - } - make_long_response(sock); - return 0; - } else if (STREQ(buffer, "PASV")) { - n = (char *)memchr(buffer, '\n', fd[sock].req_pos); - if (!n) { - return 0; - } - if ((fd[sock].ftp_data_fd = open_server(0, accept_ftp_data)) < 0) { - panic("could not open ftp data PASV accept port\n"); - } - fd[fd[sock].ftp_data_fd].ftp_data_fd = sock; - if (verbose) { - printf("ftp PASV %d <-> %d\n", sock, fd[sock].ftp_data_fd); - } - unsigned short p = fd[fd[sock].ftp_data_fd].name.sin_port; - fd[sock].length = - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "227 (%u,%u,%u,%u,%u,%u)\r\n", - ((unsigned char *)&local_addr)[0], ((unsigned char *)&local_addr)[1], ((unsigned char *)&local_addr)[2], - ((unsigned char *)&local_addr)[3], ((unsigned char *)&p)[0], ((unsigned char *)&p)[1]); - if (verbose) { - puts(fd[sock].req_header); - } - make_long_response(sock); - fd[sock].ftp_mode = FTP_PASV; - return 0; - } else if (STREQ(buffer, "PORT")) { - // watch out for an endian problems !!! - char *start, *stop; - for (start = buffer; !ParseRules::is_digit(*start); start++) { - ; - } - for (stop = start; *stop != ','; stop++) { - ; - } - for (i = 0; i < 4; i++) { - ((unsigned char *)&(fd[sock].ftp_peer_addr))[i] = strtol(start, &stop, 10); - for (start = ++stop; *stop != ','; stop++) { - ; - } - } - ((unsigned char *)&(fd[sock].ftp_peer_port))[0] = strtol(start, &stop, 10); - start = ++stop; - ((unsigned char *)&(fd[sock].ftp_peer_port))[1] = strtol(start, nullptr, 10); - fd[sock].length = snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "200 Okay\r\n"); - if (verbose) { - puts(fd[sock].req_header); - } - make_long_response(sock); - fd[sock].ftp_mode = FTP_PORT; - return 0; - } else if (STREQ(buffer, "RETR")) { - if (fd[sock].ftp_mode == FTP_NULL) { - // default to PORT ftp - struct sockaddr_in ftp_peer; - int ftp_peer_addr_len = sizeof(ftp_peer); - if (getpeername(sock, (struct sockaddr *)&ftp_peer, -#if 0 - &ftp_peer_addr_len -#else - (socklen_t *)&ftp_peer_addr_len -#endif - ) < 0) { - perror("getsockname"); - exit(EXIT_FAILURE); - } - fd[sock].ftp_peer_addr = ftp_peer.sin_addr.s_addr; - fd[sock].ftp_peer_port = ftp_peer.sin_port; - fd[sock].ftp_mode = FTP_PORT; - } - if (fd[sock].ftp_mode == FTP_PORT) { - if ((fd[sock].ftp_data_fd = make_client(fd[sock].ftp_peer_addr, fd[sock].ftp_peer_port)) < 0) { - panic("could not open ftp PORT data connection to client\n"); - } - fd[fd[sock].ftp_data_fd].ftp_data_fd = sock; - fd[fd[sock].ftp_data_fd].state = STATE_FTP_DATA_READY; - if (verbose) { - printf("ftp PORT %d <-> %d\n", sock, fd[sock].ftp_data_fd); - } - } - n = (char *)memchr(buffer, '\n', fd[sock].req_pos); - if (!n) { - return 0; - } - if (send_ftp_data(sock, buffer + 5 /*, n */) < 0) { - errors++; - *n = 0; - if (verbose) { - printf("badly formed ftp request: %s\n", buffer); - } - return 1; - } - fd[sock].response = fd[sock].req_header; - fd[sock].length = - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, "150 %d bytes\r\n", fd[fd[sock].ftp_data_fd].length); - fd[sock].req_pos = 0; - fd[sock].response_length = strlen(fd[sock].req_header); - poll_set(sock, nullptr, write_ftp_response); - return 0; - } else { - if (verbose || verbose_errors) { - printf("ftp junk : %s\n", buffer); - } - fd[sock].req_pos = 0; - return 0; - } - } -} - -static int -accept_sock(int sock) -{ - struct sockaddr_in clientname; - int size = sizeof(clientname); - int new_fd = 0; - do { - new_fd = accept(sock, (struct sockaddr *)&clientname, -#if 0 - &size -#else - (socklen_t *)&size -#endif - ); - if (new_fd < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - if (errno == EINTR || errno == ECONNABORTED) { - continue; - } - printf("accept socket was %d\n", sock); - panic_perror("accept"); - } - } while (new_fd < 0); - - if (fcntl(new_fd, F_SETFL, O_NONBLOCK) < 0) { - panic_perror("fcntl"); - } - -#if 0 -#ifdef BUFSIZE // make default - int bufsize = BUFSIZE; - if (setsockopt(new_fd,SOL_SOCKET,SO_SNDBUF, - (const char *)&bufsize,sizeof(bufsize)) < 0) { - perror("setsockopt"); - } - if (setsockopt(new_fd,SOL_SOCKET,SO_SNDBUF, - (const char *)&bufsize,sizeof(bufsize)) < 0) { - perror("setsockopt"); - } -#endif -#endif - int enable = 1; - if (setsockopt(new_fd, IPPROTO_TCP, TCP_NODELAY, (const char *)&enable, sizeof(enable)) < 0) { - perror("setsockopt"); - } -#ifdef PRINT_LOCAL_PORT - struct sockaddr_in local_sa; - size = sizeof(local_sa); - getsockname(new_fd, (struct sockaddr *)&local_sa, &size); - printf("local_sa.sin_port = %d\n", local_sa.sin_port); -#endif - return new_fd; -} - -static int -accept_compd(int sock) -{ - int new_fd = accept_sock(sock); - servers++; - new_servers++; - poll_init_set(new_fd, nullptr, read_compd_request); - fd[new_fd].count = &servers; - fd[new_fd].start = now; - fd[new_fd].ready = now + server_delay * HRTIME_MSECOND; - fd[new_fd].keepalive = server_keepalive ? server_keepalive : INT_MAX; - - return 0; -} - -static int -accept_read(int sock) -{ - int new_fd = accept_sock(sock); - servers++; - new_servers++; - if (ftp) { - poll_init_set(new_fd, nullptr, write_ftp_response); - make_response(new_fd, 220); - } else { - poll_init_set(new_fd, read_request); - } - fd[new_fd].count = &servers; - fd[new_fd].start = now; - fd[new_fd].ready = now + server_delay * HRTIME_MSECOND; - fd[new_fd].keepalive = server_keepalive ? server_keepalive : INT_MAX; - - return 0; -} - -static int -accept_ftp_data(int sock) -{ - int new_fd = accept_sock(sock); - servers++; - new_servers++; - poll_init(new_fd); - fd[new_fd].ftp_data_fd = fd[sock].ftp_data_fd; - fd[fd[sock].ftp_data_fd].ftp_data_fd = new_fd; - fd[new_fd].state = STATE_FTP_DATA_READY; - fd[new_fd].count = &servers; - fd[new_fd].start = now; - fd[new_fd].ready = now + server_delay * HRTIME_MSECOND; - fd[new_fd].keepalive = server_keepalive ? server_keepalive : INT_MAX; - fd[new_fd].state = STATE_FTP_DATA_READY; - fd[new_fd].doc = fd[sock].doc; - fd[new_fd].doc_length = fd[sock].doc_length; - if (verbose) { - printf("accept_ftp_data %d for %d\n", new_fd, sock); - } - send_ftp_data_when_ready(new_fd); - return 1; -} - -static int -open_server(unsigned short int port, accept_fn_t accept_fn) -{ - struct linger lngr; - int sock; - int one = 1; - - /* Create the socket. */ - sock = socket(AF_INET, SOCK_STREAM, 0); - if (sock < 0) { - perror("socket"); - exit(EXIT_FAILURE); - } - struct sockaddr_in &name = fd[sock].name; - - /* Give the socket a name. */ - name.sin_family = AF_INET; - name.sin_port = htons(port); - name.sin_addr.s_addr = htonl(INADDR_ANY); - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)) < 0) { - perror((char *)"setsockopt"); - exit(EXIT_FAILURE); - } - if (bind(sock, (struct sockaddr *)&name, sizeof(name)) < 0) { - if (errno == EADDRINUSE) { - close(sock); - return -EADDRINUSE; - } - perror("bind"); - exit(EXIT_FAILURE); - } - - int addrlen = sizeof(name); - if (getsockname(sock, (struct sockaddr *)&name, -#if 0 - &addrlen -#else - (socklen_t *)&addrlen -#endif - ) < 0) { - perror("getsockname"); - exit(EXIT_FAILURE); - } - ink_assert(addrlen); - - /* Tell the socket not to linger on exit */ - lngr.l_onoff = 0; - lngr.l_linger = 0; - if (setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&lngr, sizeof(struct linger)) < 0) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } - - if (listen(sock, 1024) < 0) { - perror("listen"); - exit(EXIT_FAILURE); - } - - /* put the socket in non-blocking mode */ - if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0) { - perror("fcntl"); - exit(EXIT_FAILURE); - } - - if (verbose) { - printf("opening server on %d port %d\n", sock, port); - } - - poll_init_set(sock, accept_fn); - - return sock; -} - -// perform poll and invoke callbacks on active descriptors -static int -poll_loop() -{ - if (server_fd > 0) { - while (read_ready(server_fd) > 0) { - accept_read(server_fd); - } - } - pollfd pfd[POLL_GROUP_SIZE]; - int ip = 0; - now = ink_get_hrtime(); - for (int i = 0; i <= last_fd; i++) { - if (fd[i].fd > 0 && (!fd[i].ready || now >= fd[i].ready)) { - pfd[ip].fd = i; - pfd[ip].events = 0; - pfd[ip].revents = 0; - if (fd[i].read_cb) { - pfd[ip].events |= POLLIN; - } - if (fd[i].write_cb) { - pfd[ip].events |= POLLOUT; - } - ip++; - } - if (ip >= POLL_GROUP_SIZE || i == last_fd) { - int n = poll(pfd, ip, POLL_TIMEOUT); - if (n > 0) { - for (int j = 0; j < ip; j++) { - if (pfd[j].revents & (POLLIN | POLLERR | POLLHUP | POLLNVAL)) { - if (verbose) { - printf("poll read %d %X\n", pfd[j].fd, pfd[j].revents); - } - if (fd[pfd[j].fd].read_cb && fd[pfd[j].fd].read_cb(pfd[j].fd)) { - fd[pfd[j].fd].close(); - continue; - } - } - if (pfd[j].revents & (POLLOUT | POLLERR | POLLHUP | POLLNVAL)) { - if (verbose) { - printf("poll write %d %X\n", pfd[j].fd, pfd[j].revents); - } - if (fd[pfd[j].fd].write_cb && fd[pfd[j].fd].write_cb(pfd[j].fd)) { - fd[pfd[j].fd].close(); - continue; - } - } - } - } - ip = 0; - } - } - return 0; -} - -static int -gen_bfc_dist(double f = 10.0) -{ - if (docsize >= 0) { - return docsize; - } - - double rand = 0.0; - double rand2 = 0.0; - bool f_given = f < 9.0; - if (!f_given) { - rand = ts::Random::drandom(); - rand2 = ts::Random::drandom(); - } else { - rand = f; - rand2 = (f * 13.0) - floor(f * 13.0); - } - - int class_no; - int file_no = 0; - - if (rand < 0.35) { - class_no = 0; - } else if (rand < 0.85) { - class_no = 1; - } else if (rand < 0.99) { - class_no = 2; - } else { - class_no = 3; - if (f_given) { - rand2 = (f * 113.0) - floor(f * 113.0); - } - } - - if (rand2 < 0.018) { - file_no = 0; - } else if (rand2 < 0.091) { - file_no = 1; - } else if (rand2 < 0.237) { - file_no = 2; - } else if (rand2 < 0.432) { - file_no = 3; - } else if (rand2 < 0.627) { - file_no = 4; - } else if (rand2 < 0.783) { - file_no = 5; - } else if (rand2 < 0.887) { - file_no = 6; - } else if (rand2 < 0.945) { - file_no = 7; - } else if (rand2 < 1.000) { - file_no = 8; - } - int size = 100; - int i; - for (i = 0; i < class_no; i++) { - size = size * 10; - } - int increment = size; - size = size * (file_no + 1); - // vary about the mean doc size for - // that class/size - if (!f_given) { - size += (int)((-increment * 0.5) + (increment * ts::Random::drandom())); - } - if (verbose) { - printf("gen_bfc_dist %d\n", size); - } - return size; -} - -static void -build_response() -{ - int maxsize = docsize > MAX_RESPONSE_LENGTH ? docsize : MAX_RESPONSE_LENGTH; - response_buffer = (char *)malloc(maxsize + HEADER_SIZE); - for (int i = 0; i < maxsize + HEADER_SIZE; i++) { - response_buffer[i] = i % 256; - } -} - -static void -put_ka(int sock) -{ - int i = 0; - for (; i < n_ka_cache; i++) { - if (!ka_cache_head[i] || fd[ka_cache_head[i]].ip == fd[sock].ip) { - goto Lpush; - } - } - i = n_ka_cache++; -Lpush: - if (ka_cache_tail[i]) { - fd[ka_cache_tail[i]].next = sock; - } else { - ka_cache_head[i] = sock; - } - ka_cache_tail[i] = sock; -} - -static int -get_ka(unsigned int ip) -{ - for (int i = 0; i < n_ka_cache; i++) { - if (fd[ka_cache_head[i]].ip == ip) { - int res = ka_cache_head[i]; - ka_cache_head[i] = fd[ka_cache_head[i]].next; - if (res == ka_cache_tail[i]) { - ink_assert(!ka_cache_head[i]); - ka_cache_tail[i] = 0; - } - return res; - } - } - return -1; -} - -static void -defer_url(char *url) -{ - if (n_defered_urls < MAX_DEFERED_URLS - 1) { - defered_urls[n_defered_urls++] = strdup(url); - } else { - fprintf(stderr, "too many deferred urls, dropping '%s'\n", url); - } -} - -static int -throttling_connections() -{ - return client_rate && keepalive_cons && current_clients >= keepalive_cons; -} - -static void -done() -{ - interval_report(); - exit(0); -} - -static int -is_done() -{ - return (urls_mode && !current_clients && !n_defered_urls) || (bandwidth_test && bandwidth_test_to_go <= 0 && !current_clients); -} - -static void -undefer_url(bool unthrottled) -{ - if ((unthrottled || !throttling_connections()) && n_defered_urls) { - --n_defered_urls; - char *url = defered_urls[n_defered_urls]; - make_url_client(url, 0, true, unthrottled); - free(url); - if (verbose) { - printf("undefer_url: made client %d clients\n", current_clients); - } - } else if (verbose) { - printf("undefer_url: throttle\n"); - } - if (is_done()) { - done(); - } -} - -static void -init_client(int sock) -{ - poll_init(sock); - fd[sock].start = now; - fd[sock].ready = now; - fd[sock].count = &clients; - poll_set(sock, nullptr, write_request); -} - -static unsigned int -get_addr(const char *host) -{ - unsigned int addr = inet_addr(host); - struct hostent *host_info = nullptr; - - if (!addr || (-1 == (int)addr)) { - host_info = gethostbyname(host); - if (!host_info) { - printf("gethostbyname(%s): %s\n", host, hstrerror(h_errno)); - return (unsigned int)-1; - } - addr = *((unsigned int *)host_info->h_addr); - } - - return addr; -} - -static char * -find_href_end(char *start, int len) -{ - char *end = start; - if (!start) { - return nullptr; - } - - while (*end && len > 0) { - if (*end == '\"') { - break; /* " */ - } - if (*end == '\'') { - break; - } - if (*end == '>') { - break; - } - if (*end == ' ') { - break; - } - if (*end == '\t') { - break; - } - if (*end == '\n') { - break; - } - if (*end == '<') { - break; - } - if (*end & 0x80) { - break; /* hi order bit! */ - } - len--; - end++; - } - - if (*end == 0 || len == 0) { - return nullptr; - } else { - return end; - } -} // find_href_end - -static char * -find_href_start(const char *tag, char *base, int len) -{ - int taglen = strlen(tag); - if (base == nullptr) { - return nullptr; - } - - char *start = base; - char *end = base + len; - -Lagain: { - start = strncasestr(start, tag, len); - if ((start == nullptr) || (end - start < 6)) { - return nullptr; - } - start += taglen; - len -= taglen; -} // block - - while (ParseRules::is_ws(*start) && (end - start > 1)) { - start++; - len--; - } - if (*start == '=' && (end - start > 1)) { - start++; - len--; - } else { - goto Lagain; - } - while (ParseRules::is_ws(*start) && (end - start > 1)) { - start++; - len--; - } - // - // Optional quotes: href="x" or href='x' or href=x - // - if ((*start == '\"' || (*start == '\'')) && (end - start > 1)) { /*"'*/ - start++; - len--; - } - while (ParseRules::is_ws(*start) && (end - start > 1)) { - start++; - len--; - } - - return start; -} // find_href_start - -static int -compose_url(char *new_url, char *base, char *input) -{ - char sche[8], host[512], port[10], path[512], frag[512], quer[512], para[512]; - char curl[512]; - int xsche, xhost, xport, xpath, xfrag, xquer, xpar, rel, slash; - ink_web_decompose_url(base, sche, host, port, path, frag, quer, para, &xsche, &xhost, &xport, &xpath, &xfrag, &xquer, &xpar, &rel, - &slash); - strcpy(curl, "http://"); - strcat(curl, host); - if (xport) { - strcat(curl, ":"); - strcat(curl, port); - } - strcat(curl, "/"); - strcat(curl, path); - - ink_web_canonicalize_url(curl, input, new_url, 512); - return 0; -} // compose_urls - -static void -compose_all_urls(const char *tag, char *buf, char *start, char *end, int buflen, char *base_url) -{ - char old; - while ((start = find_href_start(tag, end, buflen - (end - buf)))) { - char newurl[512]; - end = (char *)find_href_end(start, std::min(static_cast(buflen - (start - buf)), 512 - 10)); - if (!end) { - end = start + strlen(tag); - continue; - } - old = *end; - *end = 0; - compose_url(newurl, base_url, start); - make_url_client(newurl, base_url); - *end = old; - } // while -} -// -// Input is a nullptr-terminated string (buf of buflen) -// also, a read-write base_url -// -static void -extract_urls(char *buf, int buflen, char *base_url) -{ - // if (verbose) printf("EXTRACT<<%s\n>>", buf); - char *start = nullptr; - char *end = nullptr; - char old_base[512] = {0}; - strncpy(old_base, base_url, sizeof(old_base) - 1); - - start = strncasestr(buf, "', buflen - (start - buf)); - if (end) { - char *rover = strncasestr(start, "href", end - start); - if (rover) { - rover += 4; - while (rover < end && (ParseRules::is_ws(*rover) || *rover == '=' || *rover == '\'' || *rover == '\"')) { /* " */ - rover++; - } - start = rover; - while (rover < end && !(ParseRules::is_ws(*rover) || *rover == '\'' || *rover == '\"')) { - rover++; - } - *rover = 0; - compose_url(base_url, old_base, start); - // fixup unqualified hostnames (e.g. http://internal/foo) - char *he = strchr(base_url + 8, '/'); - if (!memchr(base_url, '.', he - base_url)) { - char t[512] = {0}; - strncpy(t, base_url, sizeof(t) - 1); - char *old_he = strchr(old_base + 8, '.'); - if (old_he) { - char *old_hee = strchr(old_he, '/'); - if (old_hee) { - memcpy(base_url, t, (he - base_url)); - memcpy(base_url + (he - base_url), old_he, (old_hee - old_he)); - memcpy(base_url + (he - base_url) + (old_hee - old_he), t + (he - base_url), strlen(t + (he - base_url))); - base_url[(he - base_url) + (old_hee - old_he) + strlen(t + (he - base_url))] = 0; - } - } - } - } - } - } - - end = buf; - if (follow) { - compose_all_urls("href", buf, start, end, buflen, base_url); - } - if (fullpage) { - const char *tags[] = { - "src", "image", "object", "archive", "background", - // "location", "code" - }; - for (unsigned i = 0; i < sizeof(tags) / sizeof(tags[0]); i++) { - compose_all_urls(tags[i], buf, start, end, buflen, base_url); - } - } -} // extract_urls - -static void -follow_links(int sock) -{ - if (urls_mode) { - if (fd[sock].binary) { - return; - } - int l = fd[sock].response_remaining; - char *r = fd[sock].response, *p = r, *n = r; - if (r) { - extract_urls(r, l, fd[sock].base_url); - } - if (l < MAX_BUFSIZE) { - while (n) { - n = (char *)memchr(p, '\n', l - (p - r)); - if (!n) { - n = (char *)memchr(p, '\r', l - (p - r)); - } - if (n) { - p = n + 1; - } - } - int done = p - r, remaining = l - done; - if (done) { - memmove(r, p, remaining); - fd[sock].response_remaining = remaining; - } - } else { // bail - fd[sock].response_length = 0; - } - } -} - -static int -verify_content(int sock, char *buf, int done) -{ - if ((urls_mode && !check_content) || range_mode) { - return 1; - } - int l = fd[sock].response_length; - char *d = response_buffer + (l % 256) + fd[sock].nalternate; - int left = fd[sock].length; - if (left > 0) { - if (embed_url && !fd[sock].jg_compressed) { - if (l == left && left > 64) { - char *url_end = nullptr, *url_start = nullptr; - get_path_from_req(fd[sock].base_url, &url_start, &url_end); - if (url_end - url_start < done) { - if (memcmp(url_start, buf, url_end - url_start)) { - return 0; - } - } - } - // skip past the URL which is embedded in the document - // to confound the fingerprinting code - if (l - left < 64) { - int skip = 64 - (l - left); - left -= skip; - done -= skip; - buf += skip; - if (done < 0) { - done = 0; - } - } - } - if (!check_content) { - return 1; - } - if (done > left) { - done = left; - } - if (memcmp(buf, d + (fd[sock].response_length - left), done)) { - return 0; - } - } - return 1; -} - -#define ZIPF_SIZE (1 << 20) -static double *zipf_table = nullptr; -static void -build_zipf() -{ - zipf_table = (double *)malloc(ZIPF_SIZE * sizeof(double)); - for (int i = 0; i < ZIPF_SIZE; i++) { - zipf_table[i] = 1.0 / pow(i + 2, zipf); - } - for (int i = 1; i < ZIPF_SIZE; i++) { - zipf_table[i] = zipf_table[i - 1] + zipf_table[i]; - } - double x = zipf_table[ZIPF_SIZE - 1]; - for (int i = 0; i < ZIPF_SIZE; i++) { - zipf_table[i] = zipf_table[i] / x; - } -} - -static int -get_zipf(double v) -{ - int l = 0, r = ZIPF_SIZE - 1, m; - do { - m = (r + l) / 2; - if (v < zipf_table[m]) { - r = m - 1; - } else { - l = m + 1; - } - } while (l < r); - if (zipf_bucket_size == 1) { - return m; - } - double x = zipf_table[m], y = zipf_table[m + 1]; - m += static_cast((v - x) / (y - x)); - return m; -} - -static int -read_response_error(int sock) -{ - errors++; - fd[sock].close(); - if (!urls_mode) { - make_bfc_client(proxy_addr, proxy_port); - } - return 0; -} - -static int -read_response(int sock) -{ - int err = 0; - - if (fd[sock].req_pos >= 0) { - if (!fd[sock].req_pos) { - memset(fd[sock].req_header, 0, HEADER_SIZE); - } - do { - int l = HEADER_SIZE - fd[sock].req_pos - 1; - if (l <= 0) { - if (verbose || verbose_errors) { - // coverity[string_null_argument] - printf("header too long '%s'", fd[sock].req_header); - } - return read_response_error(sock); - } - err = read(sock, fd[sock].req_header + fd[sock].req_pos, HEADER_SIZE - fd[sock].req_pos - 1); - } while ((err == -1) && (errno == EINTR)); - if (err <= 0) { - if (!err) { - if (verbose_errors) { - printf("read_response %d closed during header for '%s' after %d%s\n", sock, fd[sock].base_url, fd[sock].req_pos, - (keepalive && (fd[sock].keepalive != keepalive) && !fd[sock].req_pos) ? " -- keepalive timeout" : ""); - } - return read_response_error(sock); - } - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - if (errno == ECONNRESET) { - if (!fd[sock].req_pos && keepalive > 0 && fd[sock].keepalive != keepalive) { - fd[sock].close(); - if (!urls_mode) { - make_bfc_client(proxy_addr, proxy_port); - } - return 0; - } - if (verbose || verbose_errors) { - perror("read"); - } - goto Ldone; - } - panic_perror("read"); - } - if (verbose) { - printf("read %d header %d [%s]\n", sock, err, fd[sock].req_header); - } - b1_ops++; - - strcpy(fd[sock].response_header, fd[sock].req_header); - - b1latency += (int)elapsed_from_start(sock); - new_cbytes += err; - new_tbytes += err; - fd[sock].req_pos += err; - fd[sock].bytes += err; - fd[sock].active = ink_get_hrtime(); - int total_read = fd[sock].req_pos; - char *p = fd[sock].req_header; - char *cl = nullptr; - int cli = 0; - while ((p = strchr(p, '\n'))) { - if (verbose) { - printf("read header end? [%s]\n", p); - } - if (p[1] == '\n' || (p[1] == '\r' && p[2] == '\n')) { - int off = 1 + (p[1] == '\r' ? 2 : 1); - p += off; - strncpy(fd[sock].response_header, fd[sock].req_header, p - fd[sock].req_header); - fd[sock].response_header[p - fd[sock].req_header] = '\0'; - int lbody = fd[sock].req_pos - (p - fd[sock].req_header); - cl = strncasestr(fd[sock].req_header, "Content-Length:", p - fd[sock].req_header); - if (cl) { - cli = atoi(cl + 16); - int expected_length = fd[sock].response_length; - if (compd_suite) { - if (strstr(fd[sock].req_header, "x-jg")) { - fd[sock].jg_compressed = 1; - expected_length = (fd[sock].response_length * 2) / 3; - } - } - if (fd[sock].response_length && verbose_errors && expected_length != cli && !nocheck_length) { - fprintf(stderr, "bad Content-Length expected %d got %d orig %d\n", expected_length, cli, fd[sock].response_length); - } - fd[sock].response_length = fd[sock].length = cli; - } - if (fd[sock].req_header[9] == '2') { - if (!verify_content(sock, p, lbody)) { - if (verbose || verbose_errors) { - printf("content verification error '%s'\n", fd[sock].base_url); - } - return read_response_error(sock); - } - } - total_proxy_response_body_bytes += lbody; - total_proxy_response_header_bytes += p - fd[sock].req_header; - fd[sock].length -= lbody; - ink_assert(fd[sock].length >= 0); - fd[sock].req_pos = -1; - if (fd[sock].length && ts::Random::drandom() < client_abort_rate) { - fd[sock].client_abort = 1; - fd[sock].length = (int)(ts::Random::drandom() * (fd[sock].length - 1)); - fd[sock].keepalive = 0; - fd[sock].drop_after_CL = 1; - } - if (verbose) { - printf("read %d header done\n", sock); - } - break; - } - p++; - } - if (!p) { - return 0; - } - int hlen = p - fd[sock].req_header; - if (show_headers) { - printf("Response From Proxy: {\n"); - for (char *c = fd[sock].req_header; c < p; c++) { - putc(*c, stdout); - } - printf("}\n"); - } - if (obey_redirects && urls_mode && fd[sock].req_header[9] == '3' && fd[sock].req_header[10] == '0' && - (fd[sock].req_header[11] == '1' || fd[sock].req_header[11] == '2')) { - char *redirect = strstr(fd[sock].req_header, "http://"); - char *e = redirect ? (char *)memchr(redirect, '\n', hlen) : 0; - if (!redirect || !e) { - fprintf(stderr, "bad redirect '%s'", fd[sock].req_header); - } else { - if (e[-1] == '\r') { - e--; - } - *e = 0; - make_url_client(redirect); - } - fd[sock].close(); - return 0; - } - if (fd[sock].req_header[9] != '2') { - if (verbose_errors) { - char *e = (char *)memchr(fd[sock].req_header, '\r', hlen); - if (e) { - *e = 0; - } else { - char *e = (char *)memchr(fd[sock].req_header, '\n', hlen); - if (e) { - *e = 0; - } else { - *p = 0; - } - } - printf("error response %d after %dms: '%s':'%s' %lu-%lu\n", sock, (int)elapsed_from_start(sock), fd[sock].base_url, - fd[sock].req_header, fd[sock].range_start, fd[sock].range_end); - } - return read_response_error(sock); - } - char *r = fd[sock].req_header; - int length = p - r; - char *ka = check_keepalive(r, length); - if (urls_mode) { - fd[sock].response_remaining = total_read - length; - if (fd[sock].response_remaining) { - memcpy(fd[sock].response, p, fd[sock].response_remaining); - } - if (check_content && !cl) { - if (verbose || verbose_errors) { - printf("missing Content-Length '%s'\n", fd[sock].base_url); - } - return read_response_error(sock); - } - } else { - fd[sock].response = 0; - } - if (!cl || !ka) { - fd[sock].keepalive = -1; - } - if (!cl) { - fd[sock].length = INT_MAX; - } - } - - if (fd[sock].length <= 0 && (fd[sock].keepalive > 0 || fd[sock].drop_after_CL)) { - goto Ldone; - } - - { - char *r = nullptr; - char buf[MAX_BUFSIZE]; - int toread = cbuffersize; - if (urls_mode) { - if (fd[sock].response_remaining + cbuffersize < MAX_BUFSIZE) { - r = fd[sock].response + fd[sock].response_remaining; - } else { - toread = MAX_BUFSIZE - fd[sock].response_remaining; - if (!toread) { - if (verbose_errors || verbose) { - fprintf(stderr, "line exceeds buffer, unable to follow links\n"); - } - toread = cbuffersize; - r = fd[sock].response; - fd[sock].response_remaining = 0; - } else { - r = fd[sock].response + fd[sock].response_remaining; - } - } - } else { - r = buf; - } - if (fast(sock, client_speed, fd[sock].bytes)) { - return 0; - } - if (fd[sock].bytes > abort_retry_bytes && (((now - fd[sock].start + 1) / HRTIME_SECOND) > abort_retry_secs) && - !faster_than(sock, abort_retry_speed, fd[sock].bytes)) { - fd[sock].client_abort = 1; - fd[sock].keepalive = 0; - if (!urls_mode && !client_rate) { - make_bfc_client(proxy_addr, proxy_port); - } - goto Ldone; - } - do { - err = read(sock, r, toread); - } while ((err == -1) && (errno == EINTR)); - if (err < 0) { - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - if (errno == ECONNRESET) { - if (verbose || verbose_errors) { - perror("read"); - } - goto Ldone; - } - panic_perror("read"); - } - if (!err) { - goto Ldone; - } - if (!verify_content(sock, buf, err)) { - if (verbose || verbose_errors) { - printf("content verification error '%s'\n", fd[sock].base_url); - } - return read_response_error(sock); - } - total_proxy_response_body_bytes += err; - new_cbytes += err; - new_tbytes += err; - fd[sock].response_remaining += err; - fd[sock].bytes += err; - follow_links(sock); - if (fd[sock].length != INT_MAX) { - fd[sock].length -= err; - } - fd[sock].active = ink_get_hrtime(); - if (verbose) { - printf("read %d got %d togo %d %d %d\n", sock, err, fd[sock].length, fd[sock].keepalive, fd[sock].drop_after_CL); - } - } - - if (fd[sock].length <= 0 && (fd[sock].keepalive > 0 || fd[sock].drop_after_CL)) { - goto Ldone; - } - - return 0; - -Ldone: - if (!fd[sock].client_abort && !(server_abort_rate > 0) && fd[sock].length && fd[sock].length != INT_MAX) { - if (verbose || verbose_errors) { - printf("bad length %d wanted %d after %d ms: '%s'\n", fd[sock].response_length - fd[sock].length, fd[sock].response_length, - (int)((ink_get_hrtime() - fd[sock].active) / HRTIME_MSECOND), fd[sock].base_url); - } - return read_response_error(sock); - } - if (verbose) { - printf("read %d done\n", sock); - } - new_ops++; - double thislatency = elapsed_from_start(sock); - latency += (int)thislatency; - lat_ops++; - if (fd[sock].keepalive > 0) { - fd[sock].reset(); - put_ka(sock); - current_clients--; - if (urls_mode) { - undefer_url(); - return 0; - } - } else { - fd[sock].close(); - } - if (!urls_mode && !client_rate) { - make_bfc_client(proxy_addr, proxy_port); - } - return 0; -} - -static int -write_request(int sock) -{ - int err = 0; - - // send request header - if (!fd[sock].send_header) { - do { - err = write(sock, fd[sock].req_header + fd[sock].req_pos, fd[sock].length - fd[sock].req_pos); - } while ((err == -1) && (errno == EINTR)); - if (err <= 0) { - if (!err) { - if (verbose_errors) { - printf("write %d closed early\n", sock); - } - goto Lerror; - } - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - perror("write"); - goto Lerror; - } - if (verbose) { - printf("write %d %d\n", sock, err); - } - - new_tbytes += err; - total_client_request_bytes += err; - fd[sock].req_pos += err; - fd[sock].active = ink_get_hrtime(); - - if (fd[sock].req_pos >= fd[sock].length) { - if (verbose) { - printf("write request header complete %d %d\n", sock, fd[sock].length); - } - fd[sock].req_pos = 0; - fd[sock].length = fd[sock].response_length; - if (!post_support || !fd[sock].post_size) { - poll_set(sock, read_response); - return 0; - } - fd[sock].send_header = 1; - } - } - - // send request body - ink_assert(MAX_RESPONSE_LENGTH > fd[sock].post_size); - - if (fd[sock].send_header) { - do { - err = write(sock, response_buffer + fd[sock].req_pos, fd[sock].post_size - fd[sock].req_pos); - } while ((err == -1) && (errno == EINTR)); - if (err <= 0) { - if (!err) { - if (verbose_errors) { - printf("write %d closed early\n", sock); - } - goto Lerror; - } - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - perror("write"); - goto Lerror; - } - if (verbose) { - printf("write %d %d\n", sock, err); - } - - new_tbytes += err; - total_client_request_bytes += err; - fd[sock].req_pos += err; - fd[sock].active = ink_get_hrtime(); - - if (fd[sock].req_pos >= fd[sock].post_size) { - if (verbose) { - printf("write request body complete %d %d\n", sock, fd[sock].length); - } - fd[sock].send_header = 0; - fd[sock].req_pos = 0; - fd[sock].length = fd[sock].response_length; - poll_set(sock, read_response); - } - } - return 0; -Lerror: - errors++; -#ifndef RETRY_CLIENT_WRITE_ERRORS - if (!--nclients) { - panic("no more clients\n"); - } - return 1; -#else - if (!urls_mode) - make_bfc_client(proxy_host, proxy_port); - fd[sock].close(); - return 0; -#endif -} - -static int -write_ftp_response(int sock) -{ - int err = 0; - - do { - err = write(sock, fd[sock].req_header + fd[sock].req_pos, fd[sock].length - fd[sock].req_pos); - } while ((err == -1) && (errno == EINTR)); - - if (err <= 0) { - if (!err) { - if (verbose_errors) { - printf("write %d closed early\n", sock); - } - goto Lerror; - } - if (errno == EAGAIN || errno == ENOTCONN) { - return 0; - } - perror("write"); - goto Lerror; - } - if (verbose) { - printf("write %d %d\n", sock, err); - } - - new_tbytes += err; - fd[sock].req_pos += err; - - if (fd[sock].req_pos >= fd[sock].length) { - if (verbose) { - printf("write complete %d %d\n", sock, fd[sock].length); - } - fd[sock].req_pos = 0; - fd[sock].length = fd[sock].response_length; - poll_set(sock, read_ftp_request); - } - return 0; -Lerror: - errors++; - return 1; -} - -static int -make_client(unsigned int addr, int port) -{ - struct linger lngr; - - int sock = socket(PF_INET, SOCK_STREAM, 0); - if (sock < 0) { - panic_perror("socket"); - } - - if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0) { - panic_perror("fcntl"); - } - - /* tweak buffer size so that remote end can't close connection too fast */ - -#if 0 - int bufsize = cbuffersize; - if (setsockopt(sock,SOL_SOCKET,SO_RCVBUF, - (const char *)&bufsize,sizeof(bufsize)) < 0) - panic_perror("setsockopt"); - if (setsockopt(sock,SOL_SOCKET,SO_SNDBUF, - (const char *)&bufsize,sizeof(bufsize)) < 0) - panic_perror("setsockopt"); -#endif - int enable = 1; - if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (const char *)&enable, sizeof(enable)) < 0) { - panic_perror("setsockopt"); - } - - /* Tell the socket not to linger on exit */ - lngr.l_onoff = 1; - lngr.l_linger = 0; - if (!ftp) { // this causes problems for PORT ftp -- ewong - if (setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&lngr, sizeof(struct linger)) < 0) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } - } - - /* Give the socket a name. */ - struct sockaddr_in name; - memset(&name, 0, sizeof(sockaddr_in)); - name.sin_family = AF_INET; - name.sin_port = htons(port); - name.sin_addr.s_addr = addr; - - if (verbose) { - printf("connecting to %u.%u.%u.%u:%d\n", ((unsigned char *)&addr)[0], ((unsigned char *)&addr)[1], ((unsigned char *)&addr)[2], - ((unsigned char *)&addr)[3], port); - } - - while (connect(sock, (struct sockaddr *)&name, sizeof(name)) < 0) { - if (errno == EINTR) { - continue; - } - if (errno == EINPROGRESS) { - break; - } - if (verbose_errors) { - fprintf(stderr, "connect failed errno = %d\n", errno); - } - errors++; - close(sock); - return -1; - } - - init_client(sock); - fd[sock].ip = addr; - clients++; - current_clients++; - new_clients++; - return sock; -} - -static void -make_range_header(int sock, double dr, char *rbuf, int size_limit) -{ - int tmp[3]; - - if (!rbuf || !size_limit) - return; - - tmp[0] = gen_bfc_dist(dr - 1.0); - tmp[1] = ((int)(ts::Random::drandom() * 1000000)) % (tmp[0] - 1 - 0 + 1); - tmp[2] = ((int)(ts::Random::drandom() * 1000000)) % (tmp[0] - 1 - 0 + 1) + tmp[1] + 100; - - if (tmp[0] > 100) { - if (tmp[0] <= tmp[2]) { - tmp[2] = tmp[0] - 1; - } - - if (tmp[2] - tmp[1] < 100) { - tmp[1] = tmp[2] - 100; - } - } else { - tmp[1] = 0; - tmp[2] = 99; - } - - fd[sock].response_length = tmp[0]; - fd[sock].range_start = tmp[1] > tmp[2] ? tmp[2] : tmp[1]; - fd[sock].range_end = tmp[1] < tmp[2] ? tmp[2] : tmp[1]; - - ink_assert((int)(fd[sock].range_end - fd[sock].range_start + 1) >= 100); - snprintf(rbuf, size_limit, "Range: bytes=%lu-%lu\r\n", fd[sock].range_start, fd[sock].range_end); -} - -static void -make_random_url(int sock, double *dr, double *h) -{ - *dr = ts::Random::drandom(); - *h = ts::Random::drandom(); - - if (zipf == 0.0) { - if (*h < hitrate) { - *dr = 1.0 + (floor(*dr * hotset) / hotset); - fd[sock].response_length = gen_bfc_dist(*dr - 1.0); - } else - fd[sock].response_length = gen_bfc_dist(*dr); - } else { - unsigned long long int doc = get_zipf(*dr); - // Some large randomish number. - unsigned long long int doc_len_int = doc * 0x14A4D0FB0E93E3A7LL; - unsigned long int x = doc_len_int; - double y = (double)x; - y /= 0x100000000LL; // deterministic random number between 0 and 1.0 - fd[sock].response_length = gen_bfc_dist(y); - *dr = doc; - range_mode = 0; - } -} - -static int -make_nohost_request(int sock, double dr, const char *evo_str, const char *extension, const char *eheaders, const char *rbuf, - const char *cookie) -{ - int post_length = 0; - - switch (post_support) { - case 0: - if (range_mode) { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "GET http://%s:%d/%12.10f/%d%s%s HTTP/1.1\r\n" - "%s" - "%s" - "%s" - "%s" - "%s" - "%s" - "\r\n", - local_host, server_port, dr, fd[sock].response_length, evo_str, extension, - fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "Connection: close\r\n", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, "Host: localhost\r\n", rbuf, cookie); - } else { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - ftp ? "GET ftp://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n" - "%s" - "%s" - "%s" - "%s" - "\r\n" : - "GET http://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n" - "%s" - "%s" - "%s" - "%s" - "\r\n", - local_host, server_port, dr, fd[sock].response_length, evo_str, extension, - fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, cookie); - } - break; - case 1: - if (range_mode) { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "POST http://%s:%d/%12.10f/%d%s%s HTTP/1.1\r\n" - "Content-Length: %d\r\n" - "%s" - "%s" - "%s" - "%s" - "%s" - "%s" - "\r\n", - local_host, server_port, dr, fd[sock].response_length, evo_str, extension, fd[sock].response_length, - fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "Connection: close\r\n", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, "Host: localhost\r\n", rbuf, cookie); - } else { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "POST http://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n" - "Content-Length: %d\r\n" - "%s" - "%s" - "%s" - "%s" - "\r\n", - local_host, server_port, dr, fd[sock].response_length, evo_str, extension, fd[sock].response_length, - fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, cookie); - } - post_length = fd[sock].response_length; - break; - case 2: - if (!post_size) - ink_assert(!"post_size should never be zero!"); - - if (range_mode) { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "POST http://%s:%d/%12.10f/%d%s%s HTTP/1.1\r\n" - "Content-Length: %d\r\n" - "%s" - "%s" - "%s" - "%s" - "%s" - "%s" - "\r\n", - local_host, server_port, dr, fd[sock].response_length, evo_str, extension, post_size, - fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "Connection: close\r\n", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, "Host: localhost\r\n", rbuf, cookie); - } else { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "POST http://%s:%d/%12.10f/%d%s%s HTTP/1.0\r\n" - "Content-Length: %d\r\n" - "%s" - "%s" - "%s" - "%s" - "\r\n", - local_host, server_port, dr, fd[sock].response_length, evo_str, extension, post_size, - fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, cookie); - } - post_length = post_size; - break; - } - - return post_length; -} - -static int -make_host1_request(int sock, double dr, const char *evo_str, const char *extension, const char *eheaders, const char *cookie) -{ - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "GET /%12.10f/%d%s%s HTTP/1.0\r\n" - "Host: %s:%d\r\n" - "%s" - "%s" - "%s" - "%s" - "\r\n", - dr, fd[sock].response_length, evo_str, extension, local_host, server_port, - fd[sock].keepalive ? "Connection: Keep-Alive\r\n" : "", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, cookie); - return 0; -} - -static int -make_host2_request(int sock, double dr, const char *evo_str, const char *extension, const char *eheaders, const char *cookie) -{ - /* Send a non-proxy client request i.e. for Transparency testing */ - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "GET /%12.10f/%d%s%s HTTP/1.0\r\n" - "%s" - "%s" - "%s" - "%s" - "\r\n", - dr, fd[sock].response_length, evo_str, extension, fd[sock].keepalive ? "Connection: Keep-Alive\r\n" : "", - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", eheaders, cookie); - return 0; -} - -static int -build_request(int sock) -{ - double dr, h; - char rbuf[1024]; - - make_random_url(sock, &dr, &h); - - if (verbose) { - printf("gen_bfc_dist %d\n", fd[sock].response_length); - } - - if (range_mode) { - make_range_header(sock, dr, rbuf, 1024); - } - - char eheaders[16384]; - *eheaders = 0; - int nheaders = extra_headers; - if (nheaders > 0) { - char *eh = eheaders; - char *eheaders_end = eheaders + sizeof(eheaders); - if (!vary_user_agent) { - eh += snprintf(eh, sizeof(eheaders) - (eh - eheaders), "User-Agent: Mozilla/4.04 [en] (X11; I; Linux 2.0.31 i586)\r\n"); - ink_release_assert(eh < eheaders_end); // validate that eh doesn't point past eheaders (buffer overflow) - nheaders--; - } - if (nheaders > 0) { - eh += snprintf(eh, sizeof(eheaders) - (eh - eheaders), - "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"); - ink_release_assert(eh < eheaders_end); // validate that eh doesn't point past eheaders (buffer overflow) - } - while (--nheaders > 0) { - eh += snprintf(eh, sizeof(eheaders) - (eh - eheaders), "Extra-Header%d: a lot of junk for header %d\r\n", nheaders, nheaders); - ink_release_assert(eh < eheaders_end); // validate that eh doesn't point past eheaders (buffer overflow) - } - } - char cookie[256]; - *cookie = 0; - fd[sock].nalternate = (int)(alternates * ts::Random::drandom()); - if (alternates) { - if (!vary_user_agent) { - snprintf(cookie, sizeof(cookie), "Cookie: jtest-cookie-%d\r\n", fd[sock].nalternate); - } else { - snprintf(cookie, sizeof(cookie), "User-Agent: jtest-browser-%d\r\n", fd[sock].nalternate); - } - } - const char *extension; - switch (request_extension) { - case 1: - extension = ".html"; - break; - case 2: - extension = ".jpeg"; - break; - case 3: - extension = "/"; - break; - default: - extension = (compd_suite ? ".jpeg" : ""); - } - - char evo_str[20]; - evo_str[0] = '\0'; - if (evo_rate != 0.0) { - double evo_index = dr + (((double)now) / HRTIME_HOUR) * evo_rate; - snprintf(evo_str, sizeof(evo_str), ".%u", ((unsigned int)evo_index)); - } - - int post_body = 0; - - switch (hostrequest) { - case 0: - post_body = make_nohost_request(sock, dr, evo_str, extension, eheaders, rbuf, cookie); - break; - case 1: - post_body = make_host1_request(sock, dr, evo_str, extension, eheaders, cookie); - break; - case 2: - post_body = make_host2_request(sock, dr, evo_str, extension, eheaders, cookie); - break; - default: - ink_release_assert(!"Unexpected hostrequest! Abort."); - return 0; - } - - if (range_mode) { - fd[sock].response_length = fd[sock].range_end - fd[sock].range_start + 1; - ink_assert(fd[sock].response_length > 0); - } - - return post_body; -} - -static void -make_bfc_client(unsigned int addr, int port) -{ - int sock = -1; - char rbuf[1024]; - memset(rbuf, 0, 1024); - - if (bandwidth_test && bandwidth_test_to_go-- <= 0) { - return; - } - if (keepalive) { - sock = get_ka(addr); - } - if (sock < 0) { - sock = make_client(addr, port); - fd[sock].keepalive = keepalive; - } else { - init_client(sock); - current_clients++; - fd[sock].keepalive--; - } - if (sock < 0) { - panic("unable to open client connection\n"); - } - - fd[sock].post_size = build_request(sock); - - if (verbose) { - printf("request %d [%s]\n", sock, fd[sock].req_header); - } - fd[sock].length = strlen(fd[sock].req_header); - { - char *s = fd[sock].req_header; - char *e = (char *)memchr(s, '\r', 512); - char *url = fd[sock].base_url; - memcpy(url, s, e - s); - url[e - s] = 0; - if (show_before) { - printf("%s\n", url); - } - } - if (show_headers) { - printf("Request to Proxy: {\n%s}\n", fd[sock].req_header); - } -} - -#define RUNNING(_n) \ - total_##_n = (((total_##_n * (average_over - 1)) / average_over) + new_##_n); \ - running_##_n = total_##_n / average_over; \ - new_##_n = 0; - -#define RUNNING_AVG(_t, _n, _o) \ - _t = _o ? ((_t * (average_over - 1) + _n / _o) / average_over) : _t; \ - _n = 0; - -void -interval_report() -{ - static int here = 0; - now = ink_get_hrtime(); - if (!(here++ % 20)) { - printf(" con new ops 1B lat bytes/per svrs new ops total time err\n"); - } - RUNNING(clients); - RUNNING_AVG(running_latency, latency, lat_ops); - lat_ops = 0; - RUNNING_AVG(running_b1latency, b1latency, b1_ops); - b1_ops = 0; - RUNNING(cbytes); - RUNNING(ops); - RUNNING(servers); - RUNNING(sops); - RUNNING(tbytes); - float t = (float)(now - start_time); - uint64_t per = current_clients ? running_cbytes / current_clients : 0; - printf("%4d %4d %7.1f %4d %4d %10" PRIu64 "/%-6" PRIu64 " %4d %4d %4d %9" PRIu64 " %6.1f %4d\n", - current_clients, // clients, n_ka_cache, - running_clients, running_ops, running_b1latency, running_latency, running_cbytes, per, running_servers, running_servers, - running_sops, running_tbytes, t / ((float)HRTIME_SECOND), errors); - if (is_done()) { - printf("Total Client Request Bytes:\t\t%" PRIu64 "\n", total_client_request_bytes); - printf("Total Server Response Header Bytes:\t%" PRIu64 "\n", total_server_response_header_bytes); - printf("Total Server Response Body Bytes:\t%" PRIu64 "\n", total_server_response_body_bytes); - printf("Total Proxy Request Bytes:\t\t%" PRIu64 "\n", total_proxy_request_bytes); - printf("Total Proxy Response Header Bytes:\t%" PRIu64 "\n", total_proxy_response_header_bytes); - printf("Total Proxy Response Body Bytes:\t%" PRIu64 "\n", total_proxy_response_body_bytes); - } -} - -#define URL_HASH_ENTRIES url_hash_entries -#define BYTES_PER_ENTRY 3 -#define ENTRIES_PER_BUCKET 16 -#define OVERFLOW_ENTRIES 1024 // many many - -#define BUCKETS (URL_HASH_ENTRIES / ENTRIES_PER_BUCKET) -#define BYTES_PER_BUCKET (BYTES_PER_ENTRY * ENTRIES_PER_BUCKET) -#define URL_HASH_BYTES (BYTES_PER_ENTRY * (URL_HASH_ENTRIES + OVERFLOW_ENTRIES)) - -// NOTE: change to match BYTES_PER_ENTRY -#define ENTRY_TAG(_x) (((unsigned int)_x[0] << 16) + ((unsigned int)_x[1] << 8) + (unsigned int)_x[2]) -#define SET_ENTRY_TAG(_x, _t) \ - _x[0] = _t >> 16; \ - _x[1] = (_t >> 8) & 0xFF; \ - _x[2] = _t & 0xFF; - -#define MASK_TAG(_x) (_x & ((1U << (BYTES_PER_ENTRY * 8)) - 1)) - -#define BEGIN_HASH_LOOP \ - unsigned int bucket = (i % BUCKETS); \ - unsigned int tag = MASK_TAG((unsigned int)(i / BUCKETS)); \ - if (!tag) \ - tag++; \ - unsigned char *base = bytes + bucket * BYTES_PER_BUCKET; \ - unsigned char *last = bytes + (bucket + 1) * BYTES_PER_BUCKET - BYTES_PER_ENTRY; \ - (void)last; \ - \ - for (unsigned int x = 0; x < ENTRIES_PER_BUCKET; x++) { \ - unsigned char *e = base + x * BYTES_PER_ENTRY; - -#define BEGIN_OVERFLOW_HASH_LOOP \ - for (unsigned int j = 0; j < ENTRIES_PER_BUCKET; j++) { \ - unsigned char *e = base + (URL_HASH_ENTRIES + j) * BYTES_PER_ENTRY; - -#define END_HASH_LOOP } - -struct UrlHashTable { - unsigned int numbytes; - unsigned char *bytes; - int fd; - - void - zero() - { - memset(bytes, 0, numbytes); - } - - void alloc(unsigned int want); - - void - set(uint64_t i) - { - BEGIN_HASH_LOOP - { - if (!ENTRY_TAG(e)) { - SET_ENTRY_TAG(e, tag); - return; - } - } - END_HASH_LOOP; - - fprintf(stderr, "url hash table overflow: %X, %X\n", (int)(base - bytes), tag); - - BEGIN_OVERFLOW_HASH_LOOP - { - if (!ENTRY_TAG(e)) { - SET_ENTRY_TAG(e, tag); - return; - } - } - END_HASH_LOOP; - - ink_fatal("overview entries overflow"); - } - - void - clear(uint64_t i) - { - BEGIN_HASH_LOOP - { - if (ENTRY_TAG(e) == tag) { - if (e != last) { - SET_ENTRY_TAG(e, ENTRY_TAG(last)); - } - SET_ENTRY_TAG(last, 0); - return; - } - } - END_HASH_LOOP; - - fprintf(stderr, "url hash table entry to clear not found: %X, %X\n", (int)(base - bytes), tag); - } - - int - is_set(uint64_t i) - { - BEGIN_HASH_LOOP - { - if (ENTRY_TAG(e) == tag) { - return 1; - } - } - END_HASH_LOOP; - - if (ENTRY_TAG((last))) { - BEGIN_OVERFLOW_HASH_LOOP - { - if (ENTRY_TAG(e) == tag) { - return 1; - } - } - END_HASH_LOOP; - } - return 0; - } - - UrlHashTable(); - - ~UrlHashTable(); -}; -UrlHashTable *uniq_urls = nullptr; - -UrlHashTable::UrlHashTable() : numbytes(0), bytes(nullptr), fd(-1) -{ - off_t len = 0; - - if (!url_hash_entries) { - return; - } - - if (*url_hash_filename) { - if ((fd = open(url_hash_filename, O_RDWR | O_CREAT, 0644)) == -1) { - panic_perror("failed to open URL Hash file"); - } - - len = lseek(fd, 0, SEEK_END); - } - - if (url_hash_entries > 0) { - // if they specify the number of entries round it up - url_hash_entries = (url_hash_entries + ENTRIES_PER_BUCKET - 1) & ~(ENTRIES_PER_BUCKET - 1); - numbytes = URL_HASH_BYTES; - - // ensure it is either a new file or the correct size - if (len != 0 && len != numbytes) { - panic("specified size != file size\n"); - } - - } else { - // otherwise make sure the file is non-zero and then use its - // size as the size - if (!len) { - panic("zero size URL Hash Table\n"); - } - if (len != URL_HASH_BYTES) { - fprintf(stderr, "FATAL: hash file length (%jd) != URL_HASH_BYTES (%jd)\n", (intmax_t)len, (intmax_t)URL_HASH_BYTES); - exit(1); - } - numbytes = len; - } - - if (*url_hash_filename) { - if (ftruncate(fd, numbytes) == -1) { - panic_perror("unable to truncate URL Hash file"); - } - - bytes = (unsigned char *)mmap(nullptr, numbytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (bytes == (unsigned char *)MAP_FAILED || !bytes) { - panic("unable to map URL Hash file\n"); - } - } else { - bytes = (unsigned char *)malloc(numbytes); - ink_assert(bytes); - zero(); - } -} // UrlHashTable::UrlHashTable - -UrlHashTable::~UrlHashTable() -{ - if (bytes) { - munmap((char *)bytes, numbytes); - } - if (fd != -1) { - close(fd); - } -} // UrlHashTable::~UrlHashTable - -static int -seen_it(char *url) -{ - if (!url_hash_entries) { - return 0; - } - int l = 0; - char *para = strrchr(url, '#'); - if (para) { - l = para - url; - } else { - l = strlen(url); - } - CryptoHash hash; - CryptoContext().hash_immediate(hash, reinterpret_cast(url), l); - uint64_t x = hash.fold(); - if (uniq_urls->is_set(x)) { - if (verbose) { - printf("YES: seen it '%s'\n", url); - } - return 1; - } - uniq_urls->set(x); - if (verbose) { - printf("NO: marked it '%s'\n", url); - } - return 0; -} - -static int -make_url_client(const char *url, const char *base_url, bool seen, bool unthrottled) -{ - int iport = 80; - unsigned int ip = 0; - char curl[512] = {0}; - char sche[8], host[512], port[10], path[512], frag[512], quer[512], para[512]; - int xsche, xhost, xport, xpath, xfrag, xquer, xpar, rel, slash; - - if (base_url) { - ink_web_canonicalize_url(base_url, url, curl, 512); - // hack for our own web server! - if (curl[strlen(curl) - 1] == 13) { - curl[strlen(curl) - 1] = 0; - } - if (curl[strlen(curl) - 1] == 12) { - curl[strlen(curl) - 1] = 0; - } - } else { - strncpy(curl, url, sizeof(curl) - 1); - } - if (!seen && seen_it(curl)) { - return -1; - } - ink_web_decompose_url(curl, sche, host, port, path, frag, quer, para, &xsche, &xhost, &xport, &xpath, &xfrag, &xquer, &xpar, &rel, - &slash); - if (follow_same) { - if (!xhost || strcasecmp(host, current_host)) { - if (verbose) { - printf("skipping %s\n", curl); - } - return -1; - } - } - if (!unthrottled && throttling_connections()) { - defer_url(curl); - return -1; - } - if (proxy_port) { - iport = proxy_port; - ip = proxy_addr; - } else { - if (xport) { - iport = swoc::svtoi(port); - } - if (!xhost) { - if (verbose) { - fprintf(stderr, "bad url '%s'\n", curl); - } - return -1; - } - ip = get_addr(host); - if ((int)ip == -1) { - if (verbose || verbose_errors) { - fprintf(stderr, "bad host '%s'\n", host); - } - return -1; - } - } - int sock = -1; - if (keepalive) { - sock = get_ka(ip); - } - if (sock < 0) { - sock = make_client(ip, iport); - fd[sock].keepalive = keepalive; - } else { - init_client(sock); - current_clients++; - fd[sock].keepalive--; - } - if (sock < 0) { - panic("cannot make client\n"); - } - char eheaders[16384]; - *eheaders = 0; - int nheaders = extra_headers; - memset(&eheaders, 0, 16384); - if (nheaders > 0) { - char *eh = eheaders; - if (!vary_user_agent) { - eh += snprintf(eh, sizeof(eheaders) - (eh - eheaders), "User-Agent: Mozilla/4.04 [en] (X11; I; Linux 2.0.31 i586)\r\n"); - nheaders--; - } - if (nheaders > 0) { - eh += snprintf(eh, sizeof(eheaders) - (eh - eheaders), - "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"); - } - while (--nheaders > 0) { - eh += snprintf(eh, sizeof(eheaders) - (eh - eheaders), "Extra-Header%d: a lot of junk for header %d\r\n", nheaders, nheaders); - } - } - if (proxy_port) { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "GET %s HTTP/1.0\r\n" - "%s" - "%s" - "Accept: */*\r\n" - "%s" - "\r\n", - curl, reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", - fd[sock].keepalive ? "Proxy-Connection: Keep-Alive\r\n" : "", eheaders); - } else { - snprintf(fd[sock].req_header, fd[sock].max_req_header_size, - "GET /%s%s%s%s%s HTTP/1.0\r\n" - "Host: %s\r\n" - "%s" - "%s" - "Accept: */*\r\n" - "%s" - "\r\n", - path, xquer ? "?" : "", quer, xpar ? ";" : "", para, host, - reload_rate > ts::Random::drandom() ? "Pragma: no-cache\r\n" : "", - fd[sock].keepalive ? "Connection: Keep-Alive\r\n" : "", eheaders); - } - - if (verbose) { - printf("curl = '%s'\n", curl); - } - if (show_before) { - printf("%s\n", curl); - } - if (urlsdump_fp) { - fprintf(urlsdump_fp, "%s\n", curl); - } - if (show_headers) { - printf("Request to Proxy: {\n%s}\n", fd[sock].req_header); - } - - { - const char *ext = strrchr(path, '.'); - - fd[sock].binary = 0; - if (ext) { - fd[sock].binary = !strncasecmp(ext, ".gif", 4) || !strncasecmp(ext, ".jpg", 4); - } - } - - fd[sock].response_length = 0; - fd[sock].length = strlen(fd[sock].req_header); - if (!fd[sock].response) { - fd[sock].response = (char *)malloc(MAX_BUFSIZE); - } - strcpy(fd[sock].base_url, curl); - return sock; -} - -static FILE * -get_defered_urls(FILE *fp) -{ - char url[512]; - while (fgets(url, 512, fp)) { - if (n_defered_urls > MAX_DEFERED_URLS - 2) { - return nullptr; - } - char *e = (char *)memchr(url, '\n', 512); - if (e) { - *e = 0; - } - make_url_client(url); - } - return fp; -} - -int -main(int argc __attribute__((unused)), const char *argv[]) -{ - auto &version = AppVersionInfo::setup_version("jtest"); - - /* for QA -- we want to be able to tail an output file - * during execution "nohup jtest -P pxy -p prt &" - */ - setvbuf(stdout, (char *)nullptr, _IOLBF, 0); - - fd = (FD *)malloc(MAXFDS * sizeof(FD)); - memset(static_cast(fd), 0, MAXFDS * sizeof(FD)); - process_args(&version, argument_descriptions, n_argument_descriptions, argv); - - if (!drand_seed) { - ts::Random::seed((long)time(nullptr)); - } else { - ts::Random::seed((long)drand_seed); - } - if (zipf != 0.0) { - build_zipf(); - } - int max_fds = max_limit_fd(); - if (verbose) { - printf("maximum of %d connections\n", max_fds); - } - signal(SIGPIPE, SIG_IGN); - start_time = now = ink_get_hrtime(); - - urls_mode = n_file_arguments || *urls_file; - nclients = client_rate ? 0 : nclients; - - if (!local_host[0]) { - if (gethostname(local_host, sizeof(local_host)) != 0) { - panic_perror("gethostname failed"); - } - } - - local_addr = get_addr(local_host); - if (!proxy_host[0]) { - strncpy(proxy_host, local_host, sizeof(proxy_host) - 1); - proxy_host[sizeof(proxy_host) - 1] = 0; - } - if (proxy_port) { - proxy_addr = get_addr(proxy_host); - } - - if (!urls_mode) { - if (compd_port) { - build_response(); - open_server(compd_port, accept_compd); - } else { - if (!server_port) { - server_port = proxy_port + 1000; - } - build_response(); - if (!only_clients) { - for (int retry = 0; retry < 20; retry++) { - server_fd = open_server(server_port + retry, accept_read); - if (server_fd < 0) { - if (server_fd == -EADDRINUSE) { - continue; - } - panic_perror("open_server"); - } - break; - } - } - bandwidth_test_to_go = bandwidth_test; - if (!only_server) { - if (proxy_port) { - for (int i = 0; i < nclients; i++) { - make_bfc_client(proxy_addr, proxy_port); - } - } - } - } - } else { - if (check_content) { - build_response(); - } - follow = follow_arg; - follow_same = follow_same_arg; - uniq_urls = new UrlHashTable; - defered_urls = (char **)malloc(sizeof(char *) * MAX_DEFERED_URLS); - average_over = 1; - if (*urlsdump_file) { - urlsdump_fp = fopen(urlsdump_file, "w"); - if (!urlsdump_fp) { - panic_perror("fopen urlsdump file"); - } - } - if (*urls_file) { - FILE *fp = fopen(urls_file, "r"); - if (!fp) { - panic_perror("fopen urls file"); - } - if (get_defered_urls(fp)) { - fclose(fp); - } else { - urls_fp = fp; - } - } - for (unsigned i = 0; i < n_file_arguments; i++) { - char sche[8], host[512], port[10], path[512], frag[512], quer[512], para[512]; - int xsche, xhost, xport, xpath, xfrag, xquer, xpar, rel, slash; - ink_web_decompose_url(file_arguments[i], sche, host, port, path, frag, quer, para, &xsche, &xhost, &xport, &xpath, &xfrag, - &xquer, &xpar, &rel, &slash); - if (xhost) { - strcpy(current_host, host); - } - } - for (unsigned i = 0; i < n_file_arguments; i++) { - make_url_client(file_arguments[i]); - } - } - - int t = now / HRTIME_SECOND; - int tclient = now / HRTIME_SECOND; - int start = now / HRTIME_SECOND; - while (1) { - if (poll_loop()) { - break; - } - int t2 = now / HRTIME_SECOND; - if (urls_fp && n_defered_urls < MAX_DEFERED_URLS - DEFERED_URLS_BLOCK - 2) { - if (get_defered_urls(urls_fp)) { - fclose(urls_fp); - urls_fp = nullptr; - } - } - if ((!urls_mode || client_rate) && interval && t + interval <= t2) { - t = t2; - interval_report(); - } - if (t2 != tclient) { - for (int i = 0; i < client_rate * (t2 - tclient); i++) { - if (!urls_mode) { - make_bfc_client(proxy_addr, proxy_port); - } else { - undefer_url(true); - } - } - tclient = t2; - } - if (test_time) { - if (t2 - start > test_time) { - done(); - } - } - if (is_done()) { - done(); - } - } - - return 0; -} - -/*---------------------------------------------------------------------------* - - int ink_web_decompose_url(...) - - This function takes an input URL in src_url and splits it into its - component parts, including a scheme, host, port, path, fragment, - query, and parameters. you must pass in buffers for each of these. - If you pass in a nullptr pointer for any of these, it will not be - returned. - - The flags "sche_exists", etc. tell you if that part of the URL was - found. Each unfound part (with a non-nullptr buffer) will also contain - the empty string '\0'. - - The flag "relative_url" indicates that the src_url did not start - with a scheme. (This is kind of redundant with sche_exists but is - the general way to do it.) - - The flag "leading_slash" indicates that the path began with a - leading slash. - - mep - 4/15/96 - - *---------------------------------------------------------------------------*/ - -static void -ink_web_decompose_url(const char *src_url, char *sche, char *host, char *port, char *path, char *frag, char *quer, char *para, - int *real_sche_exists, int *real_host_exists, int *real_port_exists, int *real_path_exists, - int *real_frag_exists, int *real_quer_exists, int *real_para_exists, int *real_relative_url, - int *real_leading_slash) -/* - * Input: src_url - * Outputs: every other argument - * - * You may pass in nullptr pointers for any of: sche, host, port, path, - * frag, quer, or para, and they will not be returned. - * - * - * According to the HTML Sourcebook, a URL consists: - * - * http://www.address.edu:80/path/subdir/file.ext?query;params#fragment - * aaaa bbbbbbbbbbbbbbb cc dddddddddddddddddddd eeeee ffffff gggggggg - * - * where - * a = scheme - * b = host - * c = port - * d = path - * e = query - * f = params - * g = fragment - * - * Order of parsing is: fragment, scheme, host, port, params, query, path - * - * Note that the hostname:port part may contain something like: - * user@pass:hostname:port - * bbbbbbbbbbbbbbbbbb cccc - * i.e., the port is the thing after the _last_ colon in this part - * - */ -{ - const char *start = src_url; - int len = strlen(src_url); - const char *end = start + len; - const char *ptr = start; - const char *ptr2, *temp, *temp2; - const char *sche1 = nullptr, *sche2 = nullptr; - const char *host1 = nullptr, *host2 = nullptr; - const char *port1 = nullptr, *port2 = nullptr; - const char *path1 = nullptr, *path2 = nullptr; - const char *frag1 = nullptr, *frag2 = nullptr; - const char *quer1 = nullptr, *quer2 = nullptr; - const char *para1 = nullptr, *para2 = nullptr; - bool fail = false; - int num; - int sche_exists = 0; - int host_exists = 0; - int port_exists = 0; - int path_exists = 0; - int frag_exists = 0; - int quer_exists = 0; - int para_exists = 0; - int leading_slash = 0; - - temp2 = ptr; - /* strip fragments "#" off the end */ - while (ptr < end) { - if (*ptr == '#') { - frag1 = ptr + 1; - frag2 = end; - frag_exists = 1; - end = ptr; - } - ptr++; - } - ptr = temp2; - - /* decide if there is a sche, i.e. if it's an absolute url */ - /* find end of sche */ - fail = false; - temp2 = ptr; - while ((ptr < end) && !fail) { - if (*ptr == ':') { - sche1 = start; - sche2 = ptr; - ptr++; /* to continue to parse, skip the : */ - sche_exists = 1; - fail = true; - } else if ((!ParseRules::is_alpha(*ptr) && (*ptr != '+') && (*ptr != '.') && (*ptr != '-')) || (ptr == end)) { - sche_exists = 0; - fail = true; - } else { - ptr++; - } - } - if (sche_exists == 0) { - ptr = temp2; - } - - /* find start of host */ - fail = false; - temp2 = ptr; - while ((ptr < end - 1) && !fail) { - if (*(ptr + 0) == '/') { - if (*(ptr + 1) == '/') { - host1 = ptr + 2; - ptr += 2; /* skip "//" */ - host_exists = 1; - fail = true; - } else { - /* this is the start of a path, not a host */ - host_exists = 0; - fail = true; - } - } else { - ptr++; - } - } - - /* find end of host */ - if (host_exists == 1) { - while ((ptr < end) && (host2 == nullptr)) { - if (*ptr == '/') { - /* "/" marks the start of the path */ - host2 = ptr; /* just so we quit out of the loop */ - } else { - ptr++; - } - } - if (host2 == nullptr) { - host2 = end; - } - - if (host_exists == 1) { - temp = host2 - 1; - /* remove trailing dots from host */ - while ((temp > host1) && (*temp == '.')) { - temp--; - host2--; - } - - /* find start & end of port */ - ptr2 = host1; - temp = host2; - while (ptr2 < temp) { - if (*ptr2 == ':') { - port1 = ptr2 + 1; - port2 = temp; - host2 = ptr2; - port_exists = 1; - } - ptr2++; - } - } - } - if (host_exists == 0) { - ptr = temp2; - } - - temp2 = ptr; - /* strip query "?" off the end */ - while (ptr < end) { - if (*ptr == '?') { - quer1 = ptr + 1; - quer2 = end; - quer_exists = 1; - end = ptr; - } - ptr++; - } - ptr = temp2; - - temp2 = ptr; - /* strip parameters ";" off the end */ - while (ptr < end) { - if (*ptr == ';') { - para1 = ptr + 1; - para2 = end; - para_exists = 1; - end = ptr; - } - ptr++; - } - ptr = temp2; - - /* the path is the remainder of the string */ - /* don't include any leading slash */ - if (ptr < end) { - if (*ptr == '/') { - leading_slash = 1; - path1 = ptr + 1; - path2 = end; - path_exists = 1; - } else { - path1 = ptr; - path2 = end; - path_exists = 1; - } - } else { - path1 = end; - path2 = end; - path_exists = 0; - } - - if (sche_exists != 1) { - *real_relative_url = 1; - } else { - *real_relative_url = 0; - } - - /* extract strings for scheme, host, port, path, etc */ - - if (sche != nullptr) { - if (sche_exists) { - num = sche2 - sche1; - if (num > MAX_URL_LEN - 1) { - num = MAX_URL_LEN - 1; - } - strncpy(sche, sche1, num + 1); - *(sche + num) = '\0'; - - /* make scheme lowercase */ - char *p = sche; - while (*p) { - *p = ParseRules::ink_tolower(*p); - p++; - } - } else { - *sche = 0; - } - } - - if (host != nullptr) { - if (host_exists) { - num = host2 - host1; - if (num > MAX_URL_LEN - 1) { - num = MAX_URL_LEN - 1; - } - strncpy(host, host1, num + 1); - *(host + num) = '\0'; - - /* make hostname lowercase */ - char *p = host; - while (*p) { - *p = ParseRules::ink_tolower(*p); - p++; - } - } else { - *host = 0; - } - } - - if (port != nullptr) { - if (port_exists) { - num = port2 - port1; - if (num > MAX_URL_LEN - 1) { - num = MAX_URL_LEN - 1; - } - strncpy(port, port1, num + 1); - *(port + num) = '\0'; - } else { - *port = 0; - } - } - - if (path != nullptr) { - if (path_exists) { - num = path2 - path1; - if (num > MAX_URL_LEN - 1) { - num = MAX_URL_LEN - 1; - } - strncpy(path, path1, num + 1); - *(path + num) = '\0'; - } else { - *path = 0; - } - } - - if (frag != nullptr) { - if (frag_exists) { - num = frag2 - frag1; - if (num > MAX_URL_LEN - 1) { - num = MAX_URL_LEN - 1; - } - strncpy(frag, frag1, num + 1); - *(frag + num) = '\0'; - } else { - *frag = 0; - } - } - - if (quer != nullptr) { - if (quer_exists) { - num = quer2 - quer1; - if (num > MAX_URL_LEN - 1) { - num = MAX_URL_LEN - 1; - } - strncpy(quer, quer1, num + 1); - *(quer + num) = '\0'; - } else { - *quer = 0; - } - } - - if (para != nullptr) { - if (para_exists) { - num = para2 - para1; - if (num > MAX_URL_LEN - 1) { - num = MAX_URL_LEN - 1; - } - strncpy(para, para1, num + 1); - *(para + num) = '\0'; - } else { - *para = 0; - } - } - *real_sche_exists = sche_exists; - *real_host_exists = host_exists; - *real_port_exists = port_exists; - *real_path_exists = path_exists; - *real_frag_exists = frag_exists; - *real_quer_exists = quer_exists; - *real_para_exists = para_exists; - *real_leading_slash = leading_slash; -} /* End ink_web_decompose_url */ - -#if 0 /* debugging */ -/*---------------------------------------------------------------------------* - - void ink_web_dump_url_components(FILE *fp, InkWebURLComponents *c) - - This routine writes a readable representation of the URL components - pointed to by on the file pointer . - - *---------------------------------------------------------------------------*/ - -static void ink_web_dump_url_components(FILE *fp, InkWebURLComponents *c) -{ - fprintf(fp,"sche:'%s', exists %d\n",c->sche,c->sche_exists); - fprintf(fp,"host:'%s', exists %d\n",c->host,c->host_exists); - fprintf(fp,"port:'%s', exists %d\n",c->port,c->port_exists); - fprintf(fp,"path:'%s', exists %d\n",c->path,c->path_exists); - fprintf(fp,"quer:'%s', exists %d\n",c->quer,c->quer_exists); - fprintf(fp,"frag:'%s', exists %d\n",c->frag,c->frag_exists); - fprintf(fp,"para:'%s', exists %d\n",c->para,c->para_exists); - - fprintf(fp,"rel_url:%d\n",c->rel_url); - fprintf(fp,"leading_slash:%d\n",c->leading_slash); - - fprintf(fp,"\n"); -} /* End ink_web_dump_url_components */ - -#endif - -/*---------------------------------------------------------------------------* - - int ink_web_canonicalize_url(...) - - Inputs: base_url, emb_url, max_dest_url_len. - Output: dest_url. - - This function takes a base url and an embedded url, and produces an - absolute url as specified in RFC 1808, "Relative Uniform Resource - Locators". - - A base url is often the url of a document and an embedded url is an - incomplete reference to a secondary document, often in the same - directory. Together they completely specify an absolute reference to - the secondary document. - - For instance, - base_url "http://inktomi.com/~mep" - emb_url: "path1/path2/foo.html" - - becomes - - dest_url: "http://inktomi.com/~mep/path1/path2/foo.html" - - This function also applies "ink_web_escapify()" to the dest_url. - - You must supply the buffer dest_url and its size, max_dest_url_len. - - mep - 4/15/96 - - *---------------------------------------------------------------------------*/ - -static void -ink_web_canonicalize_url(const char *base_url, const char *emb_url, char *dest_url, int max_dest_url_len) -{ - int doff; - InkWebURLComponents base, emb; - char temp[MAX_URL_LEN + 1], temp2[MAX_URL_LEN + 1]; - int leading_slash, use_base_sche, use_base_host, use_base_path, use_base_quer, use_base_para, use_base_frag; - int host_last = 0; - - doff = 0; - - /* Initialize Component Values */ - - leading_slash = 0; - - /* Decompose The Base And Embedded URLs */ - - ink_web_decompose_url_into_structure(base_url, &base); - ink_web_decompose_url_into_structure(emb_url, &emb); - - /* Print Out Components */ - - /* Select Which Components To Use From Base & Embedded URL */ - - dest_url[0] = '\0'; - - use_base_path = 0; - use_base_quer = 0; - use_base_para = 0; - use_base_frag = 0; - - if (!emb.sche_exists && !emb.path_exists && !emb.host_exists && !emb.quer_exists && !emb.frag_exists && !emb.para_exists) { - /* 2a: if the embedded URL is empty, take everything from the base */ - - use_base_sche = 1; - use_base_host = 1; - use_base_path = 1; - use_base_quer = 1; - use_base_para = 1; - use_base_frag = 1; - } else if (emb.sche_exists && ((strcasecmp(emb.sche, "telnet") == 0) || (strcasecmp(emb.sche, "mailto") == 0) || - (strcasecmp(emb.sche, "news") == 0))) { - const char *p = emb_url; - char *q = dest_url; - while (*p) { - *q++ = ParseRules::ink_tolower(*p++); - } - return; - } else if (emb.sche_exists && !(((strcasecmp(emb.sche, "http") == 0) && !emb.host_exists))) - - { - /* 2b: not good enough, because things like 'http:overview.html' */ - - use_base_sche = 0; - use_base_host = 0; - use_base_path = 0; - use_base_quer = 0; - use_base_para = 0; - use_base_frag = 0; - } else { - use_base_sche = 1; - - /* step 3 - if emb_host non-empty, skip to 7 */ - - if (emb.host_exists) { - use_base_host = 0; - } else { - use_base_host = 1; - - /* step 4 - if emb_path preceded by slash, skip to 7 */ - - if (emb.leading_slash != 1) { - /* step 5 */ - - if (!emb.path_exists) { - use_base_path = 1; - - if (emb.para_exists) { - /* 5a - if emb_para non-empty, skip to 7 */ - - use_base_para = 0; - } else { - /* otherwise use base_para */ - - use_base_para = 1; - - if (emb.quer_exists) { - /* 5b - if emb_quer non-empty, skip to 7 */ - - use_base_quer = 0; - } else { - /* otherwise use base query */ - - use_base_quer = 1; - } - } - } else { - use_base_path = 0; - - /* step 6 */ - /* create combined path */ - /* remove last segment of base_path */ - - remove_last_seg(base.path, temp); - remove_multiple_slash(temp, temp2); - - /* append emb_path */ - - strcat(temp2, emb.path); - - /* remove "." and ".." */ - - ink_web_remove_dots(temp2, emb.path, &leading_slash, MAX_URL_LEN); - emb.path_exists = 1; - emb.leading_slash = base.leading_slash; - } /* 5 */ - } /* 4 */ - } /* 3 */ - } - - /* step 7 - combine parts */ - - if (use_base_sche) { - if (base.sche_exists) { - append_string(dest_url, base.sche, &doff, MAX_URL_LEN); - append_string(dest_url, ":", &doff, MAX_URL_LEN); - host_last = 0; - } - } else { - if (emb.sche_exists) { - append_string(dest_url, emb.sche, &doff, MAX_URL_LEN); - append_string(dest_url, ":", &doff, MAX_URL_LEN); - host_last = 0; - } - } - - if (use_base_host) { - if (base.host_exists) { - append_string(dest_url, "//", &doff, MAX_URL_LEN); - append_string(dest_url, base.host, &doff, MAX_URL_LEN); - if ((base.port_exists) && (strcmp(base.port, "80") != 0)) { - append_string(dest_url, ":", &doff, MAX_URL_LEN); - append_string(dest_url, base.port, &doff, MAX_URL_LEN); - } - host_last = 1; - } - } else { - if (emb.host_exists) { - append_string(dest_url, "//", &doff, MAX_URL_LEN); - append_string(dest_url, emb.host, &doff, MAX_URL_LEN); - if ((emb.port_exists) && (strcmp(emb.port, "80") != 0)) { - append_string(dest_url, ":", &doff, MAX_URL_LEN); - append_string(dest_url, emb.port, &doff, MAX_URL_LEN); - } - host_last = 1; - } - } - - if (use_base_path) { - if (base.path_exists) { - if (base.leading_slash) { - append_string(dest_url, "/", &doff, MAX_URL_LEN); - } - - ink_web_unescapify_string(temp, base.path, MAX_URL_LEN); - ink_web_escapify_string(base.path, temp, max_dest_url_len); - append_string(dest_url, base.path, &doff, MAX_URL_LEN); - host_last = 0; - } - } else { - if (emb.path_exists) { - if (emb.leading_slash) { - append_string(dest_url, "/", &doff, MAX_URL_LEN); - } - ink_web_unescapify_string(temp, emb.path, MAX_URL_LEN); - ink_web_escapify_string(emb.path, temp, max_dest_url_len); - append_string(dest_url, emb.path, &doff, MAX_URL_LEN); - host_last = 0; - } - } - - if (use_base_para) { - if (base.para_exists) { - append_string(dest_url, ";", &doff, MAX_URL_LEN); - append_string(dest_url, base.para, &doff, MAX_URL_LEN); - host_last = 0; - } - } else { - if (emb.para_exists) { - append_string(dest_url, ";", &doff, MAX_URL_LEN); - append_string(dest_url, emb.para, &doff, MAX_URL_LEN); - host_last = 0; - } - } - - if (use_base_quer) { - if (base.quer_exists) { - append_string(dest_url, "?", &doff, MAX_URL_LEN); - append_string(dest_url, base.quer, &doff, MAX_URL_LEN); - host_last = 0; - } - } else { - if (emb.quer_exists) { - append_string(dest_url, "?", &doff, MAX_URL_LEN); - append_string(dest_url, emb.quer, &doff, MAX_URL_LEN); - host_last = 0; - } - } - - if (use_base_frag) { - if (base.frag_exists) { - append_string(dest_url, "#", &doff, MAX_URL_LEN); - append_string(dest_url, base.frag, &doff, MAX_URL_LEN); - host_last = 0; - } - } else { - if (emb.frag_exists) { - append_string(dest_url, "#", &doff, MAX_URL_LEN); - append_string(dest_url, emb.frag, &doff, MAX_URL_LEN); - host_last = 0; - } - } - - if (host_last) { - append_string(dest_url, "/", &doff, MAX_URL_LEN); - } -} - -/*---------------------------------------------------------------------------* - - int ink_web_decompose_url_into_structure(char *url, InkWebURLComponents *c) - - This routine takes a URL and violently tears apart its molecular structure, - placing the URL components in the InkWebURLComponents structure pointed to - by . Flags in the structure indicate whether individual fields are - valid or not. - - *---------------------------------------------------------------------------*/ - -static void -ink_web_decompose_url_into_structure(const char *url, InkWebURLComponents *c) -{ - ink_web_decompose_url(url, c->sche, c->host, c->port, c->path, c->frag, c->quer, c->para, &(c->sche_exists), &(c->host_exists), - &(c->port_exists), &(c->path_exists), &(c->frag_exists), &(c->quer_exists), &(c->para_exists), - &(c->rel_url), &(c->leading_slash)); - - c->is_path_name = 1; - if (c->sche_exists && - ((strcasecmp(c->sche, "mailto") == 0) || (strcasecmp(c->sche, "telnet") == 0) || (strcasecmp(c->sche, "news") == 0))) { - c->is_path_name = 0; - } -} /* End ink_web_decompose_url_into_structure */ - -/*---------------------------------------------------------------------------* - - int ink_web_remove_dots(char *src, char *dest, int *leadingslash, - int max_dest_len) - - This routine takes a path and interprets "." and ".." segments, returning - an appropriately parsed path. It is a warning to pass a path that resolves - to a leading "..". Inputs are the src path and the length of the dest - buffer. Return values are a string written into the dest buffer and - the leadingslash flag, which indicates if the src (and the dest) have a - leading slash, and are therefore not relative paths. - - Basically, these sequences: ".." and "." both - turn into "" where is beginning-or-string or a complete segment, - and is end-of-string or a complete segment. - - e.g. - path1/../path2 -> path2 - /path1/../path2 -> /path2 - /path1/path2/.. -> /path1 - path1/./path2 -> path1/path2 - path1/path2/. -> path1/path2 - ./path1/path2 -> path1/path2 - ./path1 -> path1 - /./path1 -> /path1 - - It is also a warning to pass a path whose returned value needs to be - truncated to fit into max_dest_len characters. - - mep - 4/15/96 - - *---------------------------------------------------------------------------*/ - -/* types of path segment */ -#define NORMAL 0 -#define DOT 1 -#define DOTDOT 2 -#define ZAP 3 -#define ERROR 4 - -/* We statically allocate this many - if we need more, we dynamically */ -/* allocate them. */ -#define STATIC_PATH_LEVELS 256 - -static int -ink_web_remove_dots(char *src, char *dest, int *leadingslash, int max_dest_len) -{ - char *ptr, *end; - int free_flag = 0; - int scount, segstart, zapflag, doff, num; - int temp, i; - int error = 0; - - /* offsets to each path segment */ - char **seg, *segstatic[STATIC_PATH_LEVELS]; - - /* type of each segment is a ".." */ - int *type, typestatic[STATIC_PATH_LEVELS]; - - *leadingslash = 0; - - /* first quickly count the "/"s to get lower bound on # of path levels */ - ptr = src; - end = src + strlen(src); - scount = 0; - while (ptr < end) { - if (*ptr++ == '/') { - scount++; - } - } - scount++; /* adding one to this makes it a lower bound for any case */ - - if (scount <= STATIC_PATH_LEVELS) { - /* we can use the statically allocated ones */ - seg = segstatic; - type = typestatic; - } else { - /* too many levels of path - must dynamically allocate */ - seg = (char **)malloc(scount * sizeof(char *)); - type = (int *)malloc(scount * sizeof(int)); - free_flag = 1; - } - - /* Determine starts of each path segment. - * A segment is defined as: - * "foo/" in the string "foo/", where: - * is , or a single "/" - * is , or another segment. - * "foo" can be "." or ".." - * Makes my head hurt just to think about it. - * - */ - ptr = src; - scount = 0; - /* a segstart starts with start-of-string or a '/' */ - segstart = 1; - while (ptr < end) { - if (*ptr == '/') { - /* include leading '/' in first segment */ - if (ptr == src) { - *leadingslash = 1; - } - segstart = 1; - } else if (segstart == 1) { - seg[scount++] = ptr; - segstart = 0; - } else { - /* this is neither a "/" nor the first char of another segment */ - } - ptr++; - } - /* Now scount is an accurate count of the segments we have found, */ - /* not just that lower bound we quickly got before */ - - /* now figure out if segments are "..", ".", or normal */ - /* ZAP the "."s in place */ - for (i = 0; i < scount; i++) { - ptr = seg[i]; - if (*ptr == '.') { - if ((ptr == end - 1) || (*(ptr + 1) == '/')) { - /* it's a "." */ - type[i] = DOT; - } else if (((ptr == end - 2) && (*(ptr + 1) == '.')) || ((ptr < end - 2) && (*(ptr + 1) == '.') && (*(ptr + 2) == '/'))) { - /* it's a ".." */ - type[i] = DOTDOT; - } else { - type[i] = NORMAL; - } - } else { - /* it's not a special segment */ - type[i] = NORMAL; - } - } - /* now ZAP each DOT, and each NORMAL following a DOTDOT */ - for (i = 0; i < scount; i++) { - if (type[i] == DOT) { - type[i] = ZAP; - } else if (type[i] == DOTDOT) { - /* got a DOTDOT, count back to find first NORMAL segment */ - temp = i - 1; - zapflag = 0; - while ((temp >= 0) && (zapflag == 0)) { - if (type[temp] == NORMAL) { - /* found a NORMAL one, ZAP this pair */ - type[temp] = ZAP; - type[i] = ZAP; - zapflag = 1; - } else { - temp--; - } - } - if (zapflag == 0) { - type[i] = ERROR; - error = 1; - } - } - } - - /* now write out the fixed path */ - doff = 0; - *dest = 0; - if (*leadingslash) { - strncpy(dest + doff, "/", 2); - doff++; - } - for (i = 0; i < scount; i++) { - if ((type[i] == NORMAL) || (type[i] == ERROR)) { - if (i == scount - 1) { - num = (int)(end - seg[i]); - } else { - num = (int)(seg[i + 1] - seg[i]); - } - - /* truncate if nec. */ - if (doff + num > max_dest_len) { - num = max_dest_len - doff; - } - - strncpy(dest + doff, seg[i], num + 1); - doff += num; - } else if (type[i] == DOT) { - /* if you get here, it indicates an algorithmic error in this routine */ - panic("ink_web_remove_dots - single dot remaining in string"); - } else if (type[i] == DOTDOT) { - /* if you get here, it indicates an algorithmic error in this routine */ - panic("ink_web_remove_dots - double dot remaining in string"); - } - } - - if (free_flag) { - free(seg); - free(type); - } - - return (error); -} - -/*---------------------------------------------------------------------------* - - int ink_web_unescapify_string(...) - - Takes a string that has special characters turned to %AB format - and converts them back to single special characters. See - ink_web_escapify_string() above. - - mep - 4/15/96 - - *---------------------------------------------------------------------------*/ - -static int -ink_web_unescapify_string(char *dest_in, char *src_in, int max_dest_len) -{ - char *src = src_in; - char *dest = dest_in; - const char *c1; - const char *c2; - int quit = 0; - int dcount = 0; - int num = 0; - int dig1 = 0; - int dig2 = 0; - - while ((*src != 0) && !quit) { - if (*src == '%') { - /* found start of an escape sequence, unescape it */ - if ((*(src + 1) != 0) && (*(src + 2) != 0)) { - c1 = strchr(hexdigits, *(src + 1)); - c2 = strchr(hexdigits, *(src + 2)); - if ((c1 == nullptr) || (c2 == nullptr)) { - ink_warning("got escape sequence but no hex digits in:%s", src_in); - if (dcount + 1 < max_dest_len) { - *(dest++) = *src; - dcount++; - } else { - ink_warning("ink_web_unescapify_string had to truncate:%s", src_in); - quit = 1; - } - } else { - /* check if hex digits lowercase */ - dig1 = (int)(c1 - hexdigits); - dig2 = (int)(c2 - hexdigits); - if (dig1 > 15) { - dig1 -= 6; - } - if (dig2 > 15) { - dig2 -= 6; - } - /* this is the ascii char */ - num = 16 * dig1 + dig2; - - if (!strchr(dontunescapify, num)) { - /* unescapify the escape sequence you found */ - if (dcount + 1 < max_dest_len) { - *(dest++) = num; - dcount++; - src += 2; - } else { - ink_warning("ink_web_escapify_string had to truncate:%s", src_in); - quit = 1; - } - } else { - /* don't unescapify these, just pass the escape sequence */ - if (dcount + 3 < max_dest_len) { - *(dest++) = '%'; - *(dest++) = hexdigits[dig1]; - *(dest++) = hexdigits[dig2]; - dcount += 3; - src += 2; - } else { - ink_warning("ink_web_unescapify_string had to truncate:%s", src_in); - quit = 1; - } - } - } - } else { - ink_warning("got escape sequence but no hex digits (too near end of string) in:%s", src_in); - if (dcount + 1 < max_dest_len) { - *dest++ = *src; - dcount++; - } else { - ink_warning("ink_web_unescapify_string had to truncate:%s", src_in); - quit = 1; - } - } - } else { - if (dcount + 1 < max_dest_len) { - *dest++ = *src; - dcount++; - } else { - ink_warning("ink_web_unescapify_string had to truncate:%s", src_in); - quit = 1; - } - } - src++; - } - /* terminate string */ - if (dcount < max_dest_len) { - *dest = 0; - } else { - *(dest_in + max_dest_len) = 0; - } - - return (quit); -} - -/*---------------------------------------------------------------------------* - - int ink_web_escapify_string(...) - - This functions takes an input src_in and converts all special - characters to % form. - - Special characters are everything that is not: - #$-_.+!*'(),;/?:@=& or - or - - - e.g. "abcd fghi[klmn^" -> "abcd%20fghi%5Bklmn%5E" - - You must supply the buffer dest_in, with a size of max_dest_len. If - the unescapified string grows larger than this, it will be truncated - and you will get a warning. - - mep - 4/15/96 - - *---------------------------------------------------------------------------*/ - -static int -ink_web_escapify_string(char *dest_in, char *src_in, int max_dest_len) -{ - int d1, d2; - char *src = src_in; - char *dest = dest_in; - int dcount = 0; - int quit = 0; - - while ((*src != 0) && (dcount < max_dest_len) && (quit == 0)) { - if ((char *)strchr(dontescapify, *src) || ParseRules::is_alpha(*src) || ParseRules::is_digit(*src)) { - /* this is regular character, don't escapify it */ - if (dcount + 1 < max_dest_len) { - *dest++ = *src; - dcount++; - } else { - ink_warning("ink_web_escapify_string (1) had to truncate:'%s'", src_in); - quit = 1; - } - } else { - d1 = *src / 16; - d2 = *src % 16; - if (dcount + 3 < max_dest_len) { - *dest++ = '%'; - *dest++ = hexdigits[d1]; - *dest++ = hexdigits[d2]; - /* fprintf(stderr,"%d %d %c %c\n",d1,d2,hexdigits[d1],hexdigits[d2]);*/ - dcount += 3; - } else { - ink_warning("ink_web_escapify_string (2) had to truncate:'%s'", src_in); - quit = 1; - } - } - src++; - } - /* terminate string */ - if (dcount < max_dest_len) { - *dest = 0; - } else { - *(dest_in + max_dest_len - 1) = 0; - } - - return (quit); -}