diff --git a/NOTICE b/NOTICE index 453af9b4419..adb427a534b 100644 --- a/NOTICE +++ b/NOTICE @@ -9,6 +9,7 @@ This product includes software developed at - Comcast - LinkedIn - Mike Pall + - GoDaddy ~~~ @@ -44,8 +45,8 @@ Copyright (C) 2012 Oregon Health & Science University ~~~ -healthcheck Plugin developed by GoDaddy. -Copyright (C) 2012 GoDaddy. +cache-key-genid Plugin developed by GoDaddy +Copyright (C) 2013 GoDaddy Operating Company, LLC ~~~ @@ -78,7 +79,7 @@ Copyright (C) 2014 Yahoo! Inc. All rights reserved. ~~~ healthchecks: Plugin for ATS healthchecks. -Copyright (C) 2012 Go Daddy Operating Company, LLC +Copyright (C) 2012 GoDaddy Operating Company, LLC ~~~ diff --git a/plugins/experimental/cache-key-genid/README.md b/plugins/experimental/cache-key-genid/README.md new file mode 100644 index 00000000000..4302710b82f --- /dev/null +++ b/plugins/experimental/cache-key-genid/README.md @@ -0,0 +1,112 @@ +ats-plugin-cache-key-genid +========================== + +Apache Traffic Server (ATS) plugin to modify the URL used as the cache key by adding a generation ID tag to the hostname. +This is useful when ATS is running in reverse proxy mode and proxies many hosts (i.e., hundreds or thousands). +Each host has a generation ID (genid) that's stored in a small embedded kyotocabinet database. +Without this plugin, the CacheUrl is set to the requested URL. + +For example, if the requested url is http://example.tld/foobar.css, then natively the CacheUrl is http://example.tld/foobar.css. +With this plugin, the CacheUrl is set to http://example.tld.#/foobar.css, where # is an integer representing example.tld's genid. + +## License + +Copyright © 2013 Go Daddy Operating Company, LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +## Why use this plugin? + +The simple answer: fast clear cache operation. + +By incrementing a host's genid, you instantly invalidate all of that host's files. They are not really invalidated, but they become impossible to find. + +When an HTTP request comes into ATS, it takes the URL and generates the CacheUrl, then looks in the cache for the key matching md5(CacheUrl). +If CacheUrl changes, the md5 hash changes, and it won't find old copies. + +This method is far faster than http://localhost/delete_regex=http://example\.tld/.*. ATS's delete_regex method performs a full cache scan. +It looks at every file in the cache, determines if it matches the regular expression, and then deletes the file. Using ats-plugin-cache-key-genid, no cache +scan is required. You simply increment a value in a kyotocabinet database and you are done. + +## The high level workflow of what this plugin does + +The solution is to not actually delete the files, but instead increment a counter in an embedded database local to each ATS server. +ats-plugin-cache-key-genid modifies the cache-key to include the host's genid. + +* Intercept incoming http requests +* Hook just before the cache key is set +* The cache key is effectively md5(url) or md5(http://host/path). Change it to md5(http://host.genid/path) + * Take the url + * Find the host + * Look up the host's genid in an embedded, super fast, super lightweight, mostly in memory, key/value pair kyotocabinet database + * Make a newurl string by injecting the host's genid just after the host in the original url. 
i.e., http://foo.com/style.css becomes http://foo.com.2/style.css +* Call TSCacheUrlSet with the newurl + +How do you accomplish the genid increment? Below we give you the kyotocabinet command to do so. Presumably, you have some form of user interface where users request +a Clear Cache operation. You must relay this to your ATS server(s) and command them to increment that host's genid. There are many designs available for this. +You either push or pull the command. This might be accomplished by an event bus, for example. Passing these requests to the ATS servers is beyond +the scope of this write-up. + +## Requires + +* Apache Traffic Server (http://trafficserver.apache.org/) + * Note: we used ATS 3.0 to build this; it may work with other versions. +* Kyoto Cabinet (http://fallabs.com/kyotocabinet/) + * Note: we used kyotocabinet-1.2.76 to build ours; it may work with other versions. + +The instructions below assume you have Apache Traffic Server installed in /opt/ats and Kyoto Cabinet installed in /opt/kyotocabinet. If your installs are in different directories, change the paths in the following commands accordingly. 
+ +## to compile +```bash +/opt/ats/bin/tsxs -o cache-key-genid.so -c cache-key-genid.c +``` + +## to compile in libkyotocabinet +```bash +gcc -shared -Wl,-E -o cache-key-genid.so cache-key-genid.lo /opt/kyotocabinet/lib/libkyotocabinet.a +``` + +## to put into libexec/trafficserver/ +```bash +sudo /opt/ats/bin/tsxs -o cache-key-genid.so -i +``` + +## to create the kyotocabinet database +```bash +sudo /opt/kyotocabinet/bin/kcpolymgr create -otr /opt/ats/var/trafficserver/genid.kch +# replace "ats:disk" with the user:group that runs your ATS server +sudo chown ats:disk /opt/ats/var/trafficserver/genid.kch +``` + +## to add/modify a record in the kyotocabinet database +```bash +sudo /opt/kyotocabinet/bin/kcpolymgr set -onl /opt/ats/var/trafficserver/genid.kch example.tld 5 +``` + +## to get a record from the kyotocabinet database +```bash +/opt/kyotocabinet/bin/kcpolymgr get -onl /opt/ats/var/trafficserver/genid.kch example.tld 2>/dev/null +``` + +## Set ATS debug to ON in records.config like this (do not do this in production): +```bash +CONFIG proxy.config.diags.debug.enabled INT 1 +CONFIG proxy.config.diags.debug.tags STRING cache-key-genid +``` + +If you turn the debug on like this, then you can tail the traffic.out file and witness the discovery of the url and the CacheUrl transformation. +You would not want to run this in production, b/c you'd be writing too much to the log file, which would slow down ATS. +It's great for dev/test/debug, however, so you know it's working well. + + diff --git a/plugins/experimental/cache-key-genid/cache-key-genid.c b/plugins/experimental/cache-key-genid/cache-key-genid.c new file mode 100644 index 00000000000..64b3c062642 --- /dev/null +++ b/plugins/experimental/cache-key-genid/cache-key-genid.c @@ -0,0 +1,223 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* cache-key-genid.c - Plugin to modify the URL used as a cache key for
+ * requests, without modifying the URL used for actually fetching data from
+ * the origin server.
+ */
+
+/* NOTE(review): the targets of the bare "#include" lines were lost when this
+ * patch was extracted; the system headers below are reconstructed from the
+ * APIs this file calls -- confirm against the original commit.
+ */
+#include <ts/ts.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "/opt/kyotocabinet/include/kclangc.h"
+
+#define PLUGIN_NAME "cache-key-genid"
+#define PLUGIN_VERSION "1.0.6"
+#define VENDOR_NAME "GoDaddy.com, LLC"
+#define VENDOR_SUPPORT_EMAIL "support@godaddy.com"
+
+/* Path of the Kyoto Cabinet host->genid database. Written once by
+ * TSPluginInit from argv[1]; only read afterwards.
+ */
+static char genid_kyoto_db[1024];
+
+/* get_genid_host
+ * Copies the authority part of url (the text between "//" and the next '/')
+ * into a newly allocated, NUL-terminated string and stores it in *host.
+ *
+ * *host is left untouched when url has no "//", has no '/' after the "//",
+ * or has an empty authority, so the caller must initialise *host to NULL.
+ * The result is allocated with TSmalloc(); the caller releases it with
+ * TSfree().
+ */
+static void
+get_genid_host(char **host, char *url)
+{
+  const char *start;
+  const char *end;
+  size_t host_len;
+
+  start = strstr(url, "//");
+  if (!start) {
+    return;
+  }
+  start += 2;
+
+  end = strchr(start, '/');
+  if (!end || end == start) {
+    return;
+  }
+
+  host_len = (size_t)(end - start);
+  /* TSmalloc, not calloc: handle_hook releases this buffer with TSfree(). */
+  *host = TSmalloc(host_len + 1);
+  if (*host) {
+    memcpy(*host, start, host_len);
+    (*host)[host_len] = '\0';
+  }
+}
+
+/* get_genid_newurl
+ * Builds a copy of url with ".<gen_id>" injected right after host, so
+ * http://foo.com/s.css becomes http://foo.com.7/s.css.
+ *
+ * host is searched for after the "//" separator (when url has one) so that a
+ * short host name cannot match inside the scheme. Returns NULL when host
+ * does not occur in url or the buffer cannot be allocated. The result is
+ * allocated with TSmalloc(); the caller releases it with TSfree().
+ */
+static char *
+get_genid_newurl(char *url, char *host, int gen_id)
+{
+  char suffix[32]; /* ".%d" for any int fits comfortably */
+  const char *authority;
+  const char *found;
+  char *newurl;
+  size_t prefix_len;
+  size_t tail_len;
+  int suffix_len;
+
+  authority = strstr(url, "//");
+  found     = strstr(authority ? authority + 2 : url, host);
+  if (!found) {
+    return NULL;
+  }
+
+  /* Let snprintf report the exact width instead of estimating it with
+   * log10(), which is undefined for negative ids. */
+  suffix_len = snprintf(suffix, sizeof(suffix), ".%d", gen_id);
+  if (suffix_len < 0 || (size_t)suffix_len >= sizeof(suffix)) {
+    return NULL;
+  }
+
+  prefix_len = (size_t)(found - url) + strlen(host);
+  tail_len   = strlen(url + prefix_len);
+
+  newurl = TSmalloc(prefix_len + (size_t)suffix_len + tail_len + 1);
+  if (!newurl) {
+    return NULL;
+  }
+  memcpy(newurl, url, prefix_len);
+  memcpy(newurl + prefix_len, suffix, (size_t)suffix_len);
+  /* +1 copies the terminating NUL of url along with the tail. */
+  memcpy(newurl + prefix_len + (size_t)suffix_len, url + prefix_len, tail_len + 1);
+  return newurl;
+}
+
+/* get_genid
+ * Looks up the host's genid in the host->genid database.
+ *
+ * Returns the stored value parsed as a base-10 integer, or 0 when the
+ * database cannot be opened or holds no record for host. Missing hosts are
+ * deliberately not written back: that keeps the database small and lets it
+ * be opened read-only without locking.
+ */
+static int
+get_genid(char *host)
+{
+  KCDB *db;
+  char *vbuf;
+  size_t vsiz;
+  int answer = 0;
+  int host_size;
+
+  /* create the database object */
+  db = kcdbnew();
+  /* open the database */
+  if (!kcdbopen(db, genid_kyoto_db, KCOREADER | KCONOLOCK)) {
+    TSDebug(PLUGIN_NAME, "could not open the genid database %s\n", genid_kyoto_db);
+    TSError("[%s] could not open the genid database\n", PLUGIN_NAME);
+    /* the object from kcdbnew() must be destroyed even though open failed */
+    kcdbdel(db);
+    return 0;
+  }
+
+  vbuf = kcdbget(db, host, strlen(host), &vsiz);
+  if (vbuf) {
+    TSDebug(PLUGIN_NAME, "kcdbget(%s) = %s\n", host, vbuf);
+    answer = (int)strtol(vbuf, NULL, 10);
+    kcfree(vbuf);
+  } else {
+    host_size = (int)strlen(host);
+    TSDebug(PLUGIN_NAME, "kcdbget(%s) - no record found, len(%d)\n", host, host_size);
+  }
+
+  kcdbclose(db);
+  /* kcdbclose() only closes the file; kcdbdel() releases the object itself */
+  kcdbdel(db);
+  return answer;
+}
+
+/* handle_hook
+ * Fires on TS_EVENT_HTTP_READ_REQUEST_HDR events, gets the effectiveUrl
+ * finds the host, gets the generation ID, gen_id, for the host
+ * and runs TSCacheUrlSet to change the cache key for the read
+ *
+ * The transaction is always re-enabled for this event, whether or not the
+ * cache URL was rewritten. Returns 1 on success and 0 when a step failed or
+ * the event was unexpected.
+ */
+static int
+handle_hook(TSCont contp, TSEvent event, void *edata)
+{
+  TSHttpTxn txnp = (TSHttpTxn)edata;
+  char *url      = NULL;
+  char *host     = NULL;
+  char *newurl   = NULL;
+  int url_length;
+  int gen_id;
+  int ok = 1;
+
+  (void)contp;
+
+  if (event != TS_EVENT_HTTP_READ_REQUEST_HDR) {
+    TSAssert(!"Unexpected event");
+    return 0;
+  }
+
+  TSDebug(PLUGIN_NAME, "v%s\n", PLUGIN_VERSION);
+
+  url = TSHttpTxnEffectiveUrlStringGet(txnp, &url_length);
+  if (!url) {
+    TSError("[%s] could not retrieve request url\n", PLUGIN_NAME);
+    ok = 0;
+    goto done;
+  }
+
+  get_genid_host(&host, url);
+  if (!host) {
+    TSError("[%s] could not retrieve request host\n", PLUGIN_NAME);
+    ok = 0;
+    goto done;
+  }
+
+  TSDebug(PLUGIN_NAME, "From url (%s) discovered host (%s)\n", url, host);
+  gen_id = get_genid(host);
+  /* a genid of 0 means "no record": leave the cache key untouched */
+  if (gen_id) {
+    newurl = get_genid_newurl(url, host, gen_id);
+  }
+  if (newurl) {
+    TSDebug(PLUGIN_NAME, "Rewriting cache URL for %s to %s\n", url, newurl);
+    if (TSCacheUrlSet(txnp, newurl, strlen(newurl)) != TS_SUCCESS) {
+      TSDebug(PLUGIN_NAME, "Error, unable to modify cache url\n");
+      TSError("[%s] Unable to modify cache url from %s to %s\n", PLUGIN_NAME, url, newurl);
+      ok = 0;
+    }
+  }
+
+done:
+  /* Clean up */
+  if (url) {
+    TSfree(url);
+  }
+  if (newurl) {
+    TSfree(newurl);
+  }
+  if (host) {
+    TSfree(host);
+  }
+  TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+  return ok;
+}
+
+/* TSPluginInit
+ * Plugin entry point. argv[1] must be the path of the Kyoto Cabinet genid
+ * database and must fit in genid_kyoto_db; otherwise an error is logged and
+ * no hook is installed. On success the plugin is registered and handle_hook
+ * is attached to TS_HTTP_READ_REQUEST_HDR_HOOK.
+ */
+void
+TSPluginInit(int argc, const char *argv[])
+{
+  TSPluginRegistrationInfo info;
+
+  info.plugin_name   = PLUGIN_NAME;
+  info.vendor_name   = VENDOR_NAME;
+  info.support_email = VENDOR_SUPPORT_EMAIL;
+
+  if (argc < 2 || strlen(argv[1]) >= sizeof(genid_kyoto_db)) {
+    TSError("[%s] plugin registration failed. check argv[1] for db path", PLUGIN_NAME);
+    return;
+  }
+  /* length was checked above, so this copy cannot truncate */
+  snprintf(genid_kyoto_db, sizeof(genid_kyoto_db), "%s", argv[1]);
+
+  if (TSPluginRegister(TS_SDK_VERSION_3_0, &info) != TS_SUCCESS) {
+    TSError("[%s] plugin registration failed. check version.", PLUGIN_NAME);
+    return;
+  }
+
+  TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, TSContCreate(handle_hook, NULL));
+}