Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 101 additions & 66 deletions doc/admin-guide/plugins/cachekey.en.rst

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions doc/admin-guide/plugins/index.en.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Plugins that are considered stable are installed by default in |TS| releases.
:doc:`Background Fetch <background_fetch.en>`
Proactively fetch content from Origin in a way that it will fill the object into cache.

:doc:`Cache Key Manipulation <cachekey.en>`
Allows some common cache key manipulations based on various HTTP request elements.
:doc:`Cache Key and Parent Selection URL Manipulation <cachekey.en>`
Allows some common cache key or parent selection URL manipulations based on various HTTP request elements.

:doc:`Cache Promotion Policies <cache_promote.en>`
Allows for control over which assets should be written to cache, or not.
Expand Down
69 changes: 56 additions & 13 deletions plugins/cachekey/cachekey.cc
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ getCanonicalUrl(TSMBuffer buf, TSMLoc url, bool canonicalPrefix, bool provideDef
* @param uriType type of the URI used to create the cachekey ("remap" or "pristine")
* @param rri remap request info
*/
CacheKey::CacheKey(TSHttpTxn txn, String separator, CacheKeyUriType uriType, TSRemapRequestInfo *rri)
: _txn(txn), _separator(std::move(separator)), _uriType(uriType)
CacheKey::CacheKey(TSHttpTxn txn, String separator, CacheKeyUriType uriType, CacheKeyKeyType keyType, TSRemapRequestInfo *rri)
: _txn(txn), _separator(std::move(separator)), _uriType(uriType), _keyType(keyType)
{
_key.reserve(512);

Expand All @@ -250,8 +250,9 @@ CacheKey::CacheKey(TSHttpTxn txn, String separator, CacheKeyUriType uriType, TSR
/* Get the URI and header to base the cachekey on.
* @TODO it might make sense to add more supported URI types */

CacheKeyDebug("setting %s from a %s plugin", getCacheKeyKeyTypeName(_keyType), _remap ? "remap" : "global");

if (_remap) {
CacheKeyDebug("setting cache key from a remap plugin");
if (PRISTINE == _uriType) {
if (TS_SUCCESS != TSHttpTxnPristineUrlGet(_txn, &_buf, &_url)) {
/* Failing here is unlikely. No action seems the only reasonable thing to do from within this plug-in */
Expand All @@ -266,7 +267,6 @@ CacheKey::CacheKey(TSHttpTxn txn, String separator, CacheKeyUriType uriType, TSR
}
_hdrs = rri->requestHdrp;
} else {
CacheKeyDebug("setting cache key from a global plugin");
if (TS_SUCCESS != TSHttpTxnClientReqGet(_txn, &_buf, &_hdrs)) {
/* Failing here is unlikely. No action seems the only reasonable thing to do from within this plug-in */
CacheKeyError("failed to get client request handle");
Expand Down Expand Up @@ -745,24 +745,67 @@ CacheKey::appendUaClass(Classifier &classifier)
bool
CacheKey::finalize() const
{
bool res = true;
CacheKeyDebug("finalizing cache key '%s' from a %s plugin", _key.c_str(), (_remap ? "remap" : "global"));
if (TS_SUCCESS != TSCacheUrlSet(_txn, &(_key[0]), _key.size())) {
int len;
char *url = TSHttpTxnEffectiveUrlStringGet(_txn, &len);
if (nullptr != url) {
bool res = false;
String msg;

CacheKeyDebug("finalizing %s '%s' from a %s plugin", getCacheKeyKeyTypeName(_keyType), _key.c_str(),
(_remap ? "remap" : "global"));
switch (_keyType) {
case CACHE_KEY: {
if (TS_SUCCESS == TSCacheUrlSet(_txn, &(_key[0]), _key.size())) {
/* Set cache key successfully */
msg.assign("set cache key to ").append(_key);
res = true;
} else {
if (_remap) {
/* Remap instance. Always runs first by design (before TS_HTTP_POST_REMAP_HOOK) */
CacheKeyError("failed to set cache key for url %.*s", len, url);
msg.assign("failed to set cache key");
} else {
/* Global instance. We would fail and get here if a per-remap instance has already set the cache key
* (currently TSCacheUrlSet() can be called only once successfully). Don't error, just debug.
* @todo avoid the consecutive attempts and error only on unexpected failures. */
CacheKeyDebug("failed to set cache key for url %.*s", len, url);
msg.assign("failed to set cache key");
}
}
} break;
case PARENT_SELECTION_URL: {
/* parent selection */
const char *start = _key.c_str();
const char *end = _key.c_str() + _key.length();
TSMLoc new_url_loc;
if (TS_SUCCESS == TSUrlCreate(_buf, &new_url_loc)) {
if (TS_PARSE_DONE == TSUrlParse(_buf, new_url_loc, &start, end)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will return an error if the key is not a URL. The parser is significantly less picky than it probably ought to be about what constitutes a URL, but if the separator is set to an empty string and no prefix is used, user input can still cause this to fail. Consider these parameters:

@plugin=cachekey.so
    @pparam=--key-type=parent_selection_url
    @pparam=--separator=
    @pparam=--remove-prefix=1

With this request:

http://localhost/://-@@

That creates a URL that doesn't parse. Likewise, using an invalid URL char in the separator will create URLs that don't parse.

@plugin=cachekey.so
    @pparam=--key-type=parent_selection_url
    @pparam=--separator=|

If this cache key genuinely needs to be a valid URL, we should document that and think carefully about what sorts of request URLs could violate that. It might even be worth encoding the components in something like base64 to prevent user input from affecting the parent selection algorithm here.

If it doesn't need to be a valid URL (and I'm not entirely certain why it would need to be a URL, if you're just hashing it later), then we should avoid parsing it as one.

Copy link
Contributor

@jrushford jrushford Aug 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alficles Currently proxy/ParentConsistentHash expects cache_info_parent_selection_url to be a URL. The function ParentConsistentHash::getPathHash() uses the path in the URL to create a hash. So in this case, if it fails to parse the key, an error is logged, the parent selection URL is not set, and the original request URL is used instead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What, then, should the strategy be for ensuring that a given key will form a URL? The example provided in the docs produces lines that look like:

/hostname/port/path/to/content/querystring

And that's definitely not a URL, although the exceptionally generous parser ATS uses will happily take it. How does a user know that that's a valid URL and what portion of it will be used for parent selection?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alficles The override for the parent selection URL needs to be of type URL. Parent selection then makes a call to ps_url->string_get_ref(&len); which returns a normalized path string with escape sequences, and that string is used to create the PS hash. See ParentConsistentHash::getPathHash(). So it looks like cachekey creates a URL with a path string /hostname/port/path/to/content/querystring, which is then normalized with escape sequences as necessary before the hash is created.

Copy link
Contributor Author

@gtenev gtenev Aug 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alficles hopefully the following plugin design info brings some clarification.

The cachekey plugin is designed to provide an easy and somewhat structured way to set the cache key (and now the parent selection URL) by performing the following steps.

  1. Takes the value of each request element (URL or header)
  2. Runs a transformation (defined by the config) on it
  3. Calls the corresponding core API to get the final result set

It is a simple and generic design which allows the plugin to keep being enhanced with:

  • new input types (i.e. remapped and pristine request URI)
  • new target types (i.e. cache key and later parent selection URLs)
  • new transformations (see the plugin documentation for examples)

The plugin does not get into the validation business which makes it generic and powerful.

The syntax validation is left to the lower level using the core API call. If the call fails it means that the transformation result is invalid, so the target should be (ideally) unchanged and an error message should be issued to the log. Pre-validation would be inefficient, error prone and would require unnecessary maintenance (keeping the two validations in-sync).

The semantic validation is left to the traffic server operator who ultimately knows the final goal (i.e. how the content should be cached or how the multi-tier caching should work).

It is true that one can easily shoot oneself in the foot but it is not feasible and also very limiting to code all use-cases into the plugin and even then we would still need to make sure it works in production.

HTH

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. It feels like a bit of a footgun if you're not careful, but it's definitely useful. (And indeed I envision using it in the not-too-distant future.) Thanks!

if (TS_SUCCESS == TSHttpTxnParentSelectionUrlSet(_txn, _buf, new_url_loc)) {
msg.assign("set parent selection URL to ").append(_key);
res = true;
} else {
msg.assign("failed to set parent selection URL");
}
} else {
msg.assign("failed to parse parent selection URL");
}
TSHandleMLocRelease(_buf, TS_NULL_MLOC, new_url_loc);
} else {
msg.assign("failed to create parent selection URL");
}
} break;
default: {
msg.assign("unknown target URI type");
} break;
}

/* Report status - debug level in case of success, error in case of failure.
* Since getting effective URI is expensive add it only in case of failure */
if (res) {
CacheKeyDebug("%.*s", static_cast<int>(msg.length()), msg.c_str());
} else {
int len;
char *url = TSHttpTxnEffectiveUrlStringGet(_txn, &len);
if (nullptr != url) {
msg.append(" for url ").append(url, len);
TSfree(url);
}
res = false;
CacheKeyError("%.*s", static_cast<int>(msg.length()), msg.c_str());
}
return res;
}
10 changes: 6 additions & 4 deletions plugins/cachekey/cachekey.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
class CacheKey
{
public:
CacheKey(TSHttpTxn txn, String separator, CacheKeyUriType urlType, TSRemapRequestInfo *rri = nullptr);
CacheKey(TSHttpTxn txn, String separator, CacheKeyUriType urlType, CacheKeyKeyType targetUrlType,
TSRemapRequestInfo *rri = nullptr);
~CacheKey();

void append(unsigned number);
Expand Down Expand Up @@ -86,7 +87,8 @@ class CacheKey
bool _valid = false; /**< @brief shows if the constructor discovered the input correctly */
bool _remap = false; /**< @brief shows if the input URI was from remap info */

String _key; /**< @brief cache key */
String _separator; /**< @brief a separator used to separate the cache key elements extracted from the URI */
CacheKeyUriType _uriType; /**< @brief the URI type used as a cachekey base: pristine, remap, etc. */
String _key; /**< @brief cache key */
String _separator; /**< @brief a separator used to separate the cache key elements extracted from the URI */
CacheKeyUriType _uriType = REMAP; /**< @brief the URI type used as a cachekey base: pristine, remap, etc. */
CacheKeyKeyType _keyType = CACHE_KEY; /**< @brief the target URI type: cache key, parent selection, etc. */
};
62 changes: 58 additions & 4 deletions plugins/cachekey/configs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,9 @@ Configs::init(int argc, const char *argv[], bool perRemapConfig)
{const_cast<char *>("remove-path"), optional_argument, nullptr, 'r'},
{const_cast<char *>("separator"), optional_argument, nullptr, 's'},
{const_cast<char *>("uri-type"), optional_argument, nullptr, 't'},
{const_cast<char *>("capture-header"), optional_argument, nullptr, 'u'},
{const_cast<char *>("canonical-prefix"), optional_argument, nullptr, 'v'},
{const_cast<char *>("key-type"), optional_argument, nullptr, 'u'},
{const_cast<char *>("capture-header"), optional_argument, nullptr, 'v'},
{const_cast<char *>("canonical-prefix"), optional_argument, nullptr, 'w'},
/* reserve 'z' for 'config' files */
{nullptr, 0, nullptr, 0},
};
Expand Down Expand Up @@ -503,10 +504,13 @@ Configs::init(int argc, const char *argv[], bool perRemapConfig)
case 't': /* uri-type */
setUriType(optarg);
break;
case 'u': /* capture-header */
case 'u': /* key-type */
setKeyType(optarg);
break;
case 'v': /* capture-header */
_headers.addCapture(optarg);
break;
case 'v': /* canonical-prefix */
case 'w': /* canonical-prefix */
_canonicalPrefix = isTrue(optarg);
break;
}
Expand Down Expand Up @@ -578,8 +582,58 @@ Configs::setUriType(const char *arg)
}
}

/**
 * @brief Selects the target the generated key is applied to, from the key type option value.
 *
 * Recognizes "cache_key" and "parent_selection_url" (compared case-insensitively, exact length).
 * A missing or unrecognized value is reported as an error and the stored key type is left as it was.
 *
 * @param arg option value, may be nullptr.
 */
void
Configs::setKeyType(const char *arg)
{
  /* Nothing to match against when the option came without a value. */
  if (nullptr == arg) {
    CacheKeyError("found an empty key type, using default 'cache_key'");
    return;
  }

  /* The length check makes the bounded comparison below an exact match rather than a prefix match. */
  const auto argLen = strlen(arg);

  if (9 == argLen && 0 == strncasecmp(arg, "cache_key", 9)) {
    _keyType = CacheKeyKeyType::CACHE_KEY;
    CacheKeyDebug("setting cache key");
    return;
  }

  if (20 == argLen && 0 == strncasecmp(arg, "parent_selection_url", 20)) {
    _keyType = CacheKeyKeyType::PARENT_SELECTION_URL;
    CacheKeyDebug("setting parent selection URL");
    return;
  }

  CacheKeyError("unrecognized key type '%s', using default 'cache_key'", arg);
}

/**
 * @brief Accessor for the URI type the key is based on.
 *
 * @return the value currently stored in _uriType.
 */
CacheKeyUriType
Configs::getUriType()
{
  return this->_uriType;
}

/**
 * @brief Accessor for the target the generated key is applied to.
 *
 * @return the value currently stored in _keyType.
 */
CacheKeyKeyType
Configs::getKeyType()
{
  return this->_keyType;
}

/**
 * @brief Maps a URI type to a human-readable name.
 *
 * @param type URI type to describe.
 * @return "remap" or "pristine" for the known types, "unknown" for any other value.
 */
const char *
getCacheKeyUriTypeName(CacheKeyUriType type)
{
  if (REMAP == type) {
    return "remap";
  }
  if (PRISTINE == type) {
    return "pristine";
  }
  /* Any value outside the known enumerators. */
  return "unknown";
}

/**
 * @brief Maps a key (target) type to a human-readable name.
 *
 * @param type key type to describe.
 * @return "cache key" or "parent selection url" for the known types, "unknown" for any other value.
 */
const char *
getCacheKeyKeyTypeName(CacheKeyKeyType type)
{
  if (CACHE_KEY == type) {
    return "cache key";
  }
  if (PARENT_SELECTION_URL == type) {
    return "parent selection url";
  }
  /* Any value outside the known enumerators. */
  return "unknown";
}
29 changes: 24 additions & 5 deletions plugins/cachekey/configs.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ enum CacheKeyUriType {
PRISTINE,
};

/**
 * @brief Target that the generated key is applied to.
 */
enum CacheKeyKeyType {
CACHE_KEY, /**< @brief the key is set as the transaction's cache key */
PARENT_SELECTION_URL, /**< @brief the key is parsed as a URL and set as the parent selection URL */
};

const char *getCacheKeyUriTypeName(CacheKeyUriType type);
const char *getCacheKeyKeyTypeName(CacheKeyKeyType type);

/**
* @brief Plug-in configuration elements (query / headers / cookies).
*
Expand Down Expand Up @@ -182,11 +190,21 @@ class Configs
*/
void setUriType(const char *arg);

/**
* @brief sets the target URI Type.
*/
void setKeyType(const char *arg);

/**
* @brief get URI type.
*/
CacheKeyUriType getUriType();

/**
* @brief get target URI type.
*/
CacheKeyKeyType getKeyType();

/* Make the following members public to avoid unnecessary accessors */
ConfigQuery _query; /**< @brief query parameter related configuration */
ConfigHeaders _headers; /**< @brief headers related configuration */
Expand All @@ -208,9 +226,10 @@ class Configs
*/
bool loadClassifiers(const String &args, bool blacklist = true);

bool _prefixToBeRemoved = false; /**< @brief instructs the prefix (i.e. host:port) not to added to the cache key */
bool _pathToBeRemoved = false; /**< @brief instructs the path not to added to the cache key */
bool _canonicalPrefix = false; /**< @brief keep the URI scheme and authority element used as input to transforming into key */
String _separator = "/"; /**< @brief a separator used to separate the cache key elements extracted from the URI */
CacheKeyUriType _uriType = REMAP; /**< @brief shows which URI the cache key will be based on */
bool _prefixToBeRemoved = false; /**< @brief instructs the prefix (i.e. host:port) not to be added to the cache key */
bool _pathToBeRemoved = false; /**< @brief instructs the path not to be added to the cache key */
bool _canonicalPrefix = false; /**< @brief keep the URI scheme and authority element used as input to transforming into key */
String _separator = "/"; /**< @brief a separator used to separate the cache key elements extracted from the URI */
CacheKeyUriType _uriType = REMAP; /**< @brief shows which URI the cache key will be based on */
CacheKeyKeyType _keyType = CACHE_KEY; /**< @brief target URI to be modified, cache key or parent selection */
};
2 changes: 1 addition & 1 deletion plugins/cachekey/plugin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ static void
setCacheKey(TSHttpTxn txn, Configs *config, TSRemapRequestInfo *rri = nullptr)
{
/* Initial cache key facility from the requested URL. */
CacheKey cachekey(txn, config->getSeparator(), config->getUriType(), rri);
CacheKey cachekey(txn, config->getSeparator(), config->getUriType(), config->getKeyType(), rri);

/* Append custom prefix or the host:port */
if (!config->prefixToBeRemoved()) {
Expand Down