Skip to content
This repository has been archived by the owner on Jul 26, 2024. It is now read-only.

Commit

Permalink
feat: relax click/impression host checking (#113)
Browse files Browse the repository at this point in the history
* feat: relax click/impression host checking

Allow a host to match when it equals, or is a subdomain of, a domain listed in the inclusion filter (hosts are compared label by label from the right).

e.g. "click_hosts": ["example.com"] now allows "foo.example.com"

Closes #109
  • Loading branch information
pjenvey authored May 26, 2021
1 parent 04b6fa0 commit fded42c
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 62 deletions.
88 changes: 77 additions & 11 deletions src/adm/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,28 @@ pub struct AdmFilter {
pub filter_set: HashMap<String, AdmAdvertiserFilterSettings>,
}

/// Check that a given URL is valid according to its corresponding filter
fn check_url(url: Url, species: &'static str, filter: &[String]) -> HandlerResult<bool> {
let host = match url.host() {
Some(v) => v.to_string(),
None => {
return Err(HandlerErrorKind::MissingHost(species, url.to_string()).into());
/// Return the host component of `url` rendered as a `String`.
///
/// `species` is only passed through to the error so the caller can tell
/// which kind of URL was being inspected.
///
/// # Errors
///
/// Yields `HandlerErrorKind::MissingHost` (carrying `species` and the full
/// URL text) when the URL has no host.
fn get_host(url: Url, species: &'static str) -> HandlerResult<String> {
    match url.host() {
        Some(host) => Ok(host.to_string()),
        None => Err(HandlerErrorKind::MissingHost(species, url.to_string()).into()),
    }
}

/// Check that a given URL is valid according to its corresponding filter.
///
/// Each filter entry is a domain already split into its dot-separated labels
/// (e.g. "example.com" is `["example", "com"]`). The URL's host is accepted
/// when its trailing labels equal one of the entries, so an entry for
/// "example.com" allows "example.com", "foo.example.com" and
/// "quux.bar.example.com". The comparison is made on whole labels, so
/// "badexample.com" is NOT allowed by "example.com".
///
/// Returns `Ok(true)` on a match; this function never returns `Ok(false)`.
///
/// # Errors
///
/// * `HandlerErrorKind::MissingHost` if the URL has no host.
/// * `HandlerErrorKind::UnexpectedHost` if no filter entry matches the host.
fn check_url(url: Url, species: &'static str, filter: &[Vec<String>]) -> HandlerResult<bool> {
    let host = get_host(url, species)?;
    let domains: Vec<&str> = host.split('.').collect();

    let permitted = filter.iter().any(|allowed| {
        // An entry with no labels would compare equal to the empty tail of
        // every host and silently allow everything, so it never matches.
        // An entry longer than the host cannot be a suffix of it either.
        !allowed.is_empty()
            && allowed.len() <= domains.len()
            && domains[domains.len() - allowed.len()..] == allowed[..]
    });

    if permitted {
        Ok(true)
    } else {
        Err(HandlerErrorKind::UnexpectedHost(species, host).into())
    }
}

/// Filter a given tile data set provided by ADM and validate the various elements
Expand Down Expand Up @@ -80,7 +90,11 @@ impl AdmFilter {
return Err(HandlerErrorKind::InvalidHost(species, url.to_string()).into());
}
};
check_url(parsed, species, &filter.advertiser_hosts)?;

let host = get_host(parsed, species)?;
if !filter.advertiser_hosts.contains(&host) {
return Err(HandlerErrorKind::UnexpectedHost(species, host).into());
}
Ok(())
}

Expand Down Expand Up @@ -237,3 +251,55 @@ impl AdmFilter {
}
}
}

#[cfg(test)]
mod tests {
    use super::check_url;

    /// URL kind label handed to `check_url` in every case below.
    const SPECIES: &str = "Click";

    /// Build a filter holding exactly one entry made of the given labels.
    fn single_filter(labels: &[&str]) -> Vec<Vec<String>> {
        vec![labels.iter().map(|label| (*label).to_owned()).collect()]
    }

    /// Hosts equal to, or nested below, the allowed domain are accepted.
    #[test]
    fn check_url_matches() {
        let filter = single_filter(&["example", "com"]);
        let accepted_urls = [
            "https://example.com",
            "https://foo.bridge.example.com/?quux=baz",
        ];
        for url in accepted_urls.iter() {
            let accepted = check_url(url.parse().unwrap(), SPECIES, &filter).unwrap();
            assert!(accepted, "{} must be accepted", url);
        }
    }

    /// Unrelated hosts, hosts shorter than the filter entry, and hosts that
    /// merely end with the same characters are all rejected.
    #[test]
    fn check_url_failed() {
        let rejected: [(&str, &[&str]); 3] = [
            ("https://foo.com", &["example", "com"]),
            ("https://foo.com", &["bar", "example", "com"]),
            ("https://badexample.com", &["example", "com"]),
        ];
        for (url, labels) in rejected.iter() {
            let outcome = check_url(url.parse().unwrap(), SPECIES, &single_filter(labels));
            assert!(outcome.is_err(), "{} must be rejected by {:?}", url, labels);
        }
    }
}
49 changes: 46 additions & 3 deletions src/adm/settings.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{collections::HashMap, fmt::Debug};

use serde::{Deserialize, Serialize};
use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer};

use super::AdmFilter;
use crate::{error::HandlerResult, settings::Settings};
Expand All @@ -21,15 +21,58 @@ pub struct AdmAdvertiserFilterSettings {
/// Set of valid hosts for the `advertiser_url`
pub(crate) advertiser_hosts: Vec<String>,
/// Set of valid hosts for the `impression_url`
pub(crate) impression_hosts: Vec<String>,
#[serde(
deserialize_with = "deserialize_hosts",
serialize_with = "serialize_hosts"
)]
pub(crate) impression_hosts: Vec<Vec<String>>,
/// Set of valid hosts for the `click_url`
pub(crate) click_hosts: Vec<String>,
#[serde(
deserialize_with = "deserialize_hosts",
serialize_with = "serialize_hosts"
)]
pub(crate) click_hosts: Vec<Vec<String>>,
/// valid position for the tile
pub(crate) position: Option<u8>,
/// Set of valid regions for the tile (e.g ["en", "en-US/TX"])
pub(crate) include_regions: Vec<String>,
}

/// Parse JSON:
/// ["example.com", "foo.net"]
/// into:
/// [["example", "com"], ["foo", "net"]]
fn deserialize_hosts<'de, D>(d: D) -> Result<Vec<Vec<String>>, D::Error>
where
D: Deserializer<'de>,
{
Deserialize::deserialize(d).map(|hosts: Vec<String>| {
hosts
.into_iter()
.map(|host| -> Vec<_> { host.split('.').map(ToOwned::to_owned).collect() })
.collect()
})
}

/// Serialize label-split hosts back into a sequence of dotted host names.
///
/// For example
///   [["example", "com"], ["foo", "net"]]
/// is written as
///   ["example.com", "foo.net"]
fn serialize_hosts<S>(hosts: &[Vec<String>], s: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    // One output element per host, so the sequence length is known up front.
    let mut seq = s.serialize_seq(Some(hosts.len()))?;
    for labels in hosts {
        let dotted = labels.join(".");
        seq.serialize_element(&dotted)?;
    }
    seq.end()
}

pub(crate) type AdmSettings = HashMap<String, AdmAdvertiserFilterSettings>;

impl From<&Settings> for AdmSettings {
Expand Down
85 changes: 37 additions & 48 deletions src/web/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use actix_web::{
use serde_json::{json, Value};

use crate::{
adm::{AdmAdvertiserFilterSettings, AdmFilter, AdmSettings, DEFAULT},
adm::{AdmFilter, AdmSettings, DEFAULT},
build_app,
error::{HandlerError, HandlerResult},
metrics::Metrics,
Expand Down Expand Up @@ -81,49 +81,37 @@ fn init_mock_adm(response: String) -> (dev::Server, SocketAddr) {
}

fn adm_settings() -> AdmSettings {
let mut adm_settings = AdmSettings::default();
adm_settings.insert(
"Acme".to_owned(),
AdmAdvertiserFilterSettings {
advertiser_hosts: ["www.acme.biz".to_owned()].to_vec(),
position: Some(0),
include_regions: vec![],
impression_hosts: vec![],
click_hosts: vec![],
},
);
adm_settings.insert(
"Dunder Mifflin".to_owned(),
AdmAdvertiserFilterSettings {
advertiser_hosts: ["www.dunderm.biz".to_owned()].to_vec(),
position: Some(1),
include_regions: vec![],
impression_hosts: [].to_vec(),
click_hosts: vec![],
let adm_settings = json!({
"Acme": {
"advertiser_hosts": ["www.acme.biz"],
"impression_hosts": [],
"click_hosts": [],
"position": 0,
"include_regions": []
},
);
adm_settings.insert(
"Los Pollos Hermanos".to_owned(),
AdmAdvertiserFilterSettings {
advertiser_hosts: ["www.lph-nm.biz".to_owned()].to_vec(),
position: Some(2),
include_regions: vec![],
impression_hosts: vec![],
click_hosts: vec![],
"Dunder Mifflin": {
"advertiser_hosts": ["www.dunderm.biz"],
"impression_hosts": [],
"click_hosts": [],
"position": 1,
"include_regions": []
},
);
// This is the "default" setting definitions.
adm_settings.insert(
DEFAULT.to_owned(),
AdmAdvertiserFilterSettings {
advertiser_hosts: vec![],
position: None,
include_regions: vec![],
impression_hosts: ["example.net".to_owned()].to_vec(),
click_hosts: ["example.com".to_owned()].to_vec(),
"Los Pollos Hermanos": {
"advertiser_hosts": ["www.lph-nm.biz"],
"impression_hosts": [],
"click_hosts": [],
"position": 2,
"include_regions": []
},
);
adm_settings
DEFAULT: {
"advertiser_hosts": [],
"impression_hosts": ["example.net"],
"click_hosts": ["example.com"],
"position": null,
"include_regions": []
}
});
serde_json::from_value(adm_settings).unwrap()
}

#[actix_rt::test]
Expand Down Expand Up @@ -267,13 +255,14 @@ async fn basic_filtered() {
let mut adm_settings = adm_settings();
adm_settings.insert(
"Example".to_owned(),
AdmAdvertiserFilterSettings {
advertiser_hosts: ["www.example.ninja".to_owned()].to_vec(),
position: Some(100),
include_regions: Vec::new(),
impression_hosts: ["example.net".to_owned()].to_vec(),
click_hosts: ["example.com".to_owned()].to_vec(),
},
serde_json::from_value(json!({
"advertiser_hosts": ["www.example.ninja"],
"impression_hosts": ["example.net"],
"click_hosts": ["example.com"],
"position": 100,
"include_regions": []
}))
.unwrap(),
);
adm_settings.remove("Dunder Mifflin");

Expand Down

0 comments on commit fded42c

Please sign in to comment.