Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add whitelist regex support #612

Merged
merged 24 commits into from
Sep 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
fdff462
Add support for regex filters for whitelisting.
DL6ER Jul 7, 2019
d79891d
New table is called regex_whitelist (at least for now).
DL6ER Jul 7, 2019
b0838aa
Temporarily disable regex debugging as it generated hundreds of thous…
DL6ER Jul 7, 2019
c0eedd5
Rename table regex to regex_blacklist.
DL6ER Jul 8, 2019
fb46dcf
Be more specific about how many of which kind of regex filters we com…
DL6ER Jul 8, 2019
ea6cddb
Added 8 new tests ensuring both whitelist exact and regex unblock reg…
DL6ER Jul 9, 2019
43a2c9f
Adjust expected statistics as we query additional domains now.
DL6ER Jul 9, 2019
95be30d
Add test "Regex blacklist match + whitelist regex match is not blocked".
DL6ER Jul 9, 2019
f996753
Rename in_whitelist() to whitelisted() as this routine does not only …
DL6ER Jul 9, 2019
8294f75
Rename init_regex() to compile_regex() as this describes better what …
DL6ER Jul 9, 2019
cb29305
Declare free_regex() and log_regex() static as we do not need to make…
DL6ER Jul 9, 2019
e14322b
Declare free_regex() static as we do not need to make it globally ava…
DL6ER Jul 9, 2019
d362a5e
Merge branch 'development' into new/whitelist-regex-support
DL6ER Jul 14, 2019
be1b573
Rename regex blacklist trigger to tr_regex_blacklist_update.
DL6ER Jul 16, 2019
caf18e1
Improve difference between REGEX and TABLE constants by renam…
DL6ER Jul 16, 2019
a3f25f9
Do not check regex whitelist filters when querying the domain_audit t…
DL6ER Jul 16, 2019
eaf3d23
Print failed regex line instead of its ID when reporting an error.
DL6ER Jul 16, 2019
dd8af36
Add debugging output for gravity table if requested via DEBUG_DATABAS…
DL6ER Jul 16, 2019
e3fc3a7
Clarify that the shown number is the error code.
DL6ER Jul 16, 2019
fe23292
regexec() can only return zero or NOMATCH according to its man page. …
DL6ER Jul 16, 2019
8c6736a
Fix minor typo.
DL6ER Jul 16, 2019
369a056
Simplify log_regex_error().
DL6ER Jul 16, 2019
2a731f0
Remove call to sqlite3_finalize(stmt) on error in gravityDB_getDomain…
DL6ER Aug 7, 2019
f3e8d23
Merge branch 'development' into new/whitelist-regex-support
DL6ER Aug 17, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/FTL.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ enum { TYPE_A = 1, TYPE_AAAA, TYPE_ANY, TYPE_SRV, TYPE_SOA, TYPE_PTR, TYPE_TXT,
enum { REPLY_UNKNOWN, REPLY_NODATA, REPLY_NXDOMAIN, REPLY_CNAME, REPLY_IP, REPLY_DOMAIN, REPLY_RRNAME, REPLY_SERVFAIL, REPLY_REFUSED, REPLY_NOTIMP, REPLY_OTHER };
enum { PRIVACY_SHOW_ALL = 0, PRIVACY_HIDE_DOMAINS, PRIVACY_HIDE_DOMAINS_CLIENTS, PRIVACY_MAXIMUM, PRIVACY_NOSTATS };
enum { MODE_IP, MODE_NX, MODE_NULL, MODE_IP_NODATA_AAAA, MODE_NODATA };
enum { GRAVITY_LIST, BLACK_LIST, WHITE_LIST, REGEX_LIST, UNKNOWN_LIST };

// Use our own memory handling functions that will detect possible errors
// and report accordingly in the log. This will make debugging FTL crashes
Expand Down
6 changes: 0 additions & 6 deletions src/api/request.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,8 @@ void process_request(const char *client_message, int *sock)
logg("Received API request to recompile regex");
lock_shm();
// Reread regex.list
// Free regex list and array of whitelisted domains
free_regex();
// Start timer for regex compilation analysis
timer_start(REGEX_TIMER);
// Read and compile possible regex filters
read_regex_from_database();
// Log result
log_regex(timer_elapsed_msec(REGEX_TIMER));
unlock_shm();
}
else if(command(client_message, ">update-mac-vendor"))
Expand Down
98 changes: 57 additions & 41 deletions src/database/gravity-db.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// global variable counters
#include "memory.h"
#include "sqlite3.h"
// match_regex()
#include "regex_r.h"

// Private variables
static sqlite3 *gravity_db = NULL;
Expand All @@ -23,6 +25,9 @@ static sqlite3_stmt* whitelist_stmt = NULL;
static sqlite3_stmt* auditlist_stmt = NULL;
bool gravity_database_avail = false;

// Table names corresponding to the enum defined in gravity-db.h
static const char* tablename[] = { "vw_gravity", "vw_blacklist", "vw_whitelist", "vw_regex_blacklist", "vw_regex_whitelist" , ""};

// Prototypes from functions in dnsmasq's source
void rehash(int size);

Expand Down Expand Up @@ -119,37 +124,37 @@ bool gravityDB_getTable(const unsigned char list)
{
if(!gravity_database_avail)
{
logg("gravityDB_getTable(%d): Gravity database not available", list);
logg("gravityDB_getTable(%u): Gravity database not available", list);
return false;
}

// Checking for smaller than GRAVITY_LIST is omitted due to list being unsigned
DL6ER marked this conversation as resolved.
Show resolved Hide resolved
if(list >= UNKNOWN_TABLE)
{
logg("gravityDB_getTable(%u): Requested list is not known!", list);
return false;
}

// Select correct query string to be used depending on list to be read
const char *querystr = NULL;
switch(list)
char *querystr = NULL;
// Build correct query string to be used depending on list to be read
if(asprintf(&querystr, "SELECT domain FROM %s", tablename[list]) < 18)
{
case GRAVITY_LIST:
querystr = "SELECT domain FROM vw_gravity;";
break;
case BLACK_LIST:
querystr = "SELECT domain FROM vw_blacklist;";
break;
case REGEX_LIST:
querystr = "SELECT domain FROM vw_regex;";
break;
default:
logg("gravityDB_getTable(%i): Requested list is not known!", list);
return false;
logg("readGravity(%u) - asprintf() error", list);
return false;
}

// Prepare SQLite3 statement
int rc = sqlite3_prepare_v2(gravity_db, querystr, -1, &table_stmt, NULL);
if( rc )
if(rc != SQLITE_OK)
{
logg("readGravity(%s) - SQL error prepare (%i): %s", querystr, rc, sqlite3_errmsg(gravity_db));
gravityDB_close();
free(querystr);
return false;
}

// Free allocated memory and return success
free(querystr);
return true;
}

Expand Down Expand Up @@ -179,7 +184,6 @@ inline const char* gravityDB_getDomain(void)
if(rc != SQLITE_DONE)
{
logg("gravityDB_getDomain() - SQL error step (%i): %s", rc, sqlite3_errmsg(gravity_db));
gravityDB_finalizeTable();
return NULL;
}

Expand Down Expand Up @@ -208,42 +212,41 @@ int gravityDB_count(const unsigned char list)
return DB_FAILED;
}

// Select correct query string to be used depending on list to be read
const char* querystr = NULL;
switch(list)
// Checking for smaller than GRAVITY_LIST is omitted due to list being unsigned
if(list >= UNKNOWN_TABLE)
{
logg("gravityDB_getTable(%u): Requested list is not known!", list);
return false;
}

char *querystr = NULL;
// Build correct query string to be used depending on list to be read
if(asprintf(&querystr, "SELECT count(domain) FROM %s", tablename[list]) < 18)
{
case GRAVITY_LIST:
querystr = "SELECT COUNT(*) FROM vw_gravity;";
break;
case BLACK_LIST:
querystr = "SELECT COUNT(*) FROM vw_blacklist;";
break;
case WHITE_LIST:
querystr = "SELECT COUNT(*) FROM vw_whitelist;";
break;
case REGEX_LIST:
querystr = "SELECT COUNT(*) FROM vw_regex;";
break;
default:
logg("gravityDB_count(%i): Requested list is not known!", list);
return DB_FAILED;
logg("readGravity(%u) - asprintf() error", list);
return false;
}

if(config.debug & DEBUG_DATABASE)
logg("Querying gravity database table %s", tablename[list]);

// Prepare query
int rc = sqlite3_prepare_v2(gravity_db, querystr, -1, &table_stmt, NULL);
if( rc ){
if(rc != SQLITE_OK){
logg("gravityDB_count(%s) - SQL error prepare (%i): %s", querystr, rc, sqlite3_errmsg(gravity_db));
sqlite3_finalize(table_stmt);
gravityDB_close();
free(querystr);
return DB_FAILED;
}

// Perform query
rc = sqlite3_step(table_stmt);
if( rc != SQLITE_ROW ){
if(rc != SQLITE_ROW){
logg("gravityDB_count(%s) - SQL error step (%i): %s", querystr, rc, sqlite3_errmsg(gravity_db));
sqlite3_finalize(table_stmt);
gravityDB_close();
free(querystr);
return DB_FAILED;
}

Expand All @@ -253,6 +256,8 @@ int gravityDB_count(const unsigned char list)
// Finalize statement
gravityDB_finalizeTable();

// Free allocated memory and return result
free(querystr);
return result;
}

Expand Down Expand Up @@ -309,18 +314,29 @@ static bool domain_in_list(const char *domain, sqlite3_stmt* stmt)
// all host parameters to NULL.
sqlite3_clear_bindings(stmt);

// Return result.
// Return if domain was found in current table
// SELECT EXISTS(...) either returns 0 (false) or 1 (true).
return result == 1;
return (result == 1);
}

bool in_whitelist(const char *domain)
{
return domain_in_list(domain, whitelist_stmt);
if(config.debug & DEBUG_DATABASE)
logg("Querying whitelist for %s", domain);
// We have to check both the exact whitelist (using a prepared database statement)
// as well as the compiled regex whitelist filters to check if the current domain is
// whitelisted. Due to short-circuit evaluation in C, the regex evaluation is executed
// only if the exact whitelist lookup does not deliver a positive match. This is an
// optimization as the database lookup will most likely (a) hit more domains and (b)
// be faster (given a sufficiently large number of regex whitelisting filters).
return domain_in_list(domain, whitelist_stmt) || match_regex(domain, REGEX_WHITELIST);
}

bool in_auditlist(const char *domain)
{
if(config.debug & DEBUG_DATABASE)
logg("Querying audit list for %s", domain);
// We check the domain_audit table for the given domain
return domain_in_list(domain, auditlist_stmt);
}

3 changes: 3 additions & 0 deletions src/database/gravity-db.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#ifndef GRAVITY_H
#define GRAVITY_H

// Table indices
enum { GRAVITY_TABLE, EXACT_BLACKLIST_TABLE, EXACT_WHITELIST_TABLE, REGEX_BLACKLIST_TABLE, REGEX_WHITELIST_TABLE, UNKNOWN_TABLE };

bool gravityDB_open(void);
void gravityDB_close(void);
bool gravityDB_getTable(unsigned char list);
Expand Down
20 changes: 9 additions & 11 deletions src/dnsmasq_interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ void _FTL_new_query(const unsigned int flags, const char *name, const struct all
// of a specific domain. The logic herein is:
// If matched, then compare against whitelist
// If in whitelist, negate matched so this function returns: not-to-be-blocked
if(match_regex(domainString) && !in_whitelist(domainString))
if(match_regex(domainString, REGEX_BLACKLIST) && !in_whitelist(domainString))
{
// We have to block this domain
block_single_domain_regex(domainString);
Expand Down Expand Up @@ -415,20 +415,13 @@ void FTL_dnsmasq_reload(void)
// Reread pihole-FTL.conf to see which debugging flags are set
read_debuging_settings(NULL);

// Free regex list
free_regex();

// (Re-)open gravity database connection
gravityDB_close();
gravityDB_open();

// Start timer for regex compilation analysis
timer_start(REGEX_TIMER);
// Read and compile possible regex filters
// only after having called gravityDB_open()
read_regex_from_database();
// Log result
log_regex(timer_elapsed_msec(REGEX_TIMER));

// Print current set of capabilities if requested via debug flag
if(config.debug & DEBUG_CAPS)
Expand Down Expand Up @@ -1430,6 +1423,11 @@ static int FTL_table_import(const char *tablename, const unsigned char list, con
if(len == 0)
continue;

// Do not add gravity or blacklist domains that match
// a regex-based whitelist filter
if(match_regex(domain, REGEX_WHITELIST))
continue;

// As of here we assume the entry to be valid
// Rehash every 1000 valid names
if(rhash && ((name_count - cache_size) > 1000))
Expand Down Expand Up @@ -1487,10 +1485,10 @@ int FTL_database_import(int cache_size, struct crec **rhash, int hashsz)
return cache_size;
}

// Import gravity and blacklist domains
// Import gravity and exact blacklisted domains
int added;
added = FTL_table_import("gravity", GRAVITY_LIST, SRC_GRAVITY, addr4, addr6, has_IPv4, has_IPv6, cache_size, rhash, hashsz);
added += FTL_table_import("blacklist", BLACK_LIST, SRC_BLACK, addr4, addr6, has_IPv4, has_IPv6, cache_size, rhash, hashsz);
added = FTL_table_import("gravity", GRAVITY_TABLE, SRC_GRAVITY, addr4, addr6, has_IPv4, has_IPv6, cache_size, rhash, hashsz);
added += FTL_table_import("blacklist", EXACT_BLACKLIST_TABLE, SRC_BLACK, addr4, addr6, has_IPv4, has_IPv6, cache_size, rhash, hashsz);

// Update counter of blocked domains
counters->gravity = added;
Expand Down
3 changes: 0 additions & 3 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,6 @@ int main (int argc, char* argv[])
close_telnet_socket();
close_unix_socket();

// Free regex list and array of whitelisted domains
free_regex();
AzureMarker marked this conversation as resolved.
Show resolved Hide resolved

// Remove shared memory objects
destroy_shmem();

Expand Down
Loading