Skip to content

Commit

Permalink
Merge pull request #13 from EFForg/add-domain-change-summary
Browse files Browse the repository at this point in the history
Summarize what changed in between two runs
  • Loading branch information
bcyphers authored Jun 26, 2018
2 parents 07eb944 + 104bff8 commit 0b2efec
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 11 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ WORKDIR /home/$USER
COPY requirements.txt .
RUN pip3 install --user -r requirements.txt

COPY crawler.py docker-entry.sh /home/$USER/
COPY crawler.py validate.py docker-entry.sh /home/$USER/
COPY results.json /home/$USER/old-results.json
COPY privacybadger /home/$USER/privacybadger
ENV OUTPATH=/home/$USER/out
ENV EXTPATH=/home/$USER/privacybadger/src
Expand Down
12 changes: 11 additions & 1 deletion docker-entry.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,12 @@
#!/bin/bash
./crawler.py --out-path $OUTPATH --ext-path $EXTPATH "$@"

# Run the scan. Every argument given to this entry script is forwarded to
# the crawler unchanged. The path variables are quoted so that a value
# containing whitespace or glob characters is passed as a single argument
# instead of being word-split by the shell.
if ! ./crawler.py --out-path "$OUTPATH" --ext-path "$EXTPATH" "$@" ; then
    exit 1
fi

# Validate the output and print a summary of the changes relative to
# old-results.json. A non-zero exit from validate.py fails the whole run.
if ! ./validate.py old-results.json "$OUTPATH/results.json" ; then
    echo "results.json is invalid."
    exit 1
fi
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
colorama==0.3.9
selenium==3.12.0
tldextract==2.2.0
xvfbwrapper==0.2.9
7 changes: 0 additions & 7 deletions runscan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,6 @@ fi
#--security-opt seccomp=./chrome-seccomp.json \
#badger-sett "$@"

# Validate the output
if ! ./validate.py results.json $DOCKER_OUT/results.json ; then
mv $DOCKER_OUT/log.txt ./
echo "Scan failed: results.json is invalid."
exit 1
fi

# back up old results
cp results.json results-prev.json

Expand Down
53 changes: 51 additions & 2 deletions validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
import sys
import os

from collections import defaultdict

import colorama
import tldextract

# Use: ./validate.py old.json new.json
KEYS = ['action_map', 'snitch_map', 'version']
old_path = sys.argv[1]
Expand All @@ -26,11 +31,11 @@
assert k in new_js

# make sure there is data in the maps
if not len(new_js['snitch_map'].keys()):
if not new_js['snitch_map'].keys():
print("Error: Snitch map empty.")
sys.exit(1)

if not len(new_js['action_map'].keys()):
if not new_js['action_map'].keys():
print("Error: Action map empty.")
sys.exit(1)

Expand All @@ -41,4 +46,48 @@
print("New action map has %d new domains and dropped %d old domains" %
(len(new_keys - overlap), len(old_keys - overlap)))

# Print a human-readable summary of which base (registered) domains became
# blocked, or stopped being blocked, between the old and the new results.
colorama.init()
C_GREEN = colorama.Style.BRIGHT + colorama.Fore.GREEN
C_RED = colorama.Style.BRIGHT + colorama.Fore.RED
C_RESET = colorama.Style.RESET_ALL

# NOTE(review): cache_file=False is presumably meant to stop tldextract from
# using an on-disk suffix-list cache; confirm against the pinned tldextract
# version before upgrading, since the parameter may not exist in later ones.
extract = tldextract.TLDExtract(cache_file=False)

# heuristicAction values that count as "blocked" for this summary
BLOCKED = ("block", "cookieblock")


def _blocked_by_base(action_map):
    """Group the blocked domains of *action_map* by their registered domain.

    Only entries whose 'heuristicAction' is one of BLOCKED are kept.
    Returns a defaultdict(list) mapping each registered domain to the list
    of blocked domains that fall under it.
    Raises KeyError if an entry has no 'heuristicAction' key.
    """
    grouped = defaultdict(list)
    for domain, entry in action_map.items():
        if entry['heuristicAction'] in BLOCKED:
            grouped[extract(domain).registered_domain].append(domain)
    return grouped


def _print_bases(bases, grouped, color):
    """Print each base in *bases* (sorted) with its count and its domains."""
    for base in sorted(bases):
        print(" {}{}{} ({})".format(
            color, base, C_RESET, len(grouped[base])))
        for domain in sorted(grouped[base]):
            print(" • {}".format(domain))


blocked_old = _blocked_by_base(old_js['action_map'])
blocked_new = _blocked_by_base(new_js['action_map'])

blocked_bases_old = set(blocked_old)
blocked_bases_new = set(blocked_new)

# bases that have blocked domains now but had none in the old results
print("\n{}++{} Newly blocked domains:\n".format(C_GREEN, C_RESET))
_print_bases(blocked_bases_new - blocked_bases_old, blocked_new, C_GREEN)

# bases that had blocked domains in the old results but have none now
print("\n{}--{} No longer blocked domains:\n".format(C_RED, C_RESET))
_print_bases(blocked_bases_old - blocked_bases_new, blocked_old, C_RED)

print("")

# reaching this point means every validation check passed
sys.exit(0)

0 comments on commit 0b2efec

Please sign in to comment.