Skip to content
This repository has been archived by the owner on Aug 2, 2024. It is now read-only.

Commit

Permalink
Merge pull request #5 from ukwa/acl-optimiz
Browse files: browse the repository at this point in the history
ACL Optimizations
  • Loading branch information
anjackson authored Feb 14, 2019
2 parents dcb1f0e + 927e4d4 commit 623f0da
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
25 changes: 17 additions & 8 deletions pywb/warcserver/access_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,21 +76,27 @@ def create_access_source(self, filename):

def find_access_rule(self, url, ts=None, urlkey=None):
    """Return the first access rule whose urlkey is a prefix of the lookup key.

    The rule source is queried through ``self.aggregator`` and the returned
    rules are scanned in the order the aggregator yields them.

    :param url: URL to look up; passed to the aggregator as ``params['url']``.
    :param ts: accepted for interface compatibility; not used by this lookup.
    :param urlkey: optional key for the URL; passed to the aggregator as
        ``params['urlkey']``.
    :return: the first rule dict whose ``'urlkey'`` is a prefix of the lookup
        key, or ``self.default_rule`` when no rule matches.
    """
    params = {'url': url, 'urlkey': urlkey}
    acl_iter, errs = self.aggregator(params)
    if errs:
        print(errs)

    # NOTE(review): 'key' is never set in this function, so the aggregator is
    # presumably expected to add it to ``params`` as UTF-8 bytes -- confirm.
    key = params['key'].decode('utf-8')

    # First comma-separated component of the key; used below as the cut-off
    # for abandoning the scan.
    tld = key.split(',')[0]

    for acl in acl_iter:
        # skip empty/invalid lines
        if 'urlkey' not in acl:
            continue

        if key.startswith(acl['urlkey']):
            return acl

        # if acl key already less than first tld,
        # no match can be found
        # NOTE(review): this only holds if rules arrive in descending
        # urlkey order -- confirm against the rule source.
        if acl['urlkey'] < tld:
            break

    return self.default_rule

def __call__(self, res):
Expand All @@ -102,21 +108,24 @@ def wrap_iter(self, cdx_iter):
last_url = None

for cdx in cdx_iter:
print("Looking at",cdx)
url = cdx.get('url')
print(url)
# if no url, possible idx or other object, don't apply any checks and pass through
if not url:
yield cdx
continue

rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))
print(rule)
# TODO: optimization until date range support is included
if url == last_url:
rule = last_rule
else:
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))

access = rule.get('access', 'exclude')
print(access)
if access == 'exclude':
continue

print("Yielding...")
cdx['access'] = access
yield cdx

last_rule = rule
last_url = url
2 changes: 1 addition & 1 deletion sample_archive/access/pywb.aclj
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
org,iana)/about - {"access": "block"}
org,iana)/_css/2013.1/fonts/opensans-semibold.ttf - {"access": "allow"}
org,iana)/_css - {"access": "exclude"}
org,example)/?example=1 - {"access": "block"}
org,iana)/ - {"access": "exclude"}
org,example)/?example=1 - {"access": "block"}

0 comments on commit 623f0da

Please sign in to comment.