-
Notifications
You must be signed in to change notification settings - Fork 1
/
FFDB.py
53 lines (44 loc) · 1.31 KB
/
FFDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from hashlib import sha256
import pickle
import gzip
from pathlib import Path
import requests
import bs4
import json
import pickle
import datetime
from concurrent.futures import ProcessPoolExecutor as PPE
import itertools
#import pandas as pd
import re
import glob
class FFDB(object):
    """Flat-file key/value store: one gzip-compressed pickle per key.

    Every key is mapped to a file inside ``tar_path`` whose name is the first
    16 hex characters of the SHA-256 digest of the UTF-8 encoded key, so keys
    whose digests share that prefix map to the same file.

    NOTE(security): values are restored with ``pickle.loads``, which can
    execute arbitrary code. Only use this on a directory whose contents you
    trust.
    """

    def __init__(self, tar_path='tmp/ffdb'):
        """Remember the storage directory and create it (with parents) if missing."""
        self.tar_path = tar_path
        Path(self.tar_path).mkdir(exist_ok=True, parents=True)

    def get_hashed_fs(self, key: str) -> str:
        """Return the file path under ``tar_path`` that stores ``key``."""
        hashed = sha256(bytes(key, 'utf8')).hexdigest()[:16]
        return f'{self.tar_path}/{hashed}'

    def exists(self, key: str) -> bool:
        """Return True when a file for ``key`` is present in the store."""
        return Path(self.get_hashed_fs(key)).exists()

    def save(self, key: str, val) -> None:
        """Pickle ``val``, gzip it and write it to the file for ``key``.

        An existing file for the same key is overwritten.
        """
        fn = self.get_hashed_fs(key)
        with open(fn, 'wb') as fs:
            fs.write(gzip.compress(pickle.dumps(val)))

    def get(self, key: str):
        """Return the value stored for ``key``, or None when no file exists."""
        fn = self.get_hashed_fs(key)
        # Open directly instead of checking existence first, so a file that
        # disappears between the check and the open cannot raise.
        try:
            with open(fn, 'rb') as fs:
                payload = fs.read()
        except FileNotFoundError:
            return None
        # Trusted-data only: see the class-level security note.
        return pickle.loads(gzip.decompress(payload))

    def get_iter(self):
        """Yield every stored value, one file at a time.

        Only regular files directly inside ``tar_path`` are read; names
        starting with a dot are not matched by the ``*`` pattern.
        """
        # Escape the directory so glob metacharacters in ``tar_path`` are
        # matched literally rather than interpreted as a pattern.
        pattern = f'{glob.escape(self.tar_path)}/*'
        for fn in glob.glob(pattern):
            # Sub-directories cannot be opened as files; skip them.
            if not Path(fn).is_file():
                continue
            with open(fn, 'rb') as fp:
                payload = fp.read()
            # Trusted-data only: see the class-level security note.
            yield pickle.loads(gzip.decompress(payload))