-
Notifications
You must be signed in to change notification settings - Fork 2
/
dask_vs_multiprocessing.py
73 lines (57 loc) · 1.87 KB
/
dask_vs_multiprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Make http requests in parallel using multiprocessing and dask
Which API is nicer?
Timing each suggests that dask/multiprocessing, as expected, speed up the requests by running them in parallel.
It might be preferable to run the requests asynchronously instead of using parallel processing.
"""
import multiprocessing
import timeit
from datetime import datetime
import time
import dask
import pandas as pd
import requests
def fetch_results(date, timeout=10):
    """Fetch the carbon-intensity API response for a single date.

    Args:
        date: Any object with a ``strftime`` method (e.g. ``datetime`` or
            ``pandas.Timestamp``); it is formatted as ``YYYY-MM-DD`` and
            placed in the request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever, which would stall a
            whole worker in the parallel benchmarks.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    date_str = date.strftime("%Y-%m-%d")
    r = requests.get(
        f"https://api.carbonintensity.org.uk/intensity/date/{date_str}",
        timeout=timeout,
    )
    # Fail loudly on an error status instead of parsing an error payload
    # as if it were valid data.
    r.raise_for_status()
    return r.json()
def fetch_results_mock(date, *, delay=0.2):
    """Stand-in for ``fetch_results`` that sleeps instead of using the network.

    Args:
        date: Any value; it is returned unchanged so results from the
            different execution strategies can be compared for equality.
        delay: Seconds to sleep, simulating request latency. Keyword-only;
            defaults to the original fixed value of 0.2 seconds.

    Returns:
        The ``date`` argument, unmodified.
    """
    time.sleep(delay)
    return date
if __name__ == "__main__":
    # Pick the function to benchmark. `fetch_results` performs real HTTP
    # requests; `fetch_results_mock` only sleeps. The second assignment wins,
    # so comment it out to time the real API.
    fcn = fetch_results
    fcn = fetch_results_mock
    params = pd.date_range(datetime(2020, 1, 1), periods=20).tolist()

    # for loop (sequential baseline)
    t1 = timeit.default_timer()
    results_for = []
    for param in params:
        results_for.append(fcn(param))
    t2 = timeit.default_timer()
    t_for = t2 - t1
    print(f"For loop: {t_for:.1f}")

    # map (also sequential; compares the API, not the speed)
    t1 = timeit.default_timer()
    results_map = list(map(fcn, params))
    t2 = timeit.default_timer()
    t_map = t2 - t1
    print(f"Map: {t_map:.1f}")

    # multiprocessing
    # Doesn't run in ipython - https://stackoverflow.com/questions/48846085/python-multiprocessing-within-jupyter-notebook
    # The pool is created outside the timed region, so only the map call is
    # measured. The context manager shuts the worker processes down on exit
    # instead of leaking them.
    with multiprocessing.Pool(8) as pool:
        t1 = timeit.default_timer()
        results_mp = pool.map(fcn, params)
        t2 = timeit.default_timer()
    t_mp = t2 - t1
    print(f"Multiprocessing: {t_mp:.1f}")

    # dask: build one delayed task per parameter, then compute them together
    t1 = timeit.default_timer()
    results_dask = list(map(dask.delayed(fcn), params))
    # dask.compute returns a tuple with one entry per argument; we passed a
    # single list, so the results are its first element.
    results_dask = dask.compute(results_dask)[0]
    t2 = timeit.default_timer()
    t_dask = t2 - t1
    print(f"Dask: {t_dask:.1f}")

    # check results: every strategy must produce the same output in the same order
    assert results_for == results_map
    assert results_for == results_mp
    assert results_for == results_dask