This repository was archived by the owner on May 31, 2021. It is now read-only.

Commit b3c3da7
Author: Vincent Michel (committed)
1 parent: db0dcad

Update webscraper examples to promote the use of a main coroutine

6 files changed: +76, -107 lines
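
The change promoted by this commit is one structural pattern: each example now defines an async main() coroutine that does all of the awaiting, while the module-level code only creates an event loop, runs main() to completion, and closes the loop. A minimal sketch of that pattern, reduced from the diffs below (the placeholder body is illustrative only; on newer Python versions asyncio.run(main()) would be an equivalent entry point, but that is not what this commit uses):

import asyncio


async def main():
    """Entry-point coroutine: all awaiting happens here."""
    await asyncio.sleep(0)  # placeholder for the real work, e.g. fetching pages


if __name__ == '__main__':
    # Event-loop management is confined to the bottom of the script.
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()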

examples/aiohttp_client.py (+20, -27)

@@ -1,34 +1,26 @@
-"""aiohttp-based client to retrieve web pages.
-"""
+"""aiohttp-based client to retrieve web pages."""
 
-import asyncio
-from contextlib import closing
 import time
-
+import asyncio
 import aiohttp
 
 
 async def fetch_page(session, host, port=8000, wait=0):
-    """Get one page.
-    """
+    """Get one page."""
     url = '{}:{}/{}'.format(host, port, wait)
     with aiohttp.Timeout(10):
         async with session.get(url) as response:
             assert response.status == 200
-            return await response.text()
+            text = await response.text()
+            return text.strip('\n')
 
 
-def get_multiple_pages(host, waits, port=8000, show_time=True):
-    """Get multiple pages.
-    """
-    tasks = []
-    pages = []
+async def get_multiple_pages(host, waits, port=8000, show_time=True):
+    """Get multiple pages."""
     start = time.perf_counter()
-    with closing(asyncio.get_event_loop()) as loop:
-        with aiohttp.ClientSession() as session:
-            for wait in waits:
-                tasks.append(fetch_page(session, host, port, wait))
-            pages = loop.run_until_complete(asyncio.gather(*tasks))
+    with aiohttp.ClientSession() as session:
+        tasks = [fetch_page(session, host, port, wait) for wait in waits]
+        pages = await asyncio.gather(*tasks)
     duration = time.perf_counter() - start
     sum_waits = sum(waits)
     if show_time:
@@ -37,14 +29,15 @@ def get_multiple_pages(host, waits, port=8000, show_time=True):
     return pages
 
 
-if __name__ == '__main__':
+async def main():
+    """Test it."""
+    pages = await get_multiple_pages(
+        host='http://localhost', port='8000', waits=[1, 5, 3, 2])
+    for page in pages:
+        print(page)
 
-    def main():
-        """Test it.
-        """
-        pages = get_multiple_pages(host='http://localhost', port='8000',
-                                   waits=[1, 5, 3, 2])
-        for page in pages:
-            print(page)
 
-    main()
+if __name__ == '__main__':
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(main())
+    loop.close()

examples/async_client_blocking.py (+18, -20)

@@ -1,38 +1,36 @@
-"""Get "web pages.
+"""Get web pages.
 
-Waiting until one pages is download before getting the next."
+Waiting until one pages is download before getting the next.
 """
 
-import asyncio
-from contextlib import closing
 import time
-
+import asyncio
 from async_page import get_page
 
 
-def get_multiple_pages(host, port, waits, show_time=True):
-    """Get multiple pages.
-    """
+async def get_multiple_pages(host, port, waits, show_time=True):
+    """Get multiple pages."""
     start = time.perf_counter()
     pages = []
-    with closing(asyncio.get_event_loop()) as loop:
-        for wait in waits:
-            pages.append(loop.run_until_complete(get_page(host, port, wait)))
+    for wait in waits:
+        pages.append(await get_page(host, port, wait))
     duration = time.perf_counter() - start
     sum_waits = sum(waits)
     if show_time:
         msg = 'It took {:4.2f} seconds for a total waiting time of {:4.2f}.'
         print(msg.format(duration, sum_waits))
     return pages
 
-if __name__ == '__main__':
 
-    def main():
-        """Test it.
-        """
-        pages = get_multiple_pages(host='localhost', port='8000',
-                                   waits=[1, 5, 3, 2])
-        for page in pages:
-            print(page)
+async def main():
+    """Test it."""
+    pages = await get_multiple_pages(
+        host='localhost', port='8000', waits=[1, 5, 3, 2])
+    for page in pages:
+        print(page)
+
 
-    main()
+if __name__ == '__main__':
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(main())
+    loop.close()

examples/async_client_nonblocking.py (+18, -23)

@@ -1,40 +1,35 @@
-"""Get "web pages.
+"""Get web pages.
 
-Waiting until one pages is download before getting the next."
+Waiting until one pages is download before getting the next.
 """
 
-import asyncio
-from contextlib import closing
 import time
-
+import asyncio
 from async_page import get_page
 
 
-def get_multiple_pages(host, port, waits, show_time=True):
-    """Get multiple pages.
-    """
+async def get_multiple_pages(host, port, waits, show_time=True):
+    """Get multiple pages."""
     start = time.perf_counter()
-    pages = []
-    tasks = []
-    with closing(asyncio.get_event_loop()) as loop:
-        for wait in waits:
-            tasks.append(get_page(host, port, wait))
-        pages = loop.run_until_complete(asyncio.gather(*tasks))
+    tasks = [get_page(host, port, wait) for wait in waits]
+    pages = await asyncio.gather(*tasks)
     duration = time.perf_counter() - start
     sum_waits = sum(waits)
    if show_time:
         msg = 'It took {:4.2f} seconds for a total waiting time of {:4.2f}.'
         print(msg.format(duration, sum_waits))
     return pages
 
-if __name__ == '__main__':
 
-    def main():
-        """Test it.
-        """
-        pages = get_multiple_pages(host='localhost', port='8000',
-                                   waits=[1, 5, 3, 2])
-        for page in pages:
-            print(page)
+async def main():
+    """Test it."""
+    pages = await get_multiple_pages(
+        host='localhost', port='8000', waits=[1, 5, 3, 2])
+    for page in pages:
+        print(page)
+
 
-    main()
+if __name__ == '__main__':
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(main())
+    loop.close()
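
The two async_page-based clients above differ only in how they drive get_page: the blocking variant awaits each page before requesting the next, while the non-blocking variant creates all the coroutines first and runs them concurrently with asyncio.gather, so the total time is roughly the longest single wait instead of the sum of the waits. A rough sketch of that difference (the helper names here are illustrative, not part of the examples; get_page is the coroutine from async_page.py):

import asyncio

from async_page import get_page


async def get_pages_sequentially(host, port, waits):
    # Each await finishes before the next request starts: time ~ sum(waits).
    pages = []
    for wait in waits:
        pages.append(await get_page(host, port, wait))
    return pages


async def get_pages_concurrently(host, port, waits):
    # All requests run at once on the same loop: time ~ max(waits).
    tasks = [get_page(host, port, wait) for wait in waits]
    return await asyncio.gather(*tasks)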

examples/async_page.py (+3, -8)

@@ -1,16 +1,12 @@
-# file: async_page.py
-
-"""Get a "web page" asynchronously.
-"""
+"""Get a web page asynchronously."""
 
 import asyncio
 
 ENCODING = 'ISO-8859-1'
 
 
 def get_encoding(header):
-    """Find out encoding.
-    """
+    """Find out encoding."""
     for line in header:
         if line.lstrip().startswith('Content-type'):
             for entry in line.split(';'):
@@ -20,8 +16,7 @@ def get_encoding(header):
 
 
 async def get_page(host, port, wait=0):
-    """Get a "web page" asynchronously.
-    """
+    """Get a web page asynchronously."""
     reader, writer = await asyncio.open_connection(host, port)
     writer.write(b'\r\n'.join([
         'GET /{} HTTP/1.0'.format(wait).encode(ENCODING),

examples/simple_server.py (+5, -12)

@@ -1,7 +1,4 @@
-# file: simple_server.py
-
-"""Simple HTTP server with GET that waits for given seconds.
-"""
+"""Simple HTTP server with GET that waits for given seconds."""
 
 from http.server import BaseHTTPRequestHandler, HTTPServer
 from socketserver import ThreadingMixIn
@@ -12,18 +9,15 @@
 
 
 class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
-    """Simple multi-threaded HTTP server.
-    """
+    """Simple multi-threaded HTTP server."""
     pass
 
 
 class MyRequestHandler(BaseHTTPRequestHandler):
-    """Very simple request handler. Only supports GET.
-    """
+    """Very simple request handler. Only supports GET."""
 
     def do_GET(self):  # pylint: disable=invalid-name
-        """Respond after seconds given in path.
-        """
+        """Respond after seconds given in path."""
         try:
             seconds = float(self.path[1:])
         except ValueError:
@@ -43,8 +37,7 @@ def do_GET(self):  # pylint: disable=invalid-name
 def run(server_class=ThreadingHTTPServer,
         handler_class=MyRequestHandler,
         port=8000):
-    """Run the simple server on given port.
-    """
+    """Run the simple server on given port."""
     server_address = ('', port)
     httpd = server_class(server_address, handler_class)
     print('Serving from port {} ...'.format(port))
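
The clients in this commit are all meant to be pointed at this example server, which answers GET /<seconds> after waiting the requested number of seconds, handling each request on its own thread. A hypothetical local test run, assuming the module is importable and that run() serves until interrupted (the serving loop inside run() is not shown in this excerpt):

from simple_server import run

# Start the threaded test server; a client request to
# http://localhost:8000/3 is then answered after roughly 3 seconds.
run(port=8000)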

examples/synchronous_client.py (+12, -17)

@@ -1,16 +1,13 @@
-"""Synchronous client to retrieve web pages.
-"""
+"""Synchronous client to retrieve web pages."""
 
-
-from urllib.request import urlopen
 import time
+from urllib.request import urlopen
 
 ENCODING = 'ISO-8859-1'
 
 
 def get_encoding(http_response):
-    """Find out encoding.
-    """
+    """Find out encoding."""
     content_type = http_response.getheader('Content-type')
     for entry in content_type.split(';'):
         if entry.strip().startswith('charset'):
@@ -26,12 +23,11 @@ def get_page(host, port, wait=0):
     full_url = '{}:{}/{}'.format(host, port, wait)
     with urlopen(full_url) as http_response:
         html = http_response.read().decode(get_encoding(http_response))
-    return html
+    return html.strip('\n')
 
 
 def get_multiple_pages(host, port, waits, show_time=True):
-    """Get multiple pages.
-    """
+    """Get multiple pages."""
     start = time.perf_counter()
     pages = [get_page(host, port, wait) for wait in waits]
     duration = time.perf_counter() - start
@@ -42,14 +38,13 @@ def get_multiple_pages(host, port, waits, show_time=True):
     return pages
 
 
-if __name__ == '__main__':
+def main():
+    """Test it."""
+    pages = get_multiple_pages(
+        host='http://localhost', port='8000', waits=[1, 5, 3, 2])
+    for page in pages:
+        print(page)
 
-    def main():
-        """Test it.
-        """
-        pages = get_multiple_pages(host='http://localhost', port='8000',
-                                   waits=[1, 5, 3, 2])
-        for page in pages:
-            print(page)
 
+if __name__ == '__main__':
     main()
