Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
28cb974
rename http.py to client.py due to causing No Module Found error for …
SgtMarmite Jan 19, 2023
9913dfd
httpx initial implementation
SgtMarmite Jan 20, 2023
1429b68
methods update, post, patch, delete
SgtMarmite Mar 7, 2023
162833d
update methods
SgtMarmite Mar 16, 2023
8d4a9ac
httpx updates
SgtMarmite Apr 21, 2023
00546a9
add raw methods
SgtMarmite Apr 21, 2023
7006391
update client reference
SgtMarmite Apr 21, 2023
c0e2bba
cleanup, readme update
SgtMarmite May 23, 2023
e98f81b
move examples to separate folder
SgtMarmite May 23, 2023
2065246
add new tests
SgtMarmite May 24, 2023
3310379
update tests
SgtMarmite May 24, 2023
7dcdd61
finalize tests
SgtMarmite May 25, 2023
35033cc
add Storage API example to docs
SgtMarmite May 25, 2023
7406d34
update backoff calculation
SgtMarmite May 25, 2023
c294a6a
remove unused imports/installs
SgtMarmite May 25, 2023
8033dc9
remove more unused imports/installs
SgtMarmite May 25, 2023
5c29ef2
add update_auth_header method + tests
SgtMarmite May 26, 2023
3c54755
Update async_storage_client.py
SgtMarmite May 31, 2023
fedba9c
add correct typehints
SgtMarmite May 31, 2023
b387e31
enable rate limiting using aiolimiter
SgtMarmite Jun 1, 2023
56bb576
add process_multiple method and example
SgtMarmite Jun 2, 2023
865a711
refactor retry strategy
SgtMarmite Jun 14, 2023
7270fa1
fix tests
SgtMarmite Sep 7, 2023
677b0f5
add workaround to disable INFO msgs coming from httpx library
SgtMarmite Oct 27, 2023
f8f4d96
add retry warning message
SgtMarmite Oct 28, 2023
1ccea91
reduce debug messages
SgtMarmite Oct 30, 2023
2acc6bc
remove debug param
SgtMarmite Oct 30, 2023
e269ddf
revert logging setup
SgtMarmite Oct 30, 2023
90edc2a
silence httpcore debug messages
SgtMarmite Oct 30, 2023
782f4d5
reenable debug
Jul 29, 2024
c663646
Update async_client.py
Jul 29, 2024
91f5233
Update async_client.py
Jul 29, 2024
4f1b061
bump httpx version
Jul 29, 2024
cfe3c9b
make max_requests per second float
Sep 12, 2024
6ec6c37
added detailed exception message
kudj Nov 7, 2024
6c59f8a
added detailed exception message
kudj Nov 7, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,7 @@ dmypy.json
.idea/

# MacOS files
.DS_Store
.DS_Store

# Local test script
/docs/examples/test.py
72 changes: 71 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,74 @@ class KBCStorageClient(HttpClient):
cl = KBCStorageClient("my_token")

print(cl.get_files())
```
```

## Async Usage

The package also provides an asynchronous version of the HTTP client called AsyncHttpClient.
It allows you to make asynchronous requests using async/await syntax. To use the AsyncHttpClient, import it from keboola.http_client:

```python
from keboola.http_client import AsyncHttpClient
```

The AsyncHttpClient class provides functionality similar to the HttpClient class, but with asynchronous methods such as get, post, put, patch, and delete that return awaitable coroutines.
You can use these methods within async functions to perform non-blocking HTTP requests.

```python
import asyncio
from keboola.http_client import AsyncHttpClient

async def main():
    """Send one GET request and report either the payload or the failure."""
    base_url = "https://api.example.com/"
    # The context manager closes the client once the block is left.
    async with AsyncHttpClient(base_url) as client:
        response = await client.get("endpoint")

        if response.status_code == 200:
            data = response.json()
            # Process the response data
            print(data)
        else:
            # Handle the error (a branch holding only a comment is a SyntaxError)
            print(f"Request failed with status code {response.status_code}")

asyncio.run(main())
```

The AsyncHttpClient provides similar initialization and request methods as the HttpClient.
The request methods return awaitable coroutines that can be awaited in an asynchronous context.

#### Building HTTP client based on AsyncHttpClient Example
This example demonstrates the default use of the AsyncHttpClient as a base for asynchronous REST API clients.

```python
import asyncio
from keboola.http_client import AsyncHttpClient

BASE_URL = 'https://connection.keboola.com/v2/storage'
MAX_RETRIES = 3

class KBCStorageClient(AsyncHttpClient):
    """Example asynchronous Storage API client built on AsyncHttpClient."""

    def __init__(self, storage_token):
        """Pass base URL, retry settings and the token header to the base client."""
        super().__init__(
            base_url=BASE_URL,
            retries=MAX_RETRIES,
            backoff_factor=0.3,
            retry_status_codes=[429, 500, 502, 504],
            auth_header={"X-StorageApi-Token": storage_token},
        )

    async def get_files(self, show_expired=False):
        """Request the 'tables' endpoint with the showExpired query parameter."""
        # NOTE(review): the method is named get_files but queries 'tables' -
        # confirm the intended endpoint.
        query = {"showExpired": show_expired}
        return await self.get('tables', params=query, timeout=5)

async def main():
    """Fetch the listing once and print it, closing the client afterwards."""
    # Using the client as an async context manager ensures it is closed even
    # when the request raises; the original never closed it.
    async with KBCStorageClient("my_token") as cl:
        files = await cl.get_files(show_expired=False)
    print(files)

asyncio.run(main())
```
**Note:** Since there are no parallel requests being made, you won't notice any speedup for this use case.
For an example where you can see the speedup thanks to concurrent async requests, see pokeapi_process_multiple.py in docs/examples.
61 changes: 61 additions & 0 deletions docs/examples/poekapi_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import time
import asyncio
from keboola.http_client import AsyncHttpClient
import csv
import httpx
import os


async def fetch_pokemon(client, poke_id):
    """Fetch one entry by id through ``client``.

    Returns whatever ``client.get`` returns, or ``None`` when the request
    fails with HTTP 404. Any other ``httpx.HTTPStatusError`` is re-raised.
    """
    try:
        return await client.get(str(poke_id))
    except httpx.HTTPStatusError as status_error:
        # Only "not found" is treated as an expected outcome.
        if status_error.response.status_code != 404:
            raise
        return None


async def save_to_csv(details, filename="pokemon_details.csv"):
    """Append one row (name, height, weight) from ``details`` to a CSV file.

    Args:
        details: Mapping that must contain the keys "name", "height" and
            "weight"; any other keys are ignored.
        filename: Target CSV path. Defaults to the path used previously.

    Raises:
        KeyError: If ``details`` lacks one of the required keys.

    The header row is written whenever the file is missing *or empty*, so a
    pre-created empty file no longer yields a header-less CSV.
    """
    fieldnames = ["name", "height", "weight"]

    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0

    # Append mode creates the file when it does not exist yet.
    with open(filename, "a", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        if needs_header:
            writer.writeheader()

        writer.writerow({field: details[field] for field in fieldnames})


async def main_async():
    """Fetch entries one id at a time until one is missing, saving each to CSV."""
    base_url = "https://pokeapi.co/api/v2/pokemon/"
    start_time = time.time()

    async with AsyncHttpClient(base_url=base_url, max_requests_per_second=20) as client:
        poke_id = 1
        details = await fetch_pokemon(client, poke_id)

        # fetch_pokemon returns None for the first id that does not exist.
        while details is not None:
            await save_to_csv(details)
            poke_id += 1
            details = await fetch_pokemon(client, poke_id)

    end_time = time.time()
    print(f"Async: Fetched details for {poke_id - 1} Pokémon in {end_time - start_time:.2f} seconds.")


if __name__ == "__main__":
    asyncio.run(main_async())
44 changes: 44 additions & 0 deletions docs/examples/pokeapi_process_multiple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import asyncio
import csv
import time
from typing import List

from keboola.http_client import AsyncHttpClient


def generate_jobs(nr_of_jobs):
    """Build GET job descriptions for endpoints "1" through str(nr_of_jobs)."""
    endpoints = map(str, range(1, nr_of_jobs + 1))
    return [{'method': 'GET', 'endpoint': endpoint} for endpoint in endpoints]

def save_to_csv(results: List[dict]):
    """Write name/height/weight of every result to pokemon_details.csv.

    The file is overwritten on each call and always starts with a header row.
    """
    filename = "pokemon_details.csv"
    fieldnames = ["name", "height", "weight"]  # columns stored in the CSV

    with open(filename, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # Rows are produced lazily, one dict per result, in input order.
        writer.writerows(
            {
                "name": entry["name"],
                "height": entry["height"],
                "weight": entry["weight"],
            }
            for entry in results
        )

async def main_async():
base_url = "https://pokeapi.co/api/v2/pokemon/"
start_time = time.time()

client = AsyncHttpClient(base_url=base_url, max_requests_per_second=20)

jobs = generate_jobs(1000)

results = await client.process_multiple(jobs)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to achieve this without having the main_async function async? E.g. something like this My idea was to have a function, that would run and wait for multiple jobs and just returned all the results (with the disadvantage of having to store all results in memory). Then I would just call something like results = client.run_multiple_sync() in my sync function. Is that possible?

Also I somehow dislike the name process_multiple what does it do? It actually sends multiple requests (not jobs) and returns list of result futures right? The request is basically the parameters of _request_raw http.py equivalent. I think it should be named something like run_multiple_requests(). Also I can imagine having equivalent for each method, e.g. post_multiple

await client.close()

end_time = time.time()
print(f"Fetched details for {len(results)} Pokémon in {end_time - start_time:.2f} seconds.")

save_to_csv(results)


if __name__ == "__main__":
asyncio.run(main_async())
20 changes: 20 additions & 0 deletions docs/examples/storage_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from keboola.http_client import HttpClient

BASE_URL = 'https://connection.keboola.com/v2/storage'
MAX_RETRIES = 3


class KBCStorageClient(HttpClient):
    """Example Storage API client built on top of HttpClient."""

    def __init__(self, storage_token):
        """Pass base URL, retry settings and the token header to the base client."""
        super().__init__(
            base_url=BASE_URL,
            max_retries=MAX_RETRIES,
            backoff_factor=0.3,
            status_forcelist=(429, 500, 502, 504),
            default_http_header={"X-StorageApi-Token": storage_token},
        )

    def get_files(self, show_expired=None):
        """Request the 'tables' endpoint, sending show_expired as 'include'."""
        # NOTE(review): the method name, the 'include' parameter and the
        # 'tables' endpoint do not obviously match - confirm the intent.
        query = {"include": show_expired}
        return self.get('tables', params=query, timeout=5)

# Example usage; "my_token" is the value sent in the X-StorageApi-Token header.
cl = KBCStorageClient("my_token")

print(cl.get_files())
28 changes: 28 additions & 0 deletions docs/examples/storage_client_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import asyncio
from keboola.http_client import AsyncHttpClient

BASE_URL = 'https://connection.keboola.com/v2/storage'
MAX_RETRIES = 3

class KBCStorageClient(AsyncHttpClient):
    """Example asynchronous Storage API client built on AsyncHttpClient."""

    def __init__(self, storage_token):
        """Pass base URL, retry settings and the token header to the base client."""
        token_header = {"X-StorageApi-Token": storage_token}
        super().__init__(
            base_url=BASE_URL,
            retries=MAX_RETRIES,
            backoff_factor=0.3,
            retry_status_codes=[429, 500, 502, 504],
            auth_header=token_header,
        )

    async def get_files(self, show_expired=False):
        """Request the 'tables' endpoint with the showExpired query parameter."""
        # NOTE(review): the method is named get_files but queries 'tables' -
        # confirm the intended endpoint.
        query = {"showExpired": show_expired}
        return await self.get('tables', params=query, timeout=5)

async def main():
    """Fetch the listing once and print it, closing the client afterwards."""
    # Using the client as an async context manager ensures it is closed even
    # when the request raises; the original never closed it.
    async with KBCStorageClient("my_token") as cl:
        files = await cl.get_files(show_expired=False)
    print(files)

asyncio.run(main())
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
requests
requests
httpx==0.27.0
aiolimiter==1.1.0
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
setup_requires=['pytest-runner', 'flake8'],
tests_require=['pytest'],
install_requires=[
'requests'
'requests',
'httpx'
],
author_email="support@keboola.com",
description="General HTTP requests library for Python applications running in Keboola Connection environment",
Expand Down
3 changes: 2 additions & 1 deletion src/keboola/http_client/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .http import HttpClient # noqa
from .client import HttpClient # noqa
from .async_client import AsyncHttpClient # noqa
Loading
Loading