Skip to content

Commit

Permalink
Added a few workarounds for HTTP driver's latency issues (#2343)
Browse files Browse the repository at this point in the history
Co-authored-by: Chester Chen <512707+chesterxgchen@users.noreply.github.com>
  • Loading branch information
nvidianz and chesterxgchen authored Feb 2, 2024
1 parent 78e493b commit 5fae920
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 6 deletions.
13 changes: 10 additions & 3 deletions nvflare/fuel/f3/drivers/aio_http_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ async def _async_send_frame(self, frame: BytesAlike):
# This is to yield control. See bug: https://github.com/aaugustin/websockets/issues/865
await asyncio.sleep(0)
except Exception as ex:
log.error(f"Error sending frame for connection {self}: {secure_format_exception(ex)}")
log.error(f"Error sending frame for connection {self}, closing: {secure_format_exception(ex)}")
self.close()


class AioHttpDriver(BaseDriver):
Expand Down Expand Up @@ -184,5 +185,11 @@ async def _handler(self, websocket):
async def _read_loop(conn: WsConnection):
while not conn.closing:
# Reading from websocket and call receiver CB
frame = await conn.websocket.recv()
conn.process_frame(frame)
try:
frame = await conn.websocket.recv()
conn.process_frame(frame)
except ConnectionClosedOK as ex:
raise ex
except Exception as ex:
log.error(f"Exception {type(ex)} on connection {conn}: {ex}")
raise ex
2 changes: 1 addition & 1 deletion nvflare/fuel/f3/streaming/byte_receiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
MAX_OUT_SEQ_CHUNKS = 16
# 1/4 of the window size
ACK_INTERVAL = 1024 * 1024 * 4
READ_TIMEOUT = 60
READ_TIMEOUT = 300
COUNTER_NAME_RECEIVED = "received"


Expand Down
2 changes: 1 addition & 1 deletion nvflare/fuel/f3/streaming/byte_streamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

STREAM_CHUNK_SIZE = 1024 * 1024
STREAM_WINDOW_SIZE = 16 * STREAM_CHUNK_SIZE
STREAM_ACK_WAIT = 10
STREAM_ACK_WAIT = 60

STREAM_TYPE_BYTE = "byte"
STREAM_TYPE_BLOB = "blob"
Expand Down
2 changes: 1 addition & 1 deletion nvflare/private/fed/client/fed_client_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def __init__(
cell=cell,
client_register_interval=client_args.get("client_register_interval", 2.0),
timeout=client_args.get("communication_timeout", 30.0),
maint_msg_timeout=client_args.get("maint_msg_timeout", 5.0),
maint_msg_timeout=client_args.get("maint_msg_timeout", 30.0),
)

self.secure_train = secure_train
Expand Down

0 comments on commit 5fae920

Please sign in to comment.