Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: multi nat initialization causing dead lock in waku tests + serialize test runs to avoid timing and port occupied issues #2799

Merged
merged 5 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,15 @@ jobs:

- name: Run tests
run: |
if [ ${{ runner.os }} == "Linux" ]; then
sudo docker run --rm -d -e POSTGRES_PASSWORD=test123 -p 5432:5432 postgres:15.4-alpine3.18
fi

postgres_enabled=0
if [ ${{ runner.os }} == "Linux" ]; then
sudo docker run --rm -d -e POSTGRES_PASSWORD=test123 -p 5432:5432 postgres:15.4-alpine3.18
postgres_enabled=1
fi

export MAKEFLAGS="-j1"
export NIMFLAGS="--colors:off -d:chronicles_colors:none"

Comment on lines +116 to +122
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That LGTM but shall we add that in a separate PR so that is clear the commit that applied it?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That LGTM but shall we add that in a separate PR so that is clear the commit that applied it?

@NagyZoltanPeter - maybe easier to just update the PR title and description to also reflect the CI test change :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:-) agree! I do it.

make RLN_V${{matrix.rln_version}}=true V=1 LOG_LEVEL=DEBUG QUICK_AND_DIRTY_COMPILER=1 POSTGRES=$postgres_enabled test testwakunode2

build-docker-image:
Expand Down
58 changes: 38 additions & 20 deletions waku/common/utils/nat.nim
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ import chronicles, eth/net/nat, stew/results, nativesockets
logScope:
topics = "nat"

## Due to the design of nim-eth/nat module we must ensure it is only initialized once.
## see: https://github.com/waku-org/nwaku/issues/2628
## Details: nim-eth/nat module starts a meaintenance thread for refreshing the NAT mappings, but everything in the module is global,
## there is no room to store multiple configurations.
## Exact meaning: redirectPorts cannot be called twice in a program lifetime.
## During waku tests we happen to start several node instances in parallel thus resulting in multiple NAT configurations and multiple threads.
## Those threads will dead lock each other in tear down.
var singletonNat: bool = false

proc setupNat*(
natConf, clientId: string, tcpPort, udpPort: Port
): Result[
Expand All @@ -26,26 +35,35 @@ proc setupNat*(
tuple[ip: Option[IpAddress], tcpPort: Option[Port], udpPort: Option[Port]]

if strategy != NatNone:
let extIp = getExternalIP(strategy)
if extIP.isSome():
endpoint.ip = some(extIp.get())
# RedirectPorts in considered a gcsafety violation
# because it obtains the address of a non-gcsafe proc?
var extPorts: Option[(Port, Port)]
try:
extPorts = (
{.gcsafe.}:
redirectPorts(tcpPort = tcpPort, udpPort = udpPort, description = clientId)
)
except CatchableError:
# TODO: nat.nim Error: can raise an unlisted exception: Exception. Isolate here for now.
error "unable to determine external ports"
extPorts = none((Port, Port))

if extPorts.isSome():
let (extTcpPort, extUdpPort) = extPorts.get()
endpoint.tcpPort = some(extTcpPort)
endpoint.udpPort = some(extUdpPort)
## Only initialize the NAT module once
## redirectPorts cannot be called twice in a program lifetime.
## We can do it as same happens if getExternalIP fails and returns None
if singletonNat:
warn "NAT already initialized, skipping as cannot be done multiple times"
else:
singletonNat = true
let extIp = getExternalIP(strategy)
if extIP.isSome():
endpoint.ip = some(extIp.get())
# RedirectPorts in considered a gcsafety violation
# because it obtains the address of a non-gcsafe proc?
var extPorts: Option[(Port, Port)]
try:
extPorts = (
{.gcsafe.}:
redirectPorts(
tcpPort = tcpPort, udpPort = udpPort, description = clientId
)
)
except CatchableError:
# TODO: nat.nim Error: can raise an unlisted exception: Exception. Isolate here for now.
error "unable to determine external ports"
extPorts = none((Port, Port))

if extPorts.isSome():
let (extTcpPort, extUdpPort) = extPorts.get()
endpoint.tcpPort = some(extTcpPort)
endpoint.udpPort = some(extUdpPort)
else: # NatNone
if not natConf.startsWith("extip:"):
return err("not a valid NAT mechanism: " & $natConf)
Expand Down
Loading