-
Notifications
You must be signed in to change notification settings - Fork 12
parallelize dialbacks #20
Changes from all commits
67d2dcb
88875a7
de44e81
8244eed
f645f11
2584eaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,7 @@ var ( | |
AutoNATBootDelay = 15 * time.Second | ||
AutoNATRetryInterval = 90 * time.Second | ||
AutoNATRefreshInterval = 15 * time.Minute | ||
AutoNATRequestTimeout = 60 * time.Second | ||
AutoNATRequestTimeout = 30 * time.Second | ||
) | ||
|
||
// AutoNAT is the interface for ambient NAT autodiscovery | ||
|
@@ -135,50 +135,84 @@ func (as *AmbientAutoNAT) autodetect() { | |
} | ||
|
||
cli := NewAutoNATClient(as.host, as.getAddrs) | ||
failures := 0 | ||
|
||
for _, pi := range peers { | ||
ctx, cancel := context.WithTimeout(as.ctx, AutoNATRequestTimeout) | ||
as.host.Peerstore().AddAddrs(pi.ID, pi.Addrs, pstore.TempAddrTTL) | ||
a, err := cli.DialBack(ctx, pi.ID) | ||
cancel() | ||
|
||
switch { | ||
case err == nil: | ||
log.Debugf("NAT status is public; address through %s: %s", pi.ID.Pretty(), a.String()) | ||
as.mx.Lock() | ||
as.addr = a | ||
as.status = NATStatusPublic | ||
as.confidence = 0 | ||
as.mx.Unlock() | ||
return | ||
ctx, cancel := context.WithTimeout(as.ctx, AutoNATRequestTimeout) | ||
defer cancel() | ||
|
||
var result struct { | ||
sync.Mutex | ||
private int | ||
public int | ||
pubaddr ma.Multiaddr | ||
} | ||
|
||
case IsDialError(err): | ||
log.Debugf("dial error through %s: %s", pi.ID.Pretty(), err.Error()) | ||
failures++ | ||
if failures >= 3 || as.confidence >= 3 { // 3 times is enemy action | ||
log.Debugf("NAT status is private") | ||
as.mx.Lock() | ||
as.status = NATStatusPrivate | ||
as.confidence = 3 | ||
as.mx.Unlock() | ||
return | ||
} | ||
probe := 3 - as.confidence | ||
if probe == 0 { | ||
probe = 1 | ||
} | ||
if probe > len(peers) { | ||
probe = len(peers) | ||
} | ||
|
||
default: | ||
log.Debugf("Error dialing through %s: %s", pi.ID.Pretty(), err.Error()) | ||
} | ||
var wg sync.WaitGroup | ||
|
||
for _, pi := range peers[:probe] { | ||
wg.Add(1) | ||
go func(pi pstore.PeerInfo) { | ||
defer wg.Done() | ||
|
||
as.host.Peerstore().AddAddrs(pi.ID, pi.Addrs, pstore.TempAddrTTL) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note to self: using |
||
a, err := cli.DialBack(ctx, pi.ID) | ||
|
||
switch { | ||
case err == nil: | ||
log.Debugf("Dialback through %s successful; public address is %s", pi.ID.Pretty(), a.String()) | ||
result.Lock() | ||
result.public++ | ||
result.pubaddr = a | ||
result.Unlock() | ||
|
||
case IsDialError(err): | ||
log.Debugf("Dialback through %s failed", pi.ID.Pretty()) | ||
result.Lock() | ||
result.private++ | ||
result.Unlock() | ||
|
||
default: | ||
log.Debugf("Dialback error through %s: %s", pi.ID.Pretty(), err) | ||
} | ||
}(pi) | ||
} | ||
|
||
wg.Wait() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to wait for all of them? That is, could we like, try N and wait for M? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How would this work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ask N nodes for dial backs but walk away after M responses. It may not be worth it but it would allow us to tolerate a few bad nodes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: I'm find merging this as-is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think it adds much to wait for fewer nodes, other than adding complexity. At worst a bad node will timeout, which is 30s. |
||
|
||
as.mx.Lock() | ||
if failures > 0 { | ||
as.status = NATStatusPrivate | ||
as.confidence++ | ||
if result.public > 0 { | ||
log.Debugf("NAT status is public") | ||
if as.status == NATStatusPrivate { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A comment here saying that we're flipping our asserted NAT status? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A log comment I take it. Sure, will do. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
// we are flipping our NATStatus, so confidence drops to 0 | ||
as.confidence = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Shouldn't this be a confidence of 1? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer this to have a confidence of 0 if there is a flip, so that we get more observations in the next round. |
||
} else if as.confidence < 3 { | ||
as.confidence++ | ||
} | ||
as.status = NATStatusPublic | ||
as.addr = result.pubaddr | ||
} else if result.private > 0 { | ||
log.Debugf("NAT status is private") | ||
if as.status == NATStatusPublic { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as above? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
// we are flipping our NATStatus, so confidence drops to 0 | ||
as.confidence = 0 | ||
} else if as.confidence < 3 { | ||
as.confidence++ | ||
} | ||
as.status = NATStatusPrivate | ||
as.addr = nil | ||
} else if as.confidence > 0 { | ||
// don't just flip to unknown, reduce confidence first | ||
as.confidence-- | ||
} else { | ||
as.status = NATStatusUnknown | ||
as.confidence = 0 | ||
log.Debugf("NAT status is unknown") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it ok for a single round with unresponsive peers to reset our NAT status entirely? I think we need some error tolerance here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd decrement the confidence and retain our previous status, until confidence drops to zero and then we can switch to Unknown? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, that's reasonable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done - won't flip to unknown if we have any confidence, but rather reduce the confidence and wait for the next round. |
||
as.status = NATStatusUnknown | ||
as.addr = nil | ||
} | ||
as.mx.Unlock() | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could do:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure, can do if you think it's cleaner this way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there is a problem with doing it this way: if peers = 0 and probe is also 0 (confidence = 3), then we'll have an out of bounds slice.
then again we can't possibly have no peers and confidence = 3, so it should be ok.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but I'd rather not have to think about this when reading the code (apparent bug that is not)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'll keep the original code here, as it needs no comments.