* UDP PeerTestManager: Throw in some synchronization to try to fix stuck tests
This commit is contained in:
@ -19,23 +19,99 @@ import net.i2p.util.SimpleScheduler;
|
||||
import net.i2p.util.SimpleTimer;
|
||||
|
||||
/**
|
||||
*
|
||||
* From udp.html on the website:
|
||||
|
||||
<p>The automation of collaborative reachability testing for peers is
|
||||
enabled by a sequence of PeerTest messages. With its proper
|
||||
execution, a peer will be able to determine their own reachability
|
||||
and may update its behavior accordingly. The testing process is
|
||||
quite simple:</p>
|
||||
|
||||
<pre>
|
||||
Alice Bob Charlie
|
||||
PeerTest ------------------->
|
||||
PeerTest-------------------->
|
||||
<-------------------PeerTest
|
||||
<-------------------PeerTest
|
||||
<------------------------------------------PeerTest
|
||||
PeerTest------------------------------------------>
|
||||
<------------------------------------------PeerTest
|
||||
</pre>
|
||||
|
||||
<p>Each of the PeerTest messages carry a nonce identifying the
|
||||
test series itself, as initialized by Alice. If Alice doesn't
|
||||
get a particular message that she expects, she will retransmit
|
||||
accordingly, and based upon the data received or the messages
|
||||
missing, she will know her reachability. The various end states
|
||||
that may be reached are as follows:</p>
|
||||
|
||||
<ul>
|
||||
<li>If she doesn't receive a response from Bob, she will retransmit
|
||||
up to a certain number of times, but if no response ever arrives,
|
||||
she will know that her firewall or NAT is somehow misconfigured,
|
||||
rejecting all inbound UDP packets even in direct response to an
|
||||
outbound packet. Alternately, Bob may be down or unable to get
|
||||
Charlie to reply.</li>
|
||||
|
||||
<li>If Alice doesn't receive a PeerTest message with the
|
||||
expected nonce from a third party (Charlie), she will retransmit
|
||||
her initial request to Bob up to a certain number of times, even
|
||||
if she has received Bob's reply already. If Charlie's first message
|
||||
still doesn't get through but Bob's does, she knows that she is
|
||||
behind a NAT or firewall that is rejecting unsolicited connection
|
||||
attempts and that port forwarding is not operating properly (the
|
||||
IP and port that Bob offered up should be forwarded).</li>
|
||||
|
||||
<li>If Alice receives Bob's PeerTest message and both of Charlie's
|
||||
PeerTest messages but the enclosed IP and port numbers in Bob's
|
||||
and Charlie's second messages don't match, she knows that she is
|
||||
behind a symmetric NAT, rewriting all of her outbound packets with
|
||||
different 'from' ports for each peer contacted. She will need to
|
||||
explicitly forward a port and always have that port exposed for
|
||||
remote connectivity, ignoring further port discovery.</li>
|
||||
|
||||
<li>If Alice receives Charlie's first message but not his second,
|
||||
she will retransmit her PeerTest message to Charlie up to a
|
||||
certain number of times, but if no response is received she knows
|
||||
that Charlie is either confused or no longer online.</li>
|
||||
</ul>
|
||||
|
||||
<p>Alice should choose Bob arbitrarily from known peers who seem
|
||||
to be capable of participating in peer tests. Bob in turn should
|
||||
choose Charlie arbitrarily from peers that he knows who seem to be
|
||||
capable of participating in peer tests and who are on a different
|
||||
IP from both Bob and Alice. If the first error condition occurs
|
||||
(Alice doesn't get PeerTest messages from Bob), Alice may decide
|
||||
to designate a new peer as Bob and try again with a different nonce.</p>
|
||||
|
||||
<p>Alice's introduction key is included in all of the PeerTest
|
||||
messages so that she doesn't need to already have an established
|
||||
session with Bob and so that Charlie can contact her without knowing
|
||||
any additional information. Alice may go on to establish a session
|
||||
with either Bob or Charlie, but it is not required.</p>
|
||||
|
||||
*/
|
||||
class PeerTestManager {
|
||||
private RouterContext _context;
|
||||
private Log _log;
|
||||
private UDPTransport _transport;
|
||||
private PacketBuilder _packetBuilder;
|
||||
/** map of Long(nonce) to PeerTestState for tests currently in progress */
|
||||
private final Map _activeTests;
|
||||
/** current test we are running, or null */
|
||||
/** map of Long(nonce) to PeerTestState for tests currently in progress (as Bob/Charlie) */
|
||||
private final Map<Long, PeerTestState> _activeTests;
|
||||
/** current test we are running (as Alice), or null */
|
||||
private PeerTestState _currentTest;
|
||||
private boolean _currentTestComplete;
|
||||
private List _recentTests;
|
||||
/** as Alice */
|
||||
private List<Long> _recentTests;
|
||||
|
||||
/** longest we will keep track of a Charlie nonce for */
|
||||
private static final int MAX_CHARLIE_LIFETIME = 10*1000;
|
||||
|
||||
/**
|
||||
* Have seen peer tests (as Alice) get stuck (_currentTest != null)
|
||||
* so I've thrown some synchronizization on the methods;
|
||||
* don't know the root cause or whether this fixes it
|
||||
*/
|
||||
public PeerTestManager(RouterContext context, UDPTransport transport) {
|
||||
_context = context;
|
||||
_transport = transport;
|
||||
@ -54,7 +130,11 @@ class PeerTestManager {
|
||||
private static final int MAX_TEST_TIME = 30*1000;
|
||||
private static final long MAX_NONCE = (1l << 32) - 1l;
|
||||
//public void runTest(InetAddress bobIP, int bobPort, SessionKey bobIntroKey) {
|
||||
public void runTest(InetAddress bobIP, int bobPort, SessionKey bobCipherKey, SessionKey bobMACKey) {
|
||||
|
||||
/**
|
||||
* The next few methods are for when we are Alice
|
||||
*/
|
||||
public synchronized void runTest(InetAddress bobIP, int bobPort, SessionKey bobCipherKey, SessionKey bobMACKey) {
|
||||
if (_currentTest != null) {
|
||||
if (_log.shouldLog(Log.WARN))
|
||||
_log.warn("We are already running a test with bob = " + _currentTest.getBobIP() + ", aborting test with bob = " + bobIP);
|
||||
@ -85,30 +165,33 @@ class PeerTestManager {
|
||||
|
||||
private class ContinueTest implements SimpleTimer.TimedEvent {
|
||||
public void timeReached() {
|
||||
PeerTestState state = _currentTest;
|
||||
if (state == null) {
|
||||
// already completed
|
||||
return;
|
||||
} else if (expired()) {
|
||||
testComplete(true);
|
||||
} else if (_context.clock().now() - state.getLastSendTime() >= RESEND_TIMEOUT) {
|
||||
if (state.getReceiveBobTime() <= 0) {
|
||||
// no message from Bob yet, send it again
|
||||
sendTestToBob();
|
||||
} else if (state.getReceiveCharlieTime() <= 0) {
|
||||
// received from Bob, but no reply from Charlie. send it to
|
||||
// Bob again so he pokes Charlie
|
||||
sendTestToBob();
|
||||
} else {
|
||||
// received from both Bob and Charlie, but we haven't received a
|
||||
// second message from Charlie yet
|
||||
sendTestToCharlie();
|
||||
synchronized (PeerTestManager.this) {
|
||||
PeerTestState state = _currentTest;
|
||||
if (state == null) {
|
||||
// already completed
|
||||
return;
|
||||
} else if (expired()) {
|
||||
testComplete(true);
|
||||
} else if (_context.clock().now() - state.getLastSendTime() >= RESEND_TIMEOUT) {
|
||||
if (state.getReceiveBobTime() <= 0) {
|
||||
// no message from Bob yet, send it again
|
||||
sendTestToBob();
|
||||
} else if (state.getReceiveCharlieTime() <= 0) {
|
||||
// received from Bob, but no reply from Charlie. send it to
|
||||
// Bob again so he pokes Charlie
|
||||
sendTestToBob();
|
||||
} else {
|
||||
// received from both Bob and Charlie, but we haven't received a
|
||||
// second message from Charlie yet
|
||||
sendTestToCharlie();
|
||||
}
|
||||
SimpleScheduler.getInstance().addEvent(ContinueTest.this, RESEND_TIMEOUT);
|
||||
}
|
||||
SimpleScheduler.getInstance().addEvent(ContinueTest.this, RESEND_TIMEOUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** call from a synchronized method */
|
||||
private boolean expired() {
|
||||
PeerTestState state = _currentTest;
|
||||
if (state != null)
|
||||
@ -117,6 +200,7 @@ class PeerTestManager {
|
||||
return true;
|
||||
}
|
||||
|
||||
/** call from a synchronized method */
|
||||
private void sendTestToBob() {
|
||||
PeerTestState test = _currentTest;
|
||||
if (!expired()) {
|
||||
@ -128,6 +212,7 @@ class PeerTestManager {
|
||||
_currentTest = null;
|
||||
}
|
||||
}
|
||||
/** call from a synchronized method */
|
||||
private void sendTestToCharlie() {
|
||||
PeerTestState test = _currentTest;
|
||||
if (!expired()) {
|
||||
@ -153,9 +238,9 @@ class PeerTestManager {
|
||||
|
||||
/**
|
||||
* Receive a PeerTest message which contains the correct nonce for our current
|
||||
* test
|
||||
* test. We are Alice.
|
||||
*/
|
||||
private void receiveTestReply(RemoteHostId from, UDPPacketReader.PeerTestReader testInfo) {
|
||||
private synchronized void receiveTestReply(RemoteHostId from, UDPPacketReader.PeerTestReader testInfo) {
|
||||
_context.statManager().addRateData("udp.receiveTestReply", 1, 0);
|
||||
PeerTestState test = _currentTest;
|
||||
if (expired())
|
||||
@ -208,6 +293,7 @@ class PeerTestManager {
|
||||
if (_log.shouldLog(Log.WARN))
|
||||
_log.warn("Bob chose a charlie we already have a session to, cancelling the test and rerunning (bob: "
|
||||
+ _currentTest + ", charlie: " + from + ")");
|
||||
// why are we doing this instead of calling testComplete() ?
|
||||
_currentTestComplete = true;
|
||||
_context.statManager().addRateData("udp.statusKnownCharlie", 1, 0);
|
||||
honorStatus(CommSystemFacade.STATUS_UNKNOWN);
|
||||
@ -267,6 +353,9 @@ class PeerTestManager {
|
||||
* Evaluate the info we have and act accordingly, since the test has either timed out or
|
||||
* we have successfully received the second PeerTest from a Charlie.
|
||||
*
|
||||
* @param forgetTest must be true to clear out this test and allow another
|
||||
*
|
||||
* call from a synchronized method
|
||||
*/
|
||||
private void testComplete(boolean forgetTest) {
|
||||
_currentTestComplete = true;
|
||||
@ -324,7 +413,7 @@ class PeerTestManager {
|
||||
* Receive a test message of some sort from the given peer, queueing up any packet
|
||||
* that should be sent in response, or if its a reply to our own current testing,
|
||||
* adjusting our test state.
|
||||
*
|
||||
* We could be Alice, Bob, or Charlie.
|
||||
*/
|
||||
public void receiveTest(RemoteHostId from, UDPPacketReader reader) {
|
||||
_context.statManager().addRateData("udp.receiveTest", 1, 0);
|
||||
@ -334,10 +423,13 @@ class PeerTestManager {
|
||||
long nonce = testInfo.readNonce();
|
||||
PeerTestState test = _currentTest;
|
||||
if ( (test != null) && (test.getNonce() == nonce) ) {
|
||||
// we are Alice
|
||||
receiveTestReply(from, testInfo);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// we are Bob or Charlie
|
||||
|
||||
if ( (testInfo.readIPSize() > 0) && (testPort > 0) ) {
|
||||
testIP = new byte[testInfo.readIPSize()];
|
||||
testInfo.readIP(testIP, 0);
|
||||
@ -360,7 +452,7 @@ class PeerTestManager {
|
||||
// initiated test
|
||||
} else {
|
||||
if (_log.shouldLog(Log.DEBUG))
|
||||
_log.debug("We are charlie, as te testIP/port is " + RemoteHostId.toString(testIP) + ":" + testPort + " and the state is unknown for " + nonce);
|
||||
_log.debug("We are charlie, as the testIP/port is " + RemoteHostId.toString(testIP) + ":" + testPort + " and the state is unknown for " + nonce);
|
||||
// we are charlie, since alice never sends us her IP and port, only bob does (and,
|
||||
// erm, we're not alice, since it isn't our nonce)
|
||||
receiveFromBobAsCharlie(from, testInfo, nonce, null);
|
||||
@ -388,6 +480,8 @@ class PeerTestManager {
|
||||
}
|
||||
}
|
||||
|
||||
// Below here are methods for when we are Bob or Charlie
|
||||
|
||||
private static final int MAX_RELAYED_PER_TEST = 5;
|
||||
|
||||
/**
|
||||
|
Reference in New Issue
Block a user