* UDP PeerTestManager: Throw in some synchronization to try to fix stuck tests

This commit is contained in:
zzz
2009-11-14 15:00:28 +00:00
parent b7ebce48ee
commit 1c25c0f408

View File

@ -19,23 +19,99 @@ import net.i2p.util.SimpleScheduler;
import net.i2p.util.SimpleTimer;
/**
*
* From udp.html on the website:
<p>The automation of collaborative reachability testing for peers is
enabled by a sequence of PeerTest messages. With its proper
execution, a peer will be able to determine their own reachability
and may update its behavior accordingly. The testing process is
quite simple:</p>
<pre>
Alice Bob Charlie
PeerTest -------------------&gt;
PeerTest--------------------&gt;
&lt;-------------------PeerTest
&lt;-------------------PeerTest
&lt;------------------------------------------PeerTest
PeerTest------------------------------------------&gt;
&lt;------------------------------------------PeerTest
</pre>
<p>Each of the PeerTest messages carry a nonce identifying the
test series itself, as initialized by Alice. If Alice doesn't
get a particular message that she expects, she will retransmit
accordingly, and based upon the data received or the messages
missing, she will know her reachability. The various end states
that may be reached are as follows:</p>
<ul>
<li>If she doesn't receive a response from Bob, she will retransmit
up to a certain number of times, but if no response ever arrives,
she will know that her firewall or NAT is somehow misconfigured,
rejecting all inbound UDP packets even in direct response to an
outbound packet. Alternately, Bob may be down or unable to get
Charlie to reply.</li>
<li>If Alice doesn't receive a PeerTest message with the
expected nonce from a third party (Charlie), she will retransmit
her initial request to Bob up to a certain number of times, even
if she has received Bob's reply already. If Charlie's first message
still doesn't get through but Bob's does, she knows that she is
behind a NAT or firewall that is rejecting unsolicited connection
attempts and that port forwarding is not operating properly (the
IP and port that Bob offered up should be forwarded).</li>
<li>If Alice receives Bob's PeerTest message and both of Charlie's
PeerTest messages but the enclosed IP and port numbers in Bob's
and Charlie's second messages don't match, she knows that she is
behind a symmetric NAT, rewriting all of her outbound packets with
different 'from' ports for each peer contacted. She will need to
explicitly forward a port and always have that port exposed for
remote connectivity, ignoring further port discovery.</li>
<li>If Alice receives Charlie's first message but not his second,
she will retransmit her PeerTest message to Charlie up to a
certain number of times, but if no response is received she knows
that Charlie is either confused or no longer online.</li>
</ul>
<p>Alice should choose Bob arbitrarily from known peers who seem
to be capable of participating in peer tests. Bob in turn should
choose Charlie arbitrarily from peers that he knows who seem to be
capable of participating in peer tests and who are on a different
IP from both Bob and Alice. If the first error condition occurs
(Alice doesn't get PeerTest messages from Bob), Alice may decide
to designate a new peer as Bob and try again with a different nonce.</p>
<p>Alice's introduction key is included in all of the PeerTest
messages so that she doesn't need to already have an established
session with Bob and so that Charlie can contact her without knowing
any additional information. Alice may go on to establish a session
with either Bob or Charlie, but it is not required.</p>
*/
class PeerTestManager {
private RouterContext _context;
private Log _log;
private UDPTransport _transport;
private PacketBuilder _packetBuilder;
/** map of Long(nonce) to PeerTestState for tests currently in progress */
private final Map _activeTests;
/** current test we are running, or null */
/** map of Long(nonce) to PeerTestState for tests currently in progress (as Bob/Charlie) */
private final Map<Long, PeerTestState> _activeTests;
/** current test we are running (as Alice), or null */
private PeerTestState _currentTest;
private boolean _currentTestComplete;
private List _recentTests;
/** as Alice */
private List<Long> _recentTests;
/** longest we will keep track of a Charlie nonce for */
private static final int MAX_CHARLIE_LIFETIME = 10*1000;
/**
* Have seen peer tests (as Alice) get stuck (_currentTest != null)
* so I've thrown some synchronizization on the methods;
* don't know the root cause or whether this fixes it
*/
public PeerTestManager(RouterContext context, UDPTransport transport) {
_context = context;
_transport = transport;
@ -54,7 +130,11 @@ class PeerTestManager {
private static final int MAX_TEST_TIME = 30*1000;
private static final long MAX_NONCE = (1l << 32) - 1l;
//public void runTest(InetAddress bobIP, int bobPort, SessionKey bobIntroKey) {
public void runTest(InetAddress bobIP, int bobPort, SessionKey bobCipherKey, SessionKey bobMACKey) {
/**
* The next few methods are for when we are Alice
*/
public synchronized void runTest(InetAddress bobIP, int bobPort, SessionKey bobCipherKey, SessionKey bobMACKey) {
if (_currentTest != null) {
if (_log.shouldLog(Log.WARN))
_log.warn("We are already running a test with bob = " + _currentTest.getBobIP() + ", aborting test with bob = " + bobIP);
@ -85,30 +165,33 @@ class PeerTestManager {
private class ContinueTest implements SimpleTimer.TimedEvent {
public void timeReached() {
PeerTestState state = _currentTest;
if (state == null) {
// already completed
return;
} else if (expired()) {
testComplete(true);
} else if (_context.clock().now() - state.getLastSendTime() >= RESEND_TIMEOUT) {
if (state.getReceiveBobTime() <= 0) {
// no message from Bob yet, send it again
sendTestToBob();
} else if (state.getReceiveCharlieTime() <= 0) {
// received from Bob, but no reply from Charlie. send it to
// Bob again so he pokes Charlie
sendTestToBob();
} else {
// received from both Bob and Charlie, but we haven't received a
// second message from Charlie yet
sendTestToCharlie();
synchronized (PeerTestManager.this) {
PeerTestState state = _currentTest;
if (state == null) {
// already completed
return;
} else if (expired()) {
testComplete(true);
} else if (_context.clock().now() - state.getLastSendTime() >= RESEND_TIMEOUT) {
if (state.getReceiveBobTime() <= 0) {
// no message from Bob yet, send it again
sendTestToBob();
} else if (state.getReceiveCharlieTime() <= 0) {
// received from Bob, but no reply from Charlie. send it to
// Bob again so he pokes Charlie
sendTestToBob();
} else {
// received from both Bob and Charlie, but we haven't received a
// second message from Charlie yet
sendTestToCharlie();
}
SimpleScheduler.getInstance().addEvent(ContinueTest.this, RESEND_TIMEOUT);
}
SimpleScheduler.getInstance().addEvent(ContinueTest.this, RESEND_TIMEOUT);
}
}
}
/** call from a synchronized method */
private boolean expired() {
PeerTestState state = _currentTest;
if (state != null)
@ -117,6 +200,7 @@ class PeerTestManager {
return true;
}
/** call from a synchronized method */
private void sendTestToBob() {
PeerTestState test = _currentTest;
if (!expired()) {
@ -128,6 +212,7 @@ class PeerTestManager {
_currentTest = null;
}
}
/** call from a synchronized method */
private void sendTestToCharlie() {
PeerTestState test = _currentTest;
if (!expired()) {
@ -153,9 +238,9 @@ class PeerTestManager {
/**
* Receive a PeerTest message which contains the correct nonce for our current
* test
* test. We are Alice.
*/
private void receiveTestReply(RemoteHostId from, UDPPacketReader.PeerTestReader testInfo) {
private synchronized void receiveTestReply(RemoteHostId from, UDPPacketReader.PeerTestReader testInfo) {
_context.statManager().addRateData("udp.receiveTestReply", 1, 0);
PeerTestState test = _currentTest;
if (expired())
@ -208,6 +293,7 @@ class PeerTestManager {
if (_log.shouldLog(Log.WARN))
_log.warn("Bob chose a charlie we already have a session to, cancelling the test and rerunning (bob: "
+ _currentTest + ", charlie: " + from + ")");
// why are we doing this instead of calling testComplete() ?
_currentTestComplete = true;
_context.statManager().addRateData("udp.statusKnownCharlie", 1, 0);
honorStatus(CommSystemFacade.STATUS_UNKNOWN);
@ -267,6 +353,9 @@ class PeerTestManager {
* Evaluate the info we have and act accordingly, since the test has either timed out or
* we have successfully received the second PeerTest from a Charlie.
*
* @param forgetTest must be true to clear out this test and allow another
*
* call from a synchronized method
*/
private void testComplete(boolean forgetTest) {
_currentTestComplete = true;
@ -324,7 +413,7 @@ class PeerTestManager {
* Receive a test message of some sort from the given peer, queueing up any packet
* that should be sent in response, or if its a reply to our own current testing,
* adjusting our test state.
*
* We could be Alice, Bob, or Charlie.
*/
public void receiveTest(RemoteHostId from, UDPPacketReader reader) {
_context.statManager().addRateData("udp.receiveTest", 1, 0);
@ -334,10 +423,13 @@ class PeerTestManager {
long nonce = testInfo.readNonce();
PeerTestState test = _currentTest;
if ( (test != null) && (test.getNonce() == nonce) ) {
// we are Alice
receiveTestReply(from, testInfo);
return;
}
// we are Bob or Charlie
if ( (testInfo.readIPSize() > 0) && (testPort > 0) ) {
testIP = new byte[testInfo.readIPSize()];
testInfo.readIP(testIP, 0);
@ -360,7 +452,7 @@ class PeerTestManager {
// initiated test
} else {
if (_log.shouldLog(Log.DEBUG))
_log.debug("We are charlie, as te testIP/port is " + RemoteHostId.toString(testIP) + ":" + testPort + " and the state is unknown for " + nonce);
_log.debug("We are charlie, as the testIP/port is " + RemoteHostId.toString(testIP) + ":" + testPort + " and the state is unknown for " + nonce);
// we are charlie, since alice never sends us her IP and port, only bob does (and,
// erm, we're not alice, since it isn't our nonce)
receiveFromBobAsCharlie(from, testInfo, nonce, null);
@ -388,6 +480,8 @@ class PeerTestManager {
}
}
// Below here are methods for when we are Bob or Charlie
private static final int MAX_RELAYED_PER_TEST = 5;
/**