* UDP PeerTestManager: Throw in some synchronization to try to fix stuck tests
This commit is contained in:
@ -19,23 +19,99 @@ import net.i2p.util.SimpleScheduler;
|
|||||||
import net.i2p.util.SimpleTimer;
|
import net.i2p.util.SimpleTimer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* From udp.html on the website:
|
||||||
|
|
||||||
|
<p>The automation of collaborative reachability testing for peers is
|
||||||
|
enabled by a sequence of PeerTest messages. With its proper
|
||||||
|
execution, a peer will be able to determine their own reachability
|
||||||
|
and may update its behavior accordingly. The testing process is
|
||||||
|
quite simple:</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
Alice Bob Charlie
|
||||||
|
PeerTest ------------------->
|
||||||
|
PeerTest-------------------->
|
||||||
|
<-------------------PeerTest
|
||||||
|
<-------------------PeerTest
|
||||||
|
<------------------------------------------PeerTest
|
||||||
|
PeerTest------------------------------------------>
|
||||||
|
<------------------------------------------PeerTest
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>Each of the PeerTest messages carry a nonce identifying the
|
||||||
|
test series itself, as initialized by Alice. If Alice doesn't
|
||||||
|
get a particular message that she expects, she will retransmit
|
||||||
|
accordingly, and based upon the data received or the messages
|
||||||
|
missing, she will know her reachability. The various end states
|
||||||
|
that may be reached are as follows:</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>If she doesn't receive a response from Bob, she will retransmit
|
||||||
|
up to a certain number of times, but if no response ever arrives,
|
||||||
|
she will know that her firewall or NAT is somehow misconfigured,
|
||||||
|
rejecting all inbound UDP packets even in direct response to an
|
||||||
|
outbound packet. Alternately, Bob may be down or unable to get
|
||||||
|
Charlie to reply.</li>
|
||||||
|
|
||||||
|
<li>If Alice doesn't receive a PeerTest message with the
|
||||||
|
expected nonce from a third party (Charlie), she will retransmit
|
||||||
|
her initial request to Bob up to a certain number of times, even
|
||||||
|
if she has received Bob's reply already. If Charlie's first message
|
||||||
|
still doesn't get through but Bob's does, she knows that she is
|
||||||
|
behind a NAT or firewall that is rejecting unsolicited connection
|
||||||
|
attempts and that port forwarding is not operating properly (the
|
||||||
|
IP and port that Bob offered up should be forwarded).</li>
|
||||||
|
|
||||||
|
<li>If Alice receives Bob's PeerTest message and both of Charlie's
|
||||||
|
PeerTest messages but the enclosed IP and port numbers in Bob's
|
||||||
|
and Charlie's second messages don't match, she knows that she is
|
||||||
|
behind a symmetric NAT, rewriting all of her outbound packets with
|
||||||
|
different 'from' ports for each peer contacted. She will need to
|
||||||
|
explicitly forward a port and always have that port exposed for
|
||||||
|
remote connectivity, ignoring further port discovery.</li>
|
||||||
|
|
||||||
|
<li>If Alice receives Charlie's first message but not his second,
|
||||||
|
she will retransmit her PeerTest message to Charlie up to a
|
||||||
|
certain number of times, but if no response is received she knows
|
||||||
|
that Charlie is either confused or no longer online.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Alice should choose Bob arbitrarily from known peers who seem
|
||||||
|
to be capable of participating in peer tests. Bob in turn should
|
||||||
|
choose Charlie arbitrarily from peers that he knows who seem to be
|
||||||
|
capable of participating in peer tests and who are on a different
|
||||||
|
IP from both Bob and Alice. If the first error condition occurs
|
||||||
|
(Alice doesn't get PeerTest messages from Bob), Alice may decide
|
||||||
|
to designate a new peer as Bob and try again with a different nonce.</p>
|
||||||
|
|
||||||
|
<p>Alice's introduction key is included in all of the PeerTest
|
||||||
|
messages so that she doesn't need to already have an established
|
||||||
|
session with Bob and so that Charlie can contact her without knowing
|
||||||
|
any additional information. Alice may go on to establish a session
|
||||||
|
with either Bob or Charlie, but it is not required.</p>
|
||||||
|
|
||||||
*/
|
*/
|
||||||
class PeerTestManager {
|
class PeerTestManager {
|
||||||
private RouterContext _context;
|
private RouterContext _context;
|
||||||
private Log _log;
|
private Log _log;
|
||||||
private UDPTransport _transport;
|
private UDPTransport _transport;
|
||||||
private PacketBuilder _packetBuilder;
|
private PacketBuilder _packetBuilder;
|
||||||
/** map of Long(nonce) to PeerTestState for tests currently in progress */
|
/** map of Long(nonce) to PeerTestState for tests currently in progress (as Bob/Charlie) */
|
||||||
private final Map _activeTests;
|
private final Map<Long, PeerTestState> _activeTests;
|
||||||
/** current test we are running, or null */
|
/** current test we are running (as Alice), or null */
|
||||||
private PeerTestState _currentTest;
|
private PeerTestState _currentTest;
|
||||||
private boolean _currentTestComplete;
|
private boolean _currentTestComplete;
|
||||||
private List _recentTests;
|
/** as Alice */
|
||||||
|
private List<Long> _recentTests;
|
||||||
|
|
||||||
/** longest we will keep track of a Charlie nonce for */
|
/** longest we will keep track of a Charlie nonce for */
|
||||||
private static final int MAX_CHARLIE_LIFETIME = 10*1000;
|
private static final int MAX_CHARLIE_LIFETIME = 10*1000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Have seen peer tests (as Alice) get stuck (_currentTest != null)
|
||||||
|
* so I've thrown some synchronizization on the methods;
|
||||||
|
* don't know the root cause or whether this fixes it
|
||||||
|
*/
|
||||||
public PeerTestManager(RouterContext context, UDPTransport transport) {
|
public PeerTestManager(RouterContext context, UDPTransport transport) {
|
||||||
_context = context;
|
_context = context;
|
||||||
_transport = transport;
|
_transport = transport;
|
||||||
@ -54,7 +130,11 @@ class PeerTestManager {
|
|||||||
private static final int MAX_TEST_TIME = 30*1000;
|
private static final int MAX_TEST_TIME = 30*1000;
|
||||||
private static final long MAX_NONCE = (1l << 32) - 1l;
|
private static final long MAX_NONCE = (1l << 32) - 1l;
|
||||||
//public void runTest(InetAddress bobIP, int bobPort, SessionKey bobIntroKey) {
|
//public void runTest(InetAddress bobIP, int bobPort, SessionKey bobIntroKey) {
|
||||||
public void runTest(InetAddress bobIP, int bobPort, SessionKey bobCipherKey, SessionKey bobMACKey) {
|
|
||||||
|
/**
|
||||||
|
* The next few methods are for when we are Alice
|
||||||
|
*/
|
||||||
|
public synchronized void runTest(InetAddress bobIP, int bobPort, SessionKey bobCipherKey, SessionKey bobMACKey) {
|
||||||
if (_currentTest != null) {
|
if (_currentTest != null) {
|
||||||
if (_log.shouldLog(Log.WARN))
|
if (_log.shouldLog(Log.WARN))
|
||||||
_log.warn("We are already running a test with bob = " + _currentTest.getBobIP() + ", aborting test with bob = " + bobIP);
|
_log.warn("We are already running a test with bob = " + _currentTest.getBobIP() + ", aborting test with bob = " + bobIP);
|
||||||
@ -85,6 +165,7 @@ class PeerTestManager {
|
|||||||
|
|
||||||
private class ContinueTest implements SimpleTimer.TimedEvent {
|
private class ContinueTest implements SimpleTimer.TimedEvent {
|
||||||
public void timeReached() {
|
public void timeReached() {
|
||||||
|
synchronized (PeerTestManager.this) {
|
||||||
PeerTestState state = _currentTest;
|
PeerTestState state = _currentTest;
|
||||||
if (state == null) {
|
if (state == null) {
|
||||||
// already completed
|
// already completed
|
||||||
@ -108,7 +189,9 @@ class PeerTestManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** call from a synchronized method */
|
||||||
private boolean expired() {
|
private boolean expired() {
|
||||||
PeerTestState state = _currentTest;
|
PeerTestState state = _currentTest;
|
||||||
if (state != null)
|
if (state != null)
|
||||||
@ -117,6 +200,7 @@ class PeerTestManager {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** call from a synchronized method */
|
||||||
private void sendTestToBob() {
|
private void sendTestToBob() {
|
||||||
PeerTestState test = _currentTest;
|
PeerTestState test = _currentTest;
|
||||||
if (!expired()) {
|
if (!expired()) {
|
||||||
@ -128,6 +212,7 @@ class PeerTestManager {
|
|||||||
_currentTest = null;
|
_currentTest = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/** call from a synchronized method */
|
||||||
private void sendTestToCharlie() {
|
private void sendTestToCharlie() {
|
||||||
PeerTestState test = _currentTest;
|
PeerTestState test = _currentTest;
|
||||||
if (!expired()) {
|
if (!expired()) {
|
||||||
@ -153,9 +238,9 @@ class PeerTestManager {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Receive a PeerTest message which contains the correct nonce for our current
|
* Receive a PeerTest message which contains the correct nonce for our current
|
||||||
* test
|
* test. We are Alice.
|
||||||
*/
|
*/
|
||||||
private void receiveTestReply(RemoteHostId from, UDPPacketReader.PeerTestReader testInfo) {
|
private synchronized void receiveTestReply(RemoteHostId from, UDPPacketReader.PeerTestReader testInfo) {
|
||||||
_context.statManager().addRateData("udp.receiveTestReply", 1, 0);
|
_context.statManager().addRateData("udp.receiveTestReply", 1, 0);
|
||||||
PeerTestState test = _currentTest;
|
PeerTestState test = _currentTest;
|
||||||
if (expired())
|
if (expired())
|
||||||
@ -208,6 +293,7 @@ class PeerTestManager {
|
|||||||
if (_log.shouldLog(Log.WARN))
|
if (_log.shouldLog(Log.WARN))
|
||||||
_log.warn("Bob chose a charlie we already have a session to, cancelling the test and rerunning (bob: "
|
_log.warn("Bob chose a charlie we already have a session to, cancelling the test and rerunning (bob: "
|
||||||
+ _currentTest + ", charlie: " + from + ")");
|
+ _currentTest + ", charlie: " + from + ")");
|
||||||
|
// why are we doing this instead of calling testComplete() ?
|
||||||
_currentTestComplete = true;
|
_currentTestComplete = true;
|
||||||
_context.statManager().addRateData("udp.statusKnownCharlie", 1, 0);
|
_context.statManager().addRateData("udp.statusKnownCharlie", 1, 0);
|
||||||
honorStatus(CommSystemFacade.STATUS_UNKNOWN);
|
honorStatus(CommSystemFacade.STATUS_UNKNOWN);
|
||||||
@ -267,6 +353,9 @@ class PeerTestManager {
|
|||||||
* Evaluate the info we have and act accordingly, since the test has either timed out or
|
* Evaluate the info we have and act accordingly, since the test has either timed out or
|
||||||
* we have successfully received the second PeerTest from a Charlie.
|
* we have successfully received the second PeerTest from a Charlie.
|
||||||
*
|
*
|
||||||
|
* @param forgetTest must be true to clear out this test and allow another
|
||||||
|
*
|
||||||
|
* call from a synchronized method
|
||||||
*/
|
*/
|
||||||
private void testComplete(boolean forgetTest) {
|
private void testComplete(boolean forgetTest) {
|
||||||
_currentTestComplete = true;
|
_currentTestComplete = true;
|
||||||
@ -324,7 +413,7 @@ class PeerTestManager {
|
|||||||
* Receive a test message of some sort from the given peer, queueing up any packet
|
* Receive a test message of some sort from the given peer, queueing up any packet
|
||||||
* that should be sent in response, or if its a reply to our own current testing,
|
* that should be sent in response, or if its a reply to our own current testing,
|
||||||
* adjusting our test state.
|
* adjusting our test state.
|
||||||
*
|
* We could be Alice, Bob, or Charlie.
|
||||||
*/
|
*/
|
||||||
public void receiveTest(RemoteHostId from, UDPPacketReader reader) {
|
public void receiveTest(RemoteHostId from, UDPPacketReader reader) {
|
||||||
_context.statManager().addRateData("udp.receiveTest", 1, 0);
|
_context.statManager().addRateData("udp.receiveTest", 1, 0);
|
||||||
@ -334,10 +423,13 @@ class PeerTestManager {
|
|||||||
long nonce = testInfo.readNonce();
|
long nonce = testInfo.readNonce();
|
||||||
PeerTestState test = _currentTest;
|
PeerTestState test = _currentTest;
|
||||||
if ( (test != null) && (test.getNonce() == nonce) ) {
|
if ( (test != null) && (test.getNonce() == nonce) ) {
|
||||||
|
// we are Alice
|
||||||
receiveTestReply(from, testInfo);
|
receiveTestReply(from, testInfo);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we are Bob or Charlie
|
||||||
|
|
||||||
if ( (testInfo.readIPSize() > 0) && (testPort > 0) ) {
|
if ( (testInfo.readIPSize() > 0) && (testPort > 0) ) {
|
||||||
testIP = new byte[testInfo.readIPSize()];
|
testIP = new byte[testInfo.readIPSize()];
|
||||||
testInfo.readIP(testIP, 0);
|
testInfo.readIP(testIP, 0);
|
||||||
@ -360,7 +452,7 @@ class PeerTestManager {
|
|||||||
// initiated test
|
// initiated test
|
||||||
} else {
|
} else {
|
||||||
if (_log.shouldLog(Log.DEBUG))
|
if (_log.shouldLog(Log.DEBUG))
|
||||||
_log.debug("We are charlie, as te testIP/port is " + RemoteHostId.toString(testIP) + ":" + testPort + " and the state is unknown for " + nonce);
|
_log.debug("We are charlie, as the testIP/port is " + RemoteHostId.toString(testIP) + ":" + testPort + " and the state is unknown for " + nonce);
|
||||||
// we are charlie, since alice never sends us her IP and port, only bob does (and,
|
// we are charlie, since alice never sends us her IP and port, only bob does (and,
|
||||||
// erm, we're not alice, since it isn't our nonce)
|
// erm, we're not alice, since it isn't our nonce)
|
||||||
receiveFromBobAsCharlie(from, testInfo, nonce, null);
|
receiveFromBobAsCharlie(from, testInfo, nonce, null);
|
||||||
@ -388,6 +480,8 @@ class PeerTestManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Below here are methods for when we are Bob or Charlie
|
||||||
|
|
||||||
private static final int MAX_RELAYED_PER_TEST = 5;
|
private static final int MAX_RELAYED_PER_TEST = 5;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Reference in New Issue
Block a user