* NTCP:
- Try to fix 100% CPU, caused perhaps by JVM NIO bug... - Fix failsafe stats
This commit is contained in:
11
history.txt
11
history.txt
@ -1,3 +1,14 @@
|
||||
2008-06-30 zzz
|
||||
* configstats.jsp: Fix NPE when no stats checked (thanks nothome27!)
|
||||
* i2psnark:
|
||||
- Fix NPE caused by race (thanks echelon!)
|
||||
- Add mastertracker, remove de-ebook
|
||||
* NTCP:
|
||||
- Try to fix 100% CPU, caused perhaps by JVM NIO bug...
|
||||
- Fix failsafe stats
|
||||
* PersistentDataStore: More leaseSet code cleanup
|
||||
* SimpleTimer: Change congestion message from error to warn
|
||||
|
||||
2008-06-24 zzz
|
||||
* FloodfillMonitorJob: Change range from 5-7 to 4-6
|
||||
* NTCP: Remove getIsInbound(), duplicate of isInbound()
|
||||
|
@ -17,7 +17,7 @@ import net.i2p.CoreVersion;
|
||||
public class RouterVersion {
|
||||
public final static String ID = "$Revision: 1.548 $ $Date: 2008-06-07 23:00:00 $";
|
||||
public final static String VERSION = "0.6.2";
|
||||
public final static long BUILD = 6;
|
||||
public final static long BUILD = 7;
|
||||
public static void main(String args[]) {
|
||||
System.out.println("I2P Router version: " + VERSION + "-" + BUILD);
|
||||
System.out.println("Router ID: " + RouterVersion.ID);
|
||||
|
@ -8,6 +8,7 @@ import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import net.i2p.data.DataHelper;
|
||||
import net.i2p.data.RouterIdentity;
|
||||
import net.i2p.data.RouterInfo;
|
||||
import net.i2p.router.RouterContext;
|
||||
@ -124,6 +125,7 @@ public class EventPumper implements Runnable {
|
||||
|
||||
int failsafeWrites = 0;
|
||||
int failsafeCloses = 0;
|
||||
int failsafeInvalid = 0;
|
||||
// pointless if we do this every 2 seconds?
|
||||
long expireIdleWriteTime = 20*60*1000l; // + _context.random().nextLong(60*60*1000l);
|
||||
for (Iterator iter = all.iterator(); iter.hasNext(); ) {
|
||||
@ -134,6 +136,33 @@ public class EventPumper implements Runnable {
|
||||
continue; // to the next con
|
||||
NTCPConnection con = (NTCPConnection)att;
|
||||
|
||||
/**
|
||||
* 100% CPU bug
|
||||
* http://forums.java.net/jive/thread.jspa?messageID=255525
|
||||
* http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6595055
|
||||
*
|
||||
* The problem is around a channel that was originally registered with Selector for i/o gets
|
||||
* closed on the server side (due to early client side exit). But the server side can know
|
||||
* about such channel only when it does i/o (read/write) and thereby getting into an IO exception.
|
||||
* In this case, (bug 6595055) there are times (erroneous) when server side (selector) did not
|
||||
* know the channel is already closed (peer-reset), but continue to do the selection cycle on
|
||||
* a key set whose associated channel is already closed or invalid. Hence, selector's select(..)
|
||||
* keeps spinning with zero return without blocking for the timeout period.
|
||||
*
|
||||
* One fix is to have a provision in the application, to check if any of the Selector's keyset
|
||||
* is having a closed channel/or invalid registration due to channel closure.
|
||||
*/
|
||||
if ((!key.isValid()) &&
|
||||
(!((SocketChannel)key.channel()).isConnectionPending()) &&
|
||||
con.getTimeSinceCreated() > 2 * NTCPTransport.ESTABLISH_TIMEOUT) {
|
||||
if (_log.shouldLog(Log.WARN))
|
||||
_log.warn("Invalid key " + con);
|
||||
// this will cancel the key, and it will then be removed from the keyset
|
||||
con.close();
|
||||
failsafeInvalid++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( (con.getWriteBufCount() > 0) &&
|
||||
((key.interestOps() & SelectionKey.OP_WRITE) == 0) ) {
|
||||
// the data queued to be sent has already passed through
|
||||
@ -152,11 +181,13 @@ public class EventPumper implements Runnable {
|
||||
} catch (CancelledKeyException cke) {
|
||||
// cancelled while updating the interest ops. ah well
|
||||
}
|
||||
if (failsafeWrites > 0)
|
||||
_context.statManager().addRateData("ntcp.failsafeWrites", failsafeWrites, 0);
|
||||
if (failsafeCloses > 0)
|
||||
_context.statManager().addRateData("ntcp.failsafeCloses", failsafeCloses, 0);
|
||||
}
|
||||
if (failsafeWrites > 0)
|
||||
_context.statManager().addRateData("ntcp.failsafeWrites", failsafeWrites, 0);
|
||||
if (failsafeCloses > 0)
|
||||
_context.statManager().addRateData("ntcp.failsafeCloses", failsafeCloses, 0);
|
||||
if (failsafeInvalid > 0)
|
||||
_context.statManager().addRateData("ntcp.failsafeInvalid", failsafeInvalid, 0);
|
||||
} catch (ClosedSelectorException cse) {
|
||||
continue;
|
||||
}
|
||||
|
@ -1277,4 +1277,11 @@ public class NTCPConnection implements FIFOBandwidthLimiter.CompleteListener {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds a short human-readable summary of this connection.
 * The summary contains the first 6 characters of the Base64 hash of the
 * remote peer (or "unknown " when no remote peer is set), whether the
 * connection is inbound, whether it is established, and how long ago it
 * was created (formatted by DataHelper.formatDuration()).
 *
 * @return the summary string described above
 */
public String toString() {
|
||||
return "NTCP Connection to " +
|
||||
(_remotePeer == null ? "unknown " : _remotePeer.calculateHash().toBase64().substring(0,6)) +
|
||||
" inbound? " + _isInbound + " established? " + _established +
|
||||
" created " + DataHelper.formatDuration(getTimeSinceCreated()) + " ago";
|
||||
}
|
||||
}
|
||||
|
@ -63,6 +63,7 @@ public class NTCPTransport extends TransportImpl {
|
||||
_context.statManager().createRateStat("ntcp.sendBacklogTime", "How long the head of the send queue has been waiting when we fail to add a new one to the queue (period is the number of messages queued)", "ntcp", new long[] { 60*1000, 10*60*1000 });
|
||||
_context.statManager().createRateStat("ntcp.failsafeWrites", "How many times do we need to proactively add in an extra nio write to a peer at any given failsafe pass?", "ntcp", new long[] { 60*1000, 10*60*1000 });
|
||||
_context.statManager().createRateStat("ntcp.failsafeCloses", "How many times do we need to proactively close an idle connection to a peer at any given failsafe pass?", "ntcp", new long[] { 60*1000, 10*60*1000 });
|
||||
_context.statManager().createRateStat("ntcp.failsafeInvalid", "How many times do we close a connection to a peer to work around a JVM bug?", "ntcp", new long[] { 60*1000, 10*60*1000 });
|
||||
_context.statManager().createRateStat("ntcp.accept", "", "ntcp", new long[] { 60*1000, 10*60*1000 });
|
||||
_context.statManager().createRateStat("ntcp.attemptShitlistedPeer", "", "ntcp", new long[] { 60*1000, 10*60*1000 });
|
||||
_context.statManager().createRateStat("ntcp.attemptUnreachablePeer", "", "ntcp", new long[] { 60*1000, 10*60*1000 });
|
||||
@ -452,7 +453,7 @@ public class NTCPTransport extends TransportImpl {
|
||||
* how long from initial connection attempt (accept() or connect()) until
|
||||
* the con must be established to avoid premature close()ing
|
||||
*/
|
||||
private static final int ESTABLISH_TIMEOUT = 10*1000;
|
||||
public static final int ESTABLISH_TIMEOUT = 10*1000;
|
||||
/** add us to the establishment timeout process */
|
||||
void establishing(NTCPConnection con) {
|
||||
synchronized (_establishing) {
|
||||
|
Reference in New Issue
Block a user