- Try to fix 100% CPU, caused perhaps by JVM NIO bug...
      - Fix failsafe stats
This commit is contained in:
zzz
2008-06-30 03:14:32 +00:00
parent bae712ad96
commit 2768bef991
5 changed files with 56 additions and 6 deletions

View File

@ -1,3 +1,14 @@
2008-06-30 zzz
* configstats.jsp: Fix NPE when no stats checked (thanks nothome27!)
* i2psnark:
- Fix NPE caused by race (thanks echelon!)
- Add mastertracker, remove de-ebook
* NTCP:
- Try to fix 100% CPU, caused perhaps by JVM NIO bug...
- Fix failsafe stats
* PersistentDataStore: More leaseSet code cleanup
* SimpleTimer: Change congestion message from error to warn
2008-06-24 zzz
* FloodfillMonitorJob: Change range from 5-7 to 4-6
* NTCP: Remove getIsInbound(), duplicate of isInbound()

View File

@ -17,7 +17,7 @@ import net.i2p.CoreVersion;
public class RouterVersion {
public final static String ID = "$Revision: 1.548 $ $Date: 2008-06-07 23:00:00 $";
public final static String VERSION = "0.6.2";
public final static long BUILD = 6;
public final static long BUILD = 7;
public static void main(String args[]) {
System.out.println("I2P Router version: " + VERSION + "-" + BUILD);
System.out.println("Router ID: " + RouterVersion.ID);

View File

@ -8,6 +8,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import net.i2p.data.DataHelper;
import net.i2p.data.RouterIdentity;
import net.i2p.data.RouterInfo;
import net.i2p.router.RouterContext;
@ -124,6 +125,7 @@ public class EventPumper implements Runnable {
int failsafeWrites = 0;
int failsafeCloses = 0;
int failsafeInvalid = 0;
// pointless if we do this every 2 seconds?
long expireIdleWriteTime = 20*60*1000l; // + _context.random().nextLong(60*60*1000l);
for (Iterator iter = all.iterator(); iter.hasNext(); ) {
@ -134,6 +136,33 @@ public class EventPumper implements Runnable {
continue; // to the next con
NTCPConnection con = (NTCPConnection)att;
/**
* 100% CPU bug
* http://forums.java.net/jive/thread.jspa?messageID=255525
* http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6595055
*
* The problem is around a channel that was originally registered with Selector for i/o gets
* closed on the server side (due to early client side exit). But the server side can know
* about such channel only when it does i/o (read/write) and thereby getting into an IO exception.
* In this case, (bug 6595055) there are times (erroneous) when server side (selector) did not
* know the channel is already closed (peer-reset), but continues to do the selection cycle on
* a key set whose associated channel is already closed or invalid. Hence, selector's select(..)
* keeps spinning with zero return without blocking for the timeout period.
*
* One fix is to have a provision in the application, to check if any of the Selector's keyset
* is having a closed channel/or invalid registration due to channel closure.
*/
if ((!key.isValid()) &&
(!((SocketChannel)key.channel()).isConnectionPending()) &&
con.getTimeSinceCreated() > 2 * NTCPTransport.ESTABLISH_TIMEOUT) {
if (_log.shouldLog(Log.WARN))
_log.warn("Invalid key " + con);
// this will cancel the key, and it will then be removed from the keyset
con.close();
failsafeInvalid++;
continue;
}
if ( (con.getWriteBufCount() > 0) &&
((key.interestOps() & SelectionKey.OP_WRITE) == 0) ) {
// the data queued to be sent has already passed through
@ -152,11 +181,13 @@ public class EventPumper implements Runnable {
} catch (CancelledKeyException cke) {
// cancelled while updating the interest ops. ah well
}
if (failsafeWrites > 0)
_context.statManager().addRateData("ntcp.failsafeWrites", failsafeWrites, 0);
if (failsafeCloses > 0)
_context.statManager().addRateData("ntcp.failsafeCloses", failsafeCloses, 0);
}
if (failsafeWrites > 0)
_context.statManager().addRateData("ntcp.failsafeWrites", failsafeWrites, 0);
if (failsafeCloses > 0)
_context.statManager().addRateData("ntcp.failsafeCloses", failsafeCloses, 0);
if (failsafeInvalid > 0)
_context.statManager().addRateData("ntcp.failsafeInvalid", failsafeInvalid, 0);
} catch (ClosedSelectorException cse) {
continue;
}

View File

@ -1277,4 +1277,11 @@ public class NTCPConnection implements FIFOBandwidthLimiter.CompleteListener {
}
}
}
/**
 * Builds a one-line, human-readable summary of this connection.
 *
 * The summary contains, in order: the first six Base64 characters of the
 * remote peer's hash (or the literal "unknown " when no remote peer is set),
 * the inbound flag, the established flag, and the formatted time elapsed
 * since the connection was created.
 *
 * @return the descriptive string for this connection
 */
public String toString() {
    // Abbreviate the peer hash so the summary stays short.
    final String peerTag;
    if (_remotePeer != null) {
        peerTag = _remotePeer.calculateHash().toBase64().substring(0,6);
    } else {
        peerTag = "unknown ";
    }

    // Assemble the pieces in the same order the fields are reported.
    String summary = "NTCP Connection to " + peerTag;
    summary += " inbound? " + _isInbound;
    summary += " established? " + _established;
    summary += " created " + DataHelper.formatDuration(getTimeSinceCreated()) + " ago";
    return summary;
}
}

View File

@ -63,6 +63,7 @@ public class NTCPTransport extends TransportImpl {
_context.statManager().createRateStat("ntcp.sendBacklogTime", "How long the head of the send queue has been waiting when we fail to add a new one to the queue (period is the number of messages queued)", "ntcp", new long[] { 60*1000, 10*60*1000 });
_context.statManager().createRateStat("ntcp.failsafeWrites", "How many times do we need to proactively add in an extra nio write to a peer at any given failsafe pass?", "ntcp", new long[] { 60*1000, 10*60*1000 });
_context.statManager().createRateStat("ntcp.failsafeCloses", "How many times do we need to proactively close an idle connection to a peer at any given failsafe pass?", "ntcp", new long[] { 60*1000, 10*60*1000 });
_context.statManager().createRateStat("ntcp.failsafeInvalid", "How many times do we close a connection to a peer to work around a JVM bug?", "ntcp", new long[] { 60*1000, 10*60*1000 });
_context.statManager().createRateStat("ntcp.accept", "", "ntcp", new long[] { 60*1000, 10*60*1000 });
_context.statManager().createRateStat("ntcp.attemptShitlistedPeer", "", "ntcp", new long[] { 60*1000, 10*60*1000 });
_context.statManager().createRateStat("ntcp.attemptUnreachablePeer", "", "ntcp", new long[] { 60*1000, 10*60*1000 });
@ -452,7 +453,7 @@ public class NTCPTransport extends TransportImpl {
* how long from initial connection attempt (accept() or connect()) until
* the con must be established to avoid premature close()ing
*/
private static final int ESTABLISH_TIMEOUT = 10*1000;
public static final int ESTABLISH_TIMEOUT = 10*1000;
/** add us to the establishment timeout process */
void establishing(NTCPConnection con) {
synchronized (_establishing) {