move DecayingBloomFilter, DecayingHashSet, and xlattice filters from core to router

zzz
2012-07-02 19:22:33 +00:00
parent 5eab417134
commit e2588a5379
9 changed files with 19 additions and 19 deletions

View File

@@ -1,7 +1,7 @@
 package net.i2p.router;
 
-import net.i2p.util.DecayingBloomFilter;
-import net.i2p.util.DecayingHashSet;
+import net.i2p.router.util.DecayingBloomFilter;
+import net.i2p.router.util.DecayingHashSet;
 import net.i2p.util.Log;
 
 /**

View File

@@ -4,8 +4,8 @@ import java.util.Map;
 
 import net.i2p.data.Hash;
 import net.i2p.router.RouterContext;
-import net.i2p.util.DecayingBloomFilter;
-import net.i2p.util.DecayingHashSet;
+import net.i2p.router.util.DecayingBloomFilter;
+import net.i2p.router.util.DecayingHashSet;
 import net.i2p.util.Log;
 
 /**

View File

@@ -3,9 +3,9 @@ package net.i2p.router.tunnel;
 import net.i2p.data.ByteArray;
 import net.i2p.data.DataHelper;
 import net.i2p.router.RouterContext;
+import net.i2p.router.util.DecayingBloomFilter;
+import net.i2p.router.util.DecayingHashSet;
 import net.i2p.util.ByteCache;
-import net.i2p.util.DecayingBloomFilter;
-import net.i2p.util.DecayingHashSet;
 
 /**
  * Manage the IV validation for all of the router's tunnels by way of a big

View File

@@ -9,8 +9,8 @@ import net.i2p.data.PrivateKey;
 import net.i2p.data.SessionKey;
 import net.i2p.data.i2np.BuildRequestRecord;
 import net.i2p.data.i2np.TunnelBuildMessage;
-import net.i2p.util.DecayingBloomFilter;
-import net.i2p.util.DecayingHashSet;
+import net.i2p.router.util.DecayingBloomFilter;
+import net.i2p.router.util.DecayingHashSet;
 import net.i2p.util.Log;
 
 /**

View File

@@ -0,0 +1,455 @@
package net.i2p.router.util;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import net.i2p.I2PAppContext;
import net.i2p.data.DataHelper;
import net.i2p.util.Log;
import net.i2p.util.SimpleTimer;
import org.xlattice.crypto.filters.BloomSHA1;
/**
* Series of bloom filters which decay over time, allowing their continual use
* for time sensitive data. This has a fixed size (per
* period, using two periods overall), allowing this to pump through hundreds of
* entries per second with virtually no false positive rate. Down the line,
* this may be refactored to allow tighter control of the size necessary for the
* contained bloom filters.
*
* See main() for an analysis of false positive rate.
* See BloomFilterIVValidator for instantiation parameters.
* See DecayingHashSet for a smaller and simpler version.
* @see net.i2p.router.tunnel.BloomFilterIVValidator
* @see net.i2p.router.util.DecayingHashSet
*/
public class DecayingBloomFilter {
protected final I2PAppContext _context;
protected final Log _log;
private BloomSHA1 _current;
private BloomSHA1 _previous;
protected final int _durationMs;
protected final int _entryBytes;
private final byte _extenders[][];
private final byte _extended[];
private final byte _longToEntry[];
private final long _longToEntryMask;
protected long _currentDuplicates;
protected volatile boolean _keepDecaying;
protected final SimpleTimer.TimedEvent _decayEvent;
/** just for logging */
protected final String _name;
/** synchronize against this lock when switching double buffers */
protected final ReentrantReadWriteLock _reorganizeLock = new ReentrantReadWriteLock();
private static final int DEFAULT_M = 23;
private static final int DEFAULT_K = 11;
/** true for debugging */
private static final boolean ALWAYS_MISS = false;
/** only for extension by DHS */
protected DecayingBloomFilter(int durationMs, int entryBytes, String name, I2PAppContext context) {
_context = context;
_log = context.logManager().getLog(getClass());
_entryBytes = entryBytes;
_name = name;
_durationMs = durationMs;
// all final
_extenders = null;
_extended = null;
_longToEntry = null;
_longToEntryMask = 0;
context.addShutdownTask(new Shutdown());
_decayEvent = new DecayEvent();
_keepDecaying = true;
SimpleTimer.getInstance().addEvent(_decayEvent, _durationMs);
}
/**
* Create a bloom filter that will decay its entries over time.
*
* @param durationMs entries last for at least this long, but no more than twice this long
* @param entryBytes how large are the entries to be added? if this is less than 32 bytes,
*                   the entries added will be expanded to 32 bytes by concatenating them with
*                   copies XORed against fixed random values.
*/
public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes) {
this(context, durationMs, entryBytes, "DBF");
}
/** @param name just for logging / debugging / stats */
public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes, String name) {
// this is instantiated in four different places, they may have different
// requirements, but for now use this as a gross method of memory reduction.
// m == 23 => 1MB each BloomSHA1 (4 pairs = 8MB total)
this(context, durationMs, entryBytes, name, context.getProperty("router.decayingBloomFilterM", DEFAULT_M));
}
/** @param m filter size exponent */
public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes, String name, int m) {
_context = context;
_log = context.logManager().getLog(DecayingBloomFilter.class);
_entryBytes = entryBytes;
_name = name;
int k = DEFAULT_K;
// max is (23,11) or (26,10); see KeySelector for details
if (m > DEFAULT_M)
k--;
_current = new BloomSHA1(m, k);
_previous = new BloomSHA1(m, k);
_durationMs = durationMs;
int numExtenders = (32+ (entryBytes-1))/entryBytes - 1;
if (numExtenders < 0)
numExtenders = 0;
_extenders = new byte[numExtenders][entryBytes];
for (int i = 0; i < numExtenders; i++)
_context.random().nextBytes(_extenders[i]);
if (numExtenders > 0) {
_extended = new byte[32];
_longToEntry = new byte[_entryBytes];
_longToEntryMask = (1l << (_entryBytes * 8l)) -1;
} else {
// final
_extended = null;
_longToEntry = null;
_longToEntryMask = 0;
}
_decayEvent = new DecayEvent();
_keepDecaying = true;
SimpleTimer.getInstance().addEvent(_decayEvent, _durationMs);
if (_log.shouldLog(Log.WARN))
_log.warn("New DBF " + name + " m = " + m + " k = " + k + " entryBytes = " + entryBytes +
" numExtenders = " + numExtenders + " cycle (s) = " + (durationMs / 1000));
// try to get a handle on memory usage vs. false positives
context.statManager().createRateStat("router.decayingBloomFilter." + name + ".size",
"Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
context.statManager().createRateStat("router.decayingBloomFilter." + name + ".dups",
"1000000 * Duplicates/Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
context.statManager().createRateStat("router.decayingBloomFilter." + name + ".log10(falsePos)",
"log10 of the false positive rate (must have net.i2p.util.DecayingBloomFilter=DEBUG)",
"Router", new long[] { 10 * Math.max(60*1000, durationMs) });
context.addShutdownTask(new Shutdown());
}
/**
* @since 0.8.8
*/
private class Shutdown implements Runnable {
public void run() {
clear();
}
}
public long getCurrentDuplicateCount() { return _currentDuplicates; }
/** unsynchronized but only used for logging elsewhere */
public int getInsertedCount() {
return _current.size() + _previous.size();
}
/** unsynchronized, only used for logging elsewhere */
public double getFalsePositiveRate() {
return _current.falsePositives();
}
/**
* @return true if the entry added is a duplicate
*/
public boolean add(byte entry[]) {
return add(entry, 0, entry.length);
}
/**
* @return true if the entry added is a duplicate
*/
public boolean add(byte entry[], int off, int len) {
if (ALWAYS_MISS) return false;
if (entry == null)
throw new IllegalArgumentException("Null entry");
if (len != _entryBytes)
throw new IllegalArgumentException("Bad entry [" + len + ", expected "
+ _entryBytes + "]");
getReadLock();
try {
return locked_add(entry, off, len, true);
} finally { releaseReadLock(); }
}
/**
* @return true if the entry added is a duplicate. the number of low order
* bits used is determined by the entryBytes parameter used on creation of the
* filter.
*
*/
public boolean add(long entry) {
if (ALWAYS_MISS) return false;
if (_entryBytes <= 7)
entry = ((entry ^ _longToEntryMask) & ((1 << 31)-1)) | (entry ^ _longToEntryMask);
//entry &= _longToEntryMask;
if (entry < 0) {
DataHelper.toLong(_longToEntry, 0, _entryBytes, 0-entry);
_longToEntry[0] |= (1 << 7);
} else {
DataHelper.toLong(_longToEntry, 0, _entryBytes, entry);
}
getReadLock();
try {
return locked_add(_longToEntry, 0, _longToEntry.length, true);
} finally { releaseReadLock(); }
}
/**
* @return true if the entry is already known. this does NOT add the
* entry however.
*
*/
public boolean isKnown(long entry) {
if (ALWAYS_MISS) return false;
if (_entryBytes <= 7)
entry = ((entry ^ _longToEntryMask) & ((1 << 31)-1)) | (entry ^ _longToEntryMask);
if (entry < 0) {
DataHelper.toLong(_longToEntry, 0, _entryBytes, 0-entry);
_longToEntry[0] |= (1 << 7);
} else {
DataHelper.toLong(_longToEntry, 0, _entryBytes, entry);
}
getReadLock();
try {
return locked_add(_longToEntry, 0, _longToEntry.length, false);
} finally { releaseReadLock(); }
}
private boolean locked_add(byte entry[], int offset, int len, boolean addIfNew) {
if (_extended != null) {
// extend the entry to 32 bytes
System.arraycopy(entry, offset, _extended, 0, len);
for (int i = 0; i < _extenders.length; i++)
DataHelper.xor(entry, offset, _extenders[i], 0, _extended, _entryBytes * (i+1), _entryBytes);
BloomSHA1.FilterKey key = _current.getFilterKey(_extended, 0, 32);
boolean seen = _current.locked_member(key);
if (!seen)
seen = _previous.locked_member(key);
if (seen) {
_currentDuplicates++;
_current.release(key);
return true;
} else {
if (addIfNew) {
_current.locked_insert(key);
}
_current.release(key);
return false;
}
} else {
BloomSHA1.FilterKey key = _current.getFilterKey(entry, offset, len);
boolean seen = _current.locked_member(key);
if (!seen)
seen = _previous.locked_member(key);
if (seen) {
_currentDuplicates++;
_current.release(key);
return true;
} else {
if (addIfNew) {
_current.locked_insert(key);
}
_current.release(key);
return false;
}
}
}
public void clear() {
if (!getWriteLock())
return;
try {
_current.clear();
_previous.clear();
_currentDuplicates = 0;
} finally { releaseWriteLock(); }
}
public void stopDecaying() {
_keepDecaying = false;
SimpleTimer.getInstance().removeEvent(_decayEvent);
}
protected void decay() {
int currentCount = 0;
long dups = 0;
double fpr = 0d;
if (!getWriteLock())
return;
try {
BloomSHA1 tmp = _previous;
currentCount = _current.size();
if (_log.shouldLog(Log.DEBUG) && currentCount > 0)
fpr = _current.falsePositives();
_previous = _current;
_current = tmp;
_current.clear();
dups = _currentDuplicates;
_currentDuplicates = 0;
} finally { releaseWriteLock(); }
if (_log.shouldLog(Log.DEBUG))
_log.debug("Decaying the filter " + _name + " after inserting " + currentCount
+ " elements and " + dups + " false positives with FPR = " + fpr);
_context.statManager().addRateData("router.decayingBloomFilter." + _name + ".size",
currentCount);
if (currentCount > 0)
_context.statManager().addRateData("router.decayingBloomFilter." + _name + ".dups",
1000l*1000*dups/currentCount);
if (fpr > 0d) {
// only if log.shouldLog(Log.DEBUG) ...
long exponent = (long) Math.log10(fpr);
_context.statManager().addRateData("router.decayingBloomFilter." + _name + ".log10(falsePos)",
exponent);
}
}
private class DecayEvent implements SimpleTimer.TimedEvent {
public void timeReached() {
if (_keepDecaying) {
decay();
SimpleTimer.getInstance().addEvent(DecayEvent.this, _durationMs);
}
}
}
/** @since 0.8.11 moved from DecayingHashSet */
protected void getReadLock() {
_reorganizeLock.readLock().lock();
}
/** @since 0.8.11 moved from DecayingHashSet */
protected void releaseReadLock() {
_reorganizeLock.readLock().unlock();
}
/**
* @return true if the lock was acquired
* @since 0.8.11 moved from DecayingHashSet
*/
protected boolean getWriteLock() {
try {
boolean rv = _reorganizeLock.writeLock().tryLock(5000, TimeUnit.MILLISECONDS);
if (!rv)
_log.error("no lock, size is: " + _reorganizeLock.getQueueLength(), new Exception("rats"));
return rv;
} catch (InterruptedException ie) {}
return false;
}
/** @since 0.8.11 moved from DecayingHashSet */
protected void releaseWriteLock() {
_reorganizeLock.writeLock().unlock();
}
/**
* This filter is used only for participants and OBEPs, not
* IBGWs, so depending on your assumptions of avg. tunnel length,
* the performance is somewhat better than the gross share BW
* would indicate.
*
*<pre>
* Following stats for m=23, k=11:
* Theoretical false positive rate for 16 KBps: 1.17E-21
* Theoretical false positive rate for 24 KBps: 9.81E-20
* Theoretical false positive rate for 32 KBps: 2.24E-18
* Theoretical false positive rate for 256 KBps: 7.45E-9
* Theoretical false positive rate for 512 KBps: 5.32E-6
* Theoretical false positive rate for 1024 KBps: 1.48E-3
* Then it gets bad: 1280 .67%; 1536 2.0%; 1792 4.4%; 2048 8.2%.
*
* Following stats for m=24, k=10:
* 1280 4.5E-5; 1792 5.6E-4; 2048 0.14%
*
* Following stats for m=25, k=10:
* 1792 2.4E-6; 4096 0.14%; 5120 0.6%; 6144 1.7%; 8192 6.8%; 10240 15%
*</pre>
*/
public static void main(String args[]) {
System.out.println("Usage: DecayingBloomFilter [kbps [m [iterations]]] (default 256 23 10)");
int kbps = 256;
if (args.length >= 1) {
try {
kbps = Integer.parseInt(args[0]);
} catch (NumberFormatException nfe) {}
}
int m = DEFAULT_M;
if (args.length >= 2) {
try {
m = Integer.parseInt(args[1]);
} catch (NumberFormatException nfe) {}
}
int iterations = 10;
if (args.length >= 3) {
try {
iterations = Integer.parseInt(args[2]);
} catch (NumberFormatException nfe) {}
}
testByLong(kbps, m, iterations);
testByBytes(kbps, m, iterations);
}
private static void testByLong(int kbps, int m, int numRuns) {
int messages = 60 * 10 * kbps;
Random r = new Random();
DecayingBloomFilter filter = new DecayingBloomFilter(I2PAppContext.getGlobalContext(), 600*1000, 8, "test", m);
int falsePositives = 0;
long totalTime = 0;
double fpr = 0d;
for (int j = 0; j < numRuns; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < messages; i++) {
if (filter.add(r.nextLong())) {
falsePositives++;
//System.out.println("False positive " + falsePositives + " (testByLong j=" + j + " i=" + i + ")");
}
}
totalTime += System.currentTimeMillis() - start;
fpr = filter.getFalsePositiveRate();
filter.clear();
}
filter.stopDecaying();
System.out.println("False postive rate should be " + fpr);
System.out.println("After " + numRuns + " runs pushing " + messages + " entries in "
+ DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
+ falsePositives + " false positives");
}
private static void testByBytes(int kbps, int m, int numRuns) {
byte iv[][] = new byte[60*10*kbps][16];
Random r = new Random();
for (int i = 0; i < iv.length; i++)
r.nextBytes(iv[i]);
DecayingBloomFilter filter = new DecayingBloomFilter(I2PAppContext.getGlobalContext(), 600*1000, 16, "test", m);
int falsePositives = 0;
long totalTime = 0;
double fpr = 0d;
for (int j = 0; j < numRuns; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < iv.length; i++) {
if (filter.add(iv[i])) {
falsePositives++;
//System.out.println("False positive " + falsePositives + " (testByBytes j=" + j + " i=" + i + ")");
}
}
totalTime += System.currentTimeMillis() - start;
fpr = filter.getFalsePositiveRate();
filter.clear();
}
filter.stopDecaying();
System.out.println("False postive rate should be " + fpr);
System.out.println("After " + numRuns + " runs pushing " + iv.length + " entries in "
+ DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
+ falsePositives + " false positives");
//System.out.println("inserted: " + bloom.size() + " with " + bloom.capacity()
// + " (" + bloom.falsePositives()*100.0d + "% false positive)");
}
}
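The theoretical rates in the main() javadoc above come straight from the formula in BloomSHA1.falsePositives(), f = (1 - e^(-kN/M))^k, where M = 2^m is the filter size in bits and N is the number of entries inserted over one 10-minute decay period (one entry per KByte of share bandwidth). A minimal standalone sketch that reproduces the 256 KBps row of the m=23, k=11 table; the class and variable names are illustrative, not part of the router source:

public class DbfFprCheck {
    public static void main(String[] args) {
        int m = 23, k = 11;               // router defaults (DEFAULT_M, DEFAULT_K)
        double filterBits = 1L << m;      // M = 2^23 bits in each BloomSHA1
        int kbps = 256;                   // share bandwidth, one entry per KByte
        double n = 60 * 10 * kbps;        // N = entries per 10-minute period
        double f = Math.pow(1 - Math.exp(-k * n / filterBits), k);
        System.out.println("Theoretical false positive rate for " + kbps + " KBps: " + f);
        // prints approximately 7.45E-9, matching the table above
    }
}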

View File

@@ -0,0 +1,335 @@
package net.i2p.router.util;
import java.util.Random;
import net.i2p.I2PAppContext;
import net.i2p.data.DataHelper;
import net.i2p.util.ConcurrentHashSet;
import net.i2p.util.Log;
/**
* Double buffered hash set.
* Since DecayingBloomFilter was instantiated 4 times for a total memory usage
* of 8MB, it seemed like we could do a lot better, given these usage stats
* on a class L router:
*
* ./router/java/src/net/i2p/router/tunnel/BuildMessageProcessor.java:
* 32 bytes, peak 10 entries in 1m
* (320 peak entries seen on fast router)
*
* ./router/java/src/net/i2p/router/transport/udp/InboundMessageFragments.java:
* 4 bytes, peak 150 entries in 10s
* (1600 peak entries seen on fast router)
*
* ./router/java/src/net/i2p/router/MessageValidator.java:
* 8 bytes, peak 1K entries in 2m
* (36K peak entries seen on fast router)
*
* ./router/java/src/net/i2p/router/tunnel/BloomFilterIVValidator.java:
* 16 bytes, peak 15K entries in 10m
*
* If the ArrayWrapper object in the HashSet is 50 bytes, and BloomSHA1(23, 11) is 1MB,
* then for less than 20K entries this is smaller.
* And this uses space proportional to traffic, so it doesn't penalize small routers
* with a fixed 8MB.
* So let's try it for the first 2 or 3, for now.
*
* Also, DBF is synchronized, and uses SimpleTimer.
* Here we use a read/write lock, with synchronization only
* when switching double buffers, and we use SimpleScheduler.
*
* Yes, we could stare at stats all day, and try to calculate an acceptable
* false-positive rate for each of the above uses, then estimate the DBF size
* required to meet that rate for a given usage. Or even start adjusting the
* Bloom filter m and k values on a per-DBF basis. But it's a whole lot easier
* to implement something with a zero false positive rate, and uses less memory
* for almost all bandwidth classes.
*
* This has a strictly zero false positive rate for <= 8 byte keys.
* For larger keys, it is 1 / (2**64) ~= 5E-20, which is better than
* DBF for any entry count greater than about 14K.
*
* DBF has a zero false negative rate over the period
* 2 * durationMs. And a 100% false negative rate beyond that period.
* This has the same properties.
*
* This performs about twice as fast as DBF in the test below.
*
* @author zzz
*/
public class DecayingHashSet extends DecayingBloomFilter {
private ConcurrentHashSet<ArrayWrapper> _current;
private ConcurrentHashSet<ArrayWrapper> _previous;
/**
* Create a double-buffered hash set that will decay its entries over time.
*
* @param durationMs entries last for at least this long, but no more than twice this long
* @param entryBytes how large are the entries to be added? 1 to 32 bytes
*/
public DecayingHashSet(I2PAppContext context, int durationMs, int entryBytes) {
this(context, durationMs, entryBytes, "DHS");
}
/** @param name just for logging / debugging / stats */
public DecayingHashSet(I2PAppContext context, int durationMs, int entryBytes, String name) {
super(durationMs, entryBytes, name, context);
if (entryBytes <= 0 || entryBytes > 32)
throw new IllegalArgumentException("Bad size");
_current = new ConcurrentHashSet(128);
_previous = new ConcurrentHashSet(128);
if (_log.shouldLog(Log.WARN))
_log.warn("New DHS " + name + " entryBytes = " + entryBytes +
" cycle (s) = " + (durationMs / 1000));
// try to get a handle on memory usage vs. false positives
context.statManager().createRateStat("router.decayingHashSet." + name + ".size",
"Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
context.statManager().createRateStat("router.decayingHashSet." + name + ".dups",
"1000000 * Duplicates/Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
}
/** unsynchronized but only used for logging elsewhere */
@Override
public int getInsertedCount() {
return _current.size() + _previous.size();
}
/** pointless, only used for logging elsewhere */
@Override
public double getFalsePositiveRate() {
if (_entryBytes <= 8)
return 0d;
return 1d / Math.pow(2d, 64d); // 5.4E-20
}
/**
* @return true if the entry added is a duplicate
*/
@Override
public boolean add(byte entry[], int off, int len) {
if (entry == null)
throw new IllegalArgumentException("Null entry");
if (len != _entryBytes)
throw new IllegalArgumentException("Bad entry [" + len + ", expected "
+ _entryBytes + "]");
ArrayWrapper w = new ArrayWrapper(entry, off, len);
getReadLock();
try {
return locked_add(w, true);
} finally { releaseReadLock(); }
}
/**
* @return true if the entry added is a duplicate. the number of low order
* bits used is determined by the entryBytes parameter used on creation of the
* filter.
*
*/
@Override
public boolean add(long entry) {
return add(entry, true);
}
/**
* @return true if the entry is already known. this does NOT add the
* entry however.
*
*/
@Override
public boolean isKnown(long entry) {
return add(entry, false);
}
private boolean add(long entry, boolean addIfNew) {
ArrayWrapper w = new ArrayWrapper(entry);
getReadLock();
try {
return locked_add(w, addIfNew);
} finally { releaseReadLock(); }
}
/**
* @param addIfNew if true, add the element to current if it is not already there or in previous;
* if false, only check
* @return if the element is in either the current or previous set
*/
private boolean locked_add(ArrayWrapper w, boolean addIfNew) {
boolean seen = _previous.contains(w);
// only access _current once.
if (!seen) {
if (addIfNew)
seen = !_current.add(w);
else
seen = _current.contains(w);
}
if (seen) {
// why increment if addIfNew == false? Only used for stats...
_currentDuplicates++;
}
return seen;
}
@Override
public void clear() {
_current.clear();
_previous.clear();
_currentDuplicates = 0;
}
/** super doesn't call clear, but neither do the users, so it seems like we should here */
@Override
public void stopDecaying() {
_keepDecaying = false;
clear();
}
@Override
protected void decay() {
int currentCount = 0;
long dups = 0;
if (!getWriteLock())
return;
try {
ConcurrentHashSet<ArrayWrapper> tmp = _previous;
currentCount = _current.size();
_previous = _current;
_current = tmp;
_current.clear();
dups = _currentDuplicates;
_currentDuplicates = 0;
} finally { releaseWriteLock(); }
if (_log.shouldLog(Log.DEBUG))
_log.debug("Decaying the filter " + _name + " after inserting " + currentCount
+ " elements and " + dups + " false positives");
_context.statManager().addRateData("router.decayingHashSet." + _name + ".size",
currentCount);
if (currentCount > 0)
_context.statManager().addRateData("router.decayingHashSet." + _name + ".dups",
1000l*1000*dups/currentCount);
}
/**
* This saves the data as-is if the length is <= 8 bytes,
* otherwise it stores an 8-byte hash.
* Hash function is from DataHelper, modded to get
* the maximum entropy given the length of the data.
*/
private static class ArrayWrapper {
private final long _longhashcode;
public ArrayWrapper(byte[] b, int offset, int len) {
int idx = offset;
int shift = Math.min(8, 64 / len);
long lhc = 0;
for (int i = 0; i < len; i++) {
// xor better than + in tests
lhc ^= (((long) b[idx++]) << (i * shift));
}
_longhashcode = lhc;
}
/** faster version for when storing <= 8 bytes */
public ArrayWrapper(long b) {
_longhashcode = b;
}
public int hashCode() {
return (int) _longhashcode;
}
public long longHashCode() {
return _longhashcode;
}
public boolean equals(Object o) {
if (o == null || !(o instanceof ArrayWrapper))
return false;
return ((ArrayWrapper) o).longHashCode() == _longhashcode;
}
}
/**
* vs. DBF, this measures 1.93x faster for testByLong and 2.46x faster for testByBytes.
*/
public static void main(String args[]) {
/** KBytes per sec, 1 message per KByte */
int kbps = 256;
int iterations = 10;
//testSize();
testByLong(kbps, iterations);
testByBytes(kbps, iterations);
}
/** and the answer is: 49.9 bytes. The ArrayWrapper alone measured 16, so that's 34 for the HashSet entry. */
/*****
private static void testSize() {
int qty = 256*1024;
byte b[] = new byte[8];
Random r = new Random();
long old = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
ConcurrentHashSet foo = new ConcurrentHashSet(qty);
for (int i = 0; i < qty; i++) {
r.nextBytes(b);
foo.add(new ArrayWrapper(b, 0, 8));
}
long used = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
System.out.println("Memory per ArrayWrapper: " + (((double) (used - old)) / qty));
}
*****/
/** 8 bytes, simulate the router message validator */
private static void testByLong(int kbps, int numRuns) {
int messages = 60 * 10 * kbps;
Random r = new Random();
DecayingBloomFilter filter = new DecayingHashSet(I2PAppContext.getGlobalContext(), 600*1000, 8);
int falsePositives = 0;
long totalTime = 0;
for (int j = 0; j < numRuns; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < messages; i++) {
if (filter.add(r.nextLong())) {
falsePositives++;
System.out.println("False positive " + falsePositives + " (testByLong j=" + j + " i=" + i + ")");
}
}
totalTime += System.currentTimeMillis() - start;
filter.clear();
}
System.out.println("False postive rate should be " + filter.getFalsePositiveRate());
filter.stopDecaying();
System.out.println("After " + numRuns + " runs pushing " + messages + " entries in "
+ DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
+ falsePositives + " false positives");
}
/** 16 bytes, simulate the tunnel IV validator */
private static void testByBytes(int kbps, int numRuns) {
byte iv[][] = new byte[60*10*kbps][16];
Random r = new Random();
for (int i = 0; i < iv.length; i++)
r.nextBytes(iv[i]);
DecayingBloomFilter filter = new DecayingHashSet(I2PAppContext.getGlobalContext(), 600*1000, 16);
int falsePositives = 0;
long totalTime = 0;
for (int j = 0; j < numRuns; j++) {
long start = System.currentTimeMillis();
for (int i = 0; i < iv.length; i++) {
if (filter.add(iv[i])) {
falsePositives++;
System.out.println("False positive " + falsePositives + " (testByBytes j=" + j + " i=" + i + ")");
}
}
totalTime += System.currentTimeMillis() - start;
filter.clear();
}
System.out.println("False postive rate should be " + filter.getFalsePositiveRate());
filter.stopDecaying();
System.out.println("After " + numRuns + " runs pushing " + iv.length + " entries in "
+ DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
+ falsePositives + " false positives");
}
}
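Because DecayingHashSet extends DecayingBloomFilter, callers keep a DecayingBloomFilter reference and only the constructor call changes, which is why the import swaps in the diffs above are so small. A rough usage sketch, assuming a 2-minute decay period and 8-byte message IDs as in the MessageValidator case described in the javadoc; class and variable names are illustrative only:

import net.i2p.I2PAppContext;
import net.i2p.router.util.DecayingBloomFilter;
import net.i2p.router.util.DecayingHashSet;

public class DuplicateCheckSketch {
    public static void main(String[] args) {
        I2PAppContext ctx = I2PAppContext.getGlobalContext();
        // 2-minute decay, 8-byte entries: entries last 2 to 4 minutes before decaying away
        DecayingBloomFilter filter = new DecayingHashSet(ctx, 2*60*1000, 8, "sketch");
        long msgId = 12345L;
        System.out.println("first add, duplicate?  " + filter.add(msgId));    // false
        System.out.println("second add, duplicate? " + filter.add(msgId));    // true
        System.out.println("known without adding?  " + filter.isKnown(999L)); // false
        filter.stopDecaying();
    }
}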

View File

@@ -0,0 +1,343 @@
package org.xlattice.crypto.filters;
import java.util.Arrays;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
/**
* A Bloom filter for sets of SHA1 digests. A Bloom filter uses a set
* of k hash functions to determine set membership. Each hash function
* produces a value in the range 0..M-1. The filter is of size M. To
* add a member to the set, apply each function to the new member and
* set the corresponding bit in the filter. For M very large relative
* to k, this will normally set k bits in the filter. To check whether
* x is a member of the set, apply each of the k hash functions to x
* and check whether the corresponding bits are set in the filter. If
* any are not set, x is definitely not a member. If all are set, x
* may be a member. The probability of error (the false positive rate)
* is f = (1 - e^(-kN/M))^k, where N is the number of set members.
*
* This class takes advantage of the fact that SHA1 digests are good-
* quality pseudo-random numbers. The k hash functions are the values
* of distinct sets of bits taken from the 20-byte SHA1 hash. The
* number of bits in the filter, M, is constrained to be a power of
* 2; M == 2^m. The number of bits in each hash function may not
* exceed floor(m/k).
*
* This class is designed to be thread-safe, but this has not been
* exhaustively tested.
*
* @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
*
* BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
* app - http://xlattice.sourceforge.net/
*
* minor tweaks by jrandom, exposing unsynchronized access and
* allowing larger M and K. changes released into the public domain.
*
* Note that this is used only by DecayingBloomFilter, which uses only
* the unsynchronized locked_foo() methods.
* Deprecated for use outside of the router; to be moved to router.jar.
*
* As of 0.8.11, the locked_foo() methods are thread-safe, in that they work,
* but there is a minor risk of false-negatives if two threads are
* accessing the same bloom filter integer.
*/
public class BloomSHA1 {
protected final int m;
protected final int k;
protected int count;
protected final int[] filter;
protected final KeySelector ks;
// convenience variables
protected final int filterBits;
protected final int filterWords;
private final BlockingQueue<int[]> buf;
/* (24,11) too big - see KeySelector
public static void main(String args[]) {
BloomSHA1 b = new BloomSHA1(24, 11);
for (int i = 0; i < 100; i++) {
byte v[] = new byte[32];
v[0] = (byte)i;
b.insert(v);
}
}
*/
/**
* Creates a filter with 2^m bits and k 'hash functions', where
* each hash function is a portion of the 160-bit SHA1 hash.
* @param m determines number of bits in filter
* @param k number of hash functions
*
* See KeySelector for important restriction on max m and k
*/
public BloomSHA1( int m, int k) {
// XXX need to devise more reasonable set of checks
//if ( m < 2 || m > 20) {
// throw new IllegalArgumentException("m out of range");
//}
//if ( k < 1 || ( k * m > 160 )) {
// throw new IllegalArgumentException(
// "too many hash functions for filter size");
//}
this.m = m;
this.k = k;
filterBits = 1 << m;
filterWords = (filterBits + 31)/32; // round up
filter = new int[filterWords];
ks = new KeySelector(m, k);
buf = new LinkedBlockingQueue(16);
// DEBUG
//System.out.println("Bloom constructor: m = " + m + ", k = " + k
// + "\n filterBits = " + filterBits
// + ", filterWords = " + filterWords);
// END
}
/**
* Creates a filter of 2^m bits, with the number of 'hash functions"
* k defaulting to 8.
* @param m determines size of filter
*/
public BloomSHA1 (int m) {
this(m, 8);
}
/**
* Creates a filter of 2^20 bits with k defaulting to 8.
*/
public BloomSHA1 () {
this (20, 8);
}
/** Clear the filter, unsynchronized */
protected void doClear() {
Arrays.fill(filter, 0);
count = 0;
}
/** Synchronized version */
public void clear() {
synchronized (this) {
doClear();
}
}
/**
* Returns the number of keys which have been inserted. This
* class (BloomSHA1) does not guarantee uniqueness in any sense; if the
* same key is added N times, the number of set members reported
* will increase by N.
*
* @return number of set members
*/
public final int size() {
synchronized (this) {
return count;
}
}
/**
* @return number of bits in filter
*/
public final int capacity () {
return filterBits;
}
/**
* Add a key to the set represented by the filter.
*
* XXX This version does not maintain 4-bit counters, it is not
* a counting Bloom filter.
*
* @param b byte array representing a key (SHA1 digest)
*/
public void insert (byte[]b) { insert(b, 0, b.length); }
public void insert (byte[]b, int offset, int len) {
synchronized(this) {
locked_insert(b, offset, len);
}
}
public final void locked_insert(byte[]b) { locked_insert(b, 0, b.length); }
public final void locked_insert(byte[]b, int offset, int len) {
int[] bitOffset = acquire();
int[] wordOffset = acquire();
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
for (int i = 0; i < k; i++) {
filter[wordOffset[i]] |= 1 << bitOffset[i];
}
count++;
buf.offer(bitOffset);
buf.offer(wordOffset);
}
/**
* Is a key in the filter. Sets up the bit and word offset arrays.
*
* @param b byte array representing a key (SHA1 digest)
* @return true if b is in the filter
*/
protected final boolean isMember(byte[] b) { return isMember(b, 0, b.length); }
protected final boolean isMember(byte[] b, int offset, int len) {
int[] bitOffset = acquire();
int[] wordOffset = acquire();
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
for (int i = 0; i < k; i++) {
if (! ((filter[wordOffset[i]] & (1 << bitOffset[i])) != 0) ) {
buf.offer(bitOffset);
buf.offer(wordOffset);
return false;
}
}
buf.offer(bitOffset);
buf.offer(wordOffset);
return true;
}
public final boolean locked_member(byte[]b) { return isMember(b); }
public final boolean locked_member(byte[]b, int offset, int len) { return isMember(b, offset, len); }
/**
* Is a key in the filter. External interface, internally synchronized.
*
* @param b byte array representing a key (SHA1 digest)
* @return true if b is in the filter
*/
public final boolean member(byte[]b) { return member(b, 0, b.length); }
public final boolean member(byte[]b, int offset, int len) {
synchronized (this) {
return isMember(b, offset, len);
}
}
/**
* Get the bloom filter offsets for reuse.
* Caller should call release(rv) when done with it.
* @since 0.8.11
*/
public FilterKey getFilterKey(byte[] b, int offset, int len) {
int[] bitOffset = acquire();
int[] wordOffset = acquire();
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
return new FilterKey(bitOffset, wordOffset);
}
/**
* Add the key to the filter.
* @since 0.8.11
*/
public void locked_insert(FilterKey fk) {
for (int i = 0; i < k; i++) {
filter[fk.wordOffset[i]] |= 1 << fk.bitOffset[i];
}
count++;
}
/**
* Is the key in the filter.
* @since 0.8.11
*/
public boolean locked_member(FilterKey fk) {
for (int i = 0; i < k; i++) {
if (! ((filter[fk.wordOffset[i]] & (1 << fk.bitOffset[i])) != 0) )
return false;
}
return true;
}
/**
* @since 0.8.11
*/
private int[] acquire() {
int[] rv = buf.poll();
if (rv != null)
return rv;
return new int[k];
}
/**
* @since 0.8.11
*/
public void release(FilterKey fk) {
buf.offer(fk.bitOffset);
buf.offer(fk.wordOffset);
}
/**
* Store the (opaque) bloom filter offsets for reuse.
* @since 0.8.11
*/
public static class FilterKey {
private final int[] bitOffset;
private final int[] wordOffset;
private FilterKey(int[] bitOffset, int[] wordOffset) {
this.bitOffset = bitOffset;
this.wordOffset = wordOffset;
}
}
/**
* @param n number of set members
* @return approximate false positive rate
*/
public final double falsePositives(int n) {
// (1 - e^(-kN/M))^k
return java.lang.Math.pow (
(1l - java.lang.Math.exp(0d- ((double)k) * (long)n / filterBits)), k);
}
public final double falsePositives() {
return falsePositives(count);
}
/*****
// DEBUG METHODS
public static String keyToString(byte[] key) {
StringBuilder sb = new StringBuilder().append(key[0]);
for (int i = 1; i < key.length; i++) {
sb.append(".").append(Integer.toString(key[i], 16));
}
return sb.toString();
}
*****/
/** convert 64-bit integer to hex String */
/*****
public static String ltoh (long i) {
StringBuilder sb = new StringBuilder().append("#")
.append(Long.toString(i, 16));
return sb.toString();
}
*****/
/** convert 32-bit integer to String */
/*****
public static String itoh (int i) {
StringBuilder sb = new StringBuilder().append("#")
.append(Integer.toString(i, 16));
return sb.toString();
}
*****/
/** convert single byte to String */
/*****
public static String btoh (byte b) {
int i = 0xff & b;
return itoh(i);
}
*****/
}
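A minimal sketch of the synchronized public API (insert/member/falsePositives), using a 32-byte key, the size DecayingBloomFilter extends its entries to, with the (23, 11) parameters noted above as the maximum for 32-byte keys. The sketch class is illustrative, not part of the library:

import java.security.MessageDigest;
import org.xlattice.crypto.filters.BloomSHA1;

public class BloomSketch {
    public static void main(String[] args) throws Exception {
        BloomSHA1 bloom = new BloomSHA1(23, 11);
        // any good-quality 32-byte pseudo-random key; a SHA-256 digest will do here
        byte[] key = MessageDigest.getInstance("SHA-256").digest("hello".getBytes("UTF-8"));
        System.out.println("member before insert? " + bloom.member(key));  // false
        bloom.insert(key);
        System.out.println("member after insert?  " + bloom.member(key));  // true
        System.out.println("inserted: " + bloom.size() + ", capacity (bits): " + bloom.capacity()
                           + ", false positive rate: " + bloom.falsePositives());
    }
}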

View File

@ -0,0 +1,279 @@
package org.xlattice.crypto.filters;
/**
* Given a key, populates arrays determining word and bit offsets into
* a Bloom filter.
*
* @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
*
* BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
* app - http://xlattice.sourceforge.net/
*
* minor tweaks by jrandom, exposing unsynchronized access and
* allowing larger M and K. changes released into the public domain.
*
* As of 0.8.11, bitoffset and wordoffset out parameters moved from fields
* to selector arguments, to allow concurrency.
* All methods are now thread-safe.
*/
public class KeySelector {
private final int m;
private final int k;
private final BitSelector bitSel;
private final WordSelector wordSel;
public interface BitSelector {
/**
* @param bitOffset Out parameter of length k
* @since 0.8.11 out parameter added
*/
public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset);
}
public interface WordSelector {
/**
* @param wordOffset Out parameter of length k
* @since 0.8.11 out parameter added
*/
public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset);
}
/** AND with byte to expose index-many bits */
public final static int[] UNMASK = {
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767};
/** AND with byte to zero out index-many bits */
public final static int[] MASK = {
~0,~1,~3,~7,~15,~31,~63,~127,~255,~511,~1023,~2047,~4095,~8191,~16383,~32767};
public final static int TWO_UP_15 = 32 * 1024;
/**
* Creates a key selector for a Bloom filter. When a key is presented
* to the getOffsets() method, the k 'hash function' values are
* extracted and used to populate bitOffset and wordOffset arrays which
* specify the k flags to be set or examined in the filter.
*
* @param m size of the filter as a power of 2
* @param k number of 'hash functions'
*
* Note that if k and m are too big, the GenericWordSelector blows up -
* The max for 32-byte keys is m=23 and k=11.
* The precise restriction appears to be:
* ((5k + (k-1)(m-5)) / 8) + 2 < keySizeInBytes
*
* It isn't clear how to fix this.
*/
public KeySelector (int m, int k) {
//if ( (m < 2) || (m > 20)|| (k < 1)
// || (bitOffset == null) || (wordOffset == null)) {
// throw new IllegalArgumentException();
//}
this.m = m;
this.k = k;
bitSel = new GenericBitSelector();
wordSel = new GenericWordSelector();
}
/**
* Extracts the k bit offsets from a key, suitable for general values
* of m and k.
*/
public class GenericBitSelector implements BitSelector {
/** Do the extraction */
public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset) {
int curBit = 8 * offset;
int curByte;
for (int j = 0; j < k; j++) {
curByte = curBit / 8;
int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte
// // DEBUG
// System.out.println (
// "this byte = " + btoh(b[curByte])
// + ", next byte = " + btoh(b[curByte + 1])
// + "; curBit=" + curBit + ", curByte= " + curByte
// + ", bitsUnused=" + bitsUnused);
// // END
if (bitsUnused > 5) {
bitOffset[j] = ((0xff & b[curByte])
>> (bitsUnused - 5)) & UNMASK[5];
// // DEBUG
// System.out.println(
// " before shifting: " + btoh(b[curByte])
// + "\n after shifting: "
// + itoh( (0xff & b[curByte]) >> (bitsUnused - 5))
// + "\n mask: " + itoh(UNMASK[5]) );
// // END
} else if (bitsUnused == 5) {
bitOffset[j] = b[curByte] & UNMASK[5];
} else {
bitOffset[j] = (b[curByte] & UNMASK[bitsUnused])
| (((0xff & b[curByte + 1]) >> 3)
& MASK[bitsUnused]);
// // DEBUG
// System.out.println(
// " contribution from first byte: "
// + itoh(b[curByte] & UNMASK[bitsUnused])
// + "\n second byte: " + btoh(b[curByte + 1])
// + "\n shifted: " + itoh((0xff & b[curByte + 1]) >> 3)
// + "\n mask: " + itoh(MASK[bitsUnused])
// + "\n contribution from second byte: "
// + itoh((0xff & b[curByte + 1] >> 3) & MASK[bitsUnused]));
// // END
}
// // DEBUG
// System.out.println (" bitOffset[j] = " + bitOffset[j]);
// // END
curBit += 5;
}
}
}
/**
* Extracts the k word offsets from a key. Suitable for general
* values of m and k. See above for formula for max m and k.
*/
public class GenericWordSelector implements WordSelector {
/** Extract the k offsets into the word offset array */
public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset) {
int stride = m - 5;
//assert true: stride<16;
int curBit = (k * 5) + (offset * 8);
int curByte;
for (int j = 0; j < k; j++) {
curByte = curBit / 8;
int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte
// // DEBUG
// System.out.println (
// "curr 3 bytes: " + btoh(b[curByte])
// + (curByte < 19 ?
// " " + btoh(b[curByte + 1]) : "")
// + (curByte < 18 ?
// " " + btoh(b[curByte + 2]) : "")
// + "; curBit=" + curBit + ", curByte= " + curByte
// + ", bitsUnused=" + bitsUnused);
// // END
if (bitsUnused > stride) {
// the value is entirely within the current byte
wordOffset[j] = ((0xff & b[curByte])
>> (bitsUnused - stride))
& UNMASK[stride];
} else if (bitsUnused == stride) {
// the value fills the current byte
wordOffset[j] = b[curByte] & UNMASK[stride];
} else { // bitsUnused < stride
// value occupies more than one byte
// bits from first byte, right-aligned in result
wordOffset[j] = b[curByte] & UNMASK[bitsUnused];
// // DEBUG
// System.out.println(" first byte contributes "
// + itoh(wordOffset[j]));
// // END
// bits from second byte
int bitsToGet = stride - bitsUnused;
if (bitsToGet >= 8) {
// 8 bits from second byte
wordOffset[j] |= (0xff & b[curByte + 1]) << bitsUnused;
// // DEBUG
// System.out.println(" second byte contributes "
// + itoh(
// (0xff & b[curByte + 1]) << bitsUnused
// ));
// // END
// bits from third byte
bitsToGet -= 8;
if (bitsToGet > 0) {
// AIOOBE here if m and k too big (23,11 is the max)
// for a 32-byte key - see above
wordOffset[j] |=
((0xff & b[curByte + 2]) >> (8 - bitsToGet))
<< (stride - bitsToGet) ;
// // DEBUG
// System.out.println(" third byte contributes "
// + itoh(
// (((0xff & b[curByte + 2]) >> (8 - bitsToGet))
// << (stride - bitsToGet))
// ));
// // END
}
} else {
// all remaining bits are within second byte
wordOffset[j] |= ((b[curByte + 1] >> (8 - bitsToGet))
& UNMASK[bitsToGet])
<< bitsUnused;
// // DEBUG
// System.out.println(" second byte contributes "
// + itoh(
// ((b[curByte + 1] >> (8 - bitsToGet))
// & UNMASK[bitsToGet])
// << bitsUnused
// ));
// // END
}
}
// // DEBUG
// System.out.println (
// " wordOffset[" + j + "] = " + wordOffset[j]
// + ", " + itoh(wordOffset[j])
// );
// // END
curBit += stride;
}
}
}
/**
* Given a key, populate the word and bit offset arrays, each
* of which has k elements.
*
* @param key cryptographic key used in populating the arrays
* @param bitOffset Out parameter of length k
* @param wordOffset Out parameter of length k
* @since 0.8.11 out parameters added
*/
public void getOffsets (byte[] key, int[] bitOffset, int[] wordOffset) {
getOffsets(key, 0, key.length, bitOffset, wordOffset);
}
/**
* Given a key, populate the word and bit offset arrays, each
* of which has k elements.
*
* @param key cryptographic key used in populating the arrays
* @param bitOffset Out parameter of length k
* @param wordOffset Out parameter of length k
* @since 0.8.11 out parameters added
*/
public void getOffsets (byte[] key, int off, int len, int[] bitOffset, int[] wordOffset) {
// skip these checks for speed
//if (key == null) {
// throw new IllegalArgumentException("null key");
//}
//if (len < 20) {
// throw new IllegalArgumentException(
// "key must be at least 20 bytes long");
//}
// // DEBUG
// System.out.println("KeySelector.getOffsets for "
// + BloomSHA1.keyToString(b));
// // END
bitSel.getBitSelectors(key, off, len, bitOffset);
wordSel.getWordSelectors(key, off, len, wordOffset);
}
/*****
// DEBUG METHODS ////////////////////////////////////////////////
String itoh(int i) {
return BloomSHA1.itoh(i);
}
String btoh(byte b) {
return BloomSHA1.btoh(b);
}
*****/
}
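The (m, k) limits quoted in the comments above, (23,11) or (26,10) for 32-byte keys, follow from the bit budget: each of the k hashes consumes 5 bits for its bit offset plus (m-5) bits for its word offset, and the +2 in the stated restriction apparently accounts for GenericWordSelector reading up to two bytes past the current one (the AIOOBE noted in the code). A small check of the restriction as written; a sketch, not part of the library:

public class KeySelectorLimitCheck {
    // The restriction quoted in the KeySelector javadoc:
    // ((5k + (k-1)(m-5)) / 8) + 2 < keySizeInBytes
    static boolean fits(int m, int k, int keySizeInBytes) {
        return ((5 * k + (k - 1) * (m - 5)) / 8) + 2 < keySizeInBytes;
    }
    public static void main(String[] args) {
        System.out.println("(23,11) with 32-byte key: " + fits(23, 11, 32)); // true,  the documented max
        System.out.println("(24,11) with 32-byte key: " + fits(24, 11, 32)); // false, "too big" per BloomSHA1
        System.out.println("(26,10) with 32-byte key: " + fits(26, 10, 32)); // true,  the other documented max
        System.out.println("(27,10) with 32-byte key: " + fits(27, 10, 32)); // false
    }
}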