forked from I2P_Developers/i2p.i2p
move DecayingBloomFilter, DecayingHashSet, and xlattice filters from core to router
This commit is contained in:
@ -1,7 +1,7 @@
|
||||
package net.i2p.router;
|
||||
|
||||
import net.i2p.util.DecayingBloomFilter;
|
||||
import net.i2p.util.DecayingHashSet;
|
||||
import net.i2p.router.util.DecayingBloomFilter;
|
||||
import net.i2p.router.util.DecayingHashSet;
|
||||
import net.i2p.util.Log;
|
||||
|
||||
/**
|
||||
|
@ -4,8 +4,8 @@ import java.util.Map;
|
||||
|
||||
import net.i2p.data.Hash;
|
||||
import net.i2p.router.RouterContext;
|
||||
import net.i2p.util.DecayingBloomFilter;
|
||||
import net.i2p.util.DecayingHashSet;
|
||||
import net.i2p.router.util.DecayingBloomFilter;
|
||||
import net.i2p.router.util.DecayingHashSet;
|
||||
import net.i2p.util.Log;
|
||||
|
||||
/**
|
||||
|
@ -3,9 +3,9 @@ package net.i2p.router.tunnel;
|
||||
import net.i2p.data.ByteArray;
|
||||
import net.i2p.data.DataHelper;
|
||||
import net.i2p.router.RouterContext;
|
||||
import net.i2p.router.util.DecayingBloomFilter;
|
||||
import net.i2p.router.util.DecayingHashSet;
|
||||
import net.i2p.util.ByteCache;
|
||||
import net.i2p.util.DecayingBloomFilter;
|
||||
import net.i2p.util.DecayingHashSet;
|
||||
|
||||
/**
|
||||
* Manage the IV validation for all of the router's tunnels by way of a big
|
||||
|
@ -9,8 +9,8 @@ import net.i2p.data.PrivateKey;
|
||||
import net.i2p.data.SessionKey;
|
||||
import net.i2p.data.i2np.BuildRequestRecord;
|
||||
import net.i2p.data.i2np.TunnelBuildMessage;
|
||||
import net.i2p.util.DecayingBloomFilter;
|
||||
import net.i2p.util.DecayingHashSet;
|
||||
import net.i2p.router.util.DecayingBloomFilter;
|
||||
import net.i2p.router.util.DecayingHashSet;
|
||||
import net.i2p.util.Log;
|
||||
|
||||
/**
|
||||
|
455
router/java/src/net/i2p/router/util/DecayingBloomFilter.java
Normal file
455
router/java/src/net/i2p/router/util/DecayingBloomFilter.java
Normal file
@ -0,0 +1,455 @@
|
||||
package net.i2p.router.util;

import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import net.i2p.I2PAppContext;
import net.i2p.data.DataHelper;
import net.i2p.util.Log;
import net.i2p.util.SimpleTimer;

import org.xlattice.crypto.filters.BloomSHA1;

/**
 * Series of bloom filters which decay over time, allowing their continual use
 * for time sensitive data. This has a fixed size (per
 * period, using two periods overall), allowing this to pump through hundreds of
 * entries per second with virtually no false positive rate. Down the line,
 * this may be refactored to allow tighter control of the size necessary for the
 * contained bloom filters.
 *
 * See main() for an analysis of false positive rate.
 * See BloomFilterIVValidator for instantiation parameters.
 * See DecayingHashSet for a smaller and simpler version.
 * @see net.i2p.router.tunnel.BloomFilterIVValidator
 * @see net.i2p.router.util.DecayingHashSet
 */
public class DecayingBloomFilter {
    protected final I2PAppContext _context;
    protected final Log _log;
    // Double buffer: inserts go into _current; _previous holds the prior
    // period's entries. decay() swaps them, so an entry is remembered for
    // at least one and at most two durations.
    private BloomSHA1 _current;
    private BloomSHA1 _previous;
    protected final int _durationMs;
    protected final int _entryBytes;
    // Random pads XORed against short entries to extend them to the 32 bytes
    // BloomSHA1 keys on; fixed for the lifetime of this filter.
    private final byte _extenders[][];
    // Scratch buffer for the 32-byte extended entry.
    // NOTE(review): written under the *read* lock, so concurrent add() calls
    // share this buffer unsynchronized — presumably tolerated by design; verify.
    private final byte _extended[];
    // Scratch buffer for serializing a long entry in add(long)/isKnown(long);
    // same unsynchronized-under-read-lock caveat as _extended.
    private final byte _longToEntry[];
    private final long _longToEntryMask;
    protected long _currentDuplicates;
    protected volatile boolean _keepDecaying;
    protected final SimpleTimer.TimedEvent _decayEvent;
    /** just for logging */
    protected final String _name;
    /** synchronize against this lock when switching double buffers */
    protected final ReentrantReadWriteLock _reorganizeLock = new ReentrantReadWriteLock();

    // Default BloomSHA1 parameters: m = filter size exponent, k = hash count.
    private static final int DEFAULT_M = 23;
    private static final int DEFAULT_K = 11;
    /** true for debugging */
    private static final boolean ALWAYS_MISS = false;

    /**
     * Only for extension by DHS: does NOT allocate the Bloom filters or the
     * extender/scratch buffers — the subclass supplies its own storage.
     *
     * @param durationMs entries last for at least this long, but no more than twice this long
     * @param entryBytes how large are the entries to be added
     * @param name just for logging / debugging / stats
     */
    protected DecayingBloomFilter(int durationMs, int entryBytes, String name, I2PAppContext context) {
        _context = context;
        _log = context.logManager().getLog(getClass());
        _entryBytes = entryBytes;
        _name = name;
        _durationMs = durationMs;
        // all final
        _extenders = null;
        _extended = null;
        _longToEntry = null;
        _longToEntryMask = 0;
        context.addShutdownTask(new Shutdown());
        _decayEvent = new DecayEvent();
        _keepDecaying = true;
        SimpleTimer.getInstance().addEvent(_decayEvent, _durationMs);
    }

    /**
     * Create a bloom filter that will decay its entries over time.
     *
     * @param durationMs entries last for at least this long, but no more than twice this long
     * @param entryBytes how large are the entries to be added? if this is less than 32 bytes,
     *                   the entries added will be expanded by concatenating their XORing
     *                   against with sufficient random values.
     */
    public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes) {
        this(context, durationMs, entryBytes, "DBF");
    }

    /** @param name just for logging / debugging / stats */
    public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes, String name) {
        // this is instantiated in four different places, they may have different
        // requirements, but for now use this as a gross method of memory reduction.
        // m == 23 => 1MB each BloomSHA1 (4 pairs = 8MB total)
        this(context, durationMs, entryBytes, name, context.getProperty("router.decayingBloomFilterM", DEFAULT_M));
    }

    /**
     * Full constructor: allocates both Bloom filters, the random extender pads
     * for entries shorter than 32 bytes, schedules the periodic decay event,
     * and registers the size/dups/false-positive rate stats.
     *
     * @param m filter size exponent
     */
    public DecayingBloomFilter(I2PAppContext context, int durationMs, int entryBytes, String name, int m) {
        _context = context;
        _log = context.logManager().getLog(DecayingBloomFilter.class);
        _entryBytes = entryBytes;
        _name = name;
        int k = DEFAULT_K;
        // max is (23,11) or (26,10); see KeySelector for details
        if (m > DEFAULT_M)
            k--;
        _current = new BloomSHA1(m, k);
        _previous = new BloomSHA1(m, k);
        _durationMs = durationMs;
        // ceil(32 / entryBytes) - 1 pads needed to fill 32 bytes
        int numExtenders = (32+ (entryBytes-1))/entryBytes - 1;
        if (numExtenders < 0)
            numExtenders = 0;
        _extenders = new byte[numExtenders][entryBytes];
        for (int i = 0; i < numExtenders; i++)
            _context.random().nextBytes(_extenders[i]);
        if (numExtenders > 0) {
            _extended = new byte[32];
            _longToEntry = new byte[_entryBytes];
            _longToEntryMask = (1l << (_entryBytes * 8l)) -1;
        } else {
            // final
            _extended = null;
            _longToEntry = null;
            _longToEntryMask = 0;
        }
        _decayEvent = new DecayEvent();
        _keepDecaying = true;
        SimpleTimer.getInstance().addEvent(_decayEvent, _durationMs);
        if (_log.shouldLog(Log.WARN))
            _log.warn("New DBF " + name + " m = " + m + " k = " + k + " entryBytes = " + entryBytes +
                      " numExtenders = " + numExtenders + " cycle (s) = " + (durationMs / 1000));
        // try to get a handle on memory usage vs. false positives
        context.statManager().createRateStat("router.decayingBloomFilter." + name + ".size",
                                             "Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
        context.statManager().createRateStat("router.decayingBloomFilter." + name + ".dups",
                                             "1000000 * Duplicates/Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
        context.statManager().createRateStat("router.decayingBloomFilter." + name + ".log10(falsePos)",
                                             "log10 of the false positive rate (must have net.i2p.util.DecayingBloomFilter=DEBUG)",
                                             "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
        context.addShutdownTask(new Shutdown());
    }

    /**
     * Clears both buffers on router shutdown.
     * @since 0.8.8
     */
    private class Shutdown implements Runnable {
        public void run() {
            clear();
        }
    }

    /** @return duplicates seen since the last decay; reset by decay() and clear() */
    public long getCurrentDuplicateCount() { return _currentDuplicates; }

    /** unsynchronized but only used for logging elsewhere */
    public int getInsertedCount() {
        return _current.size() + _previous.size();
    }

    /** unsynchronized, only used for logging elsewhere */
    public double getFalsePositiveRate() {
        return _current.falsePositives();
    }

    /**
     * @return true if the entry added is a duplicate
     */
    public boolean add(byte entry[]) {
        return add(entry, 0, entry.length);
    }

    /**
     * @return true if the entry added is a duplicate
     * @throws IllegalArgumentException if entry is null or len != entryBytes
     */
    public boolean add(byte entry[], int off, int len) {
        if (ALWAYS_MISS) return false;
        if (entry == null)
            throw new IllegalArgumentException("Null entry");
        if (len != _entryBytes)
            throw new IllegalArgumentException("Bad entry [" + len + ", expected "
                                               + _entryBytes + "]");
        // Read lock only: inserts may run concurrently; the write lock is
        // reserved for the buffer swap in decay() and for clear().
        getReadLock();
        try {
            return locked_add(entry, off, len, true);
        } finally { releaseReadLock(); }
    }

    /**
     * @return true if the entry added is a duplicate. the number of low order
     * bits used is determined by the entryBytes parameter used on creation of the
     * filter.
     *
     */
    public boolean add(long entry) {
        if (ALWAYS_MISS) return false;
        if (_entryBytes <= 7)
            // NOTE(review): folds the long through the entry-width mask; the
            // (1 << 31) here is an int shift, so only the low 31 bits are
            // masked — looks suspicious but preserved as-is, confirm upstream.
            entry = ((entry ^ _longToEntryMask) & ((1 << 31)-1)) | (entry ^ _longToEntryMask);
        //entry &= _longToEntryMask;
        if (entry < 0) {
            // store the magnitude and set the top bit to keep sign info
            DataHelper.toLong(_longToEntry, 0, _entryBytes, 0-entry);
            _longToEntry[0] |= (1 << 7);
        } else {
            DataHelper.toLong(_longToEntry, 0, _entryBytes, entry);
        }
        getReadLock();
        try {
            return locked_add(_longToEntry, 0, _longToEntry.length, true);
        } finally { releaseReadLock(); }
    }

    /**
     * @return true if the entry is already known. this does NOT add the
     * entry however.
     *
     */
    public boolean isKnown(long entry) {
        if (ALWAYS_MISS) return false;
        if (_entryBytes <= 7)
            // same fold as add(long) above
            entry = ((entry ^ _longToEntryMask) & ((1 << 31)-1)) | (entry ^ _longToEntryMask);
        if (entry < 0) {
            DataHelper.toLong(_longToEntry, 0, _entryBytes, 0-entry);
            _longToEntry[0] |= (1 << 7);
        } else {
            DataHelper.toLong(_longToEntry, 0, _entryBytes, entry);
        }
        getReadLock();
        try {
            return locked_add(_longToEntry, 0, _longToEntry.length, false);
        } finally { releaseReadLock(); }
    }

    /**
     * Check (and optionally insert) an entry against both buffers.
     * Caller must hold the read lock.
     *
     * @param addIfNew if true, insert into _current when not already present
     * @return true if the entry was already in either buffer
     */
    private boolean locked_add(byte entry[], int offset, int len, boolean addIfNew) {
        if (_extended != null) {
            // extend the entry to 32 bytes
            System.arraycopy(entry, offset, _extended, 0, len);
            for (int i = 0; i < _extenders.length; i++)
                DataHelper.xor(entry, offset, _extenders[i], 0, _extended, _entryBytes * (i+1), _entryBytes);

            BloomSHA1.FilterKey key = _current.getFilterKey(_extended, 0, 32);
            boolean seen = _current.locked_member(key);
            if (!seen)
                seen = _previous.locked_member(key);
            if (seen) {
                _currentDuplicates++;
                _current.release(key);
                return true;
            } else {
                if (addIfNew) {
                    _current.locked_insert(key);
                }
                _current.release(key);
                return false;
            }
        } else {
            // entry is already 32+ bytes, key it directly
            BloomSHA1.FilterKey key = _current.getFilterKey(entry, offset, len);
            boolean seen = _current.locked_member(key);
            if (!seen)
                seen = _previous.locked_member(key);
            if (seen) {
                _currentDuplicates++;
                _current.release(key);
                return true;
            } else {
                if (addIfNew) {
                    _current.locked_insert(key);
                }
                _current.release(key);
                return false;
            }
        }
    }

    /** Empty both buffers and reset the duplicate counter. */
    public void clear() {
        // silently gives up if the write lock can't be acquired in 5s
        if (!getWriteLock())
            return;
        try {
            _current.clear();
            _previous.clear();
            _currentDuplicates = 0;
        } finally { releaseWriteLock(); }
    }

    /** Stop the periodic decay; the filter contents are left intact. */
    public void stopDecaying() {
        _keepDecaying = false;
        SimpleTimer.getInstance().removeEvent(_decayEvent);
    }

    /**
     * Swap the double buffers: _current becomes _previous and the old
     * _previous is cleared for reuse. Called every _durationMs by DecayEvent,
     * which is what bounds an entry's lifetime to [durationMs, 2*durationMs].
     * Also reports size / dups / false-positive stats.
     */
    protected void decay() {
        int currentCount = 0;
        long dups = 0;
        double fpr = 0d;
        if (!getWriteLock())
            return;
        try {
            BloomSHA1 tmp = _previous;
            currentCount = _current.size();
            if (_log.shouldLog(Log.DEBUG) && currentCount > 0)
                fpr = _current.falsePositives();
            _previous = _current;
            _current = tmp;
            _current.clear();
            dups = _currentDuplicates;
            _currentDuplicates = 0;
        } finally { releaseWriteLock(); }
        if (_log.shouldLog(Log.DEBUG))
            _log.debug("Decaying the filter " + _name + " after inserting " + currentCount
                       + " elements and " + dups + " false positives with FPR = " + fpr);
        _context.statManager().addRateData("router.decayingBloomFilter." + _name + ".size",
                                           currentCount);
        if (currentCount > 0)
            _context.statManager().addRateData("router.decayingBloomFilter." + _name + ".dups",
                                               1000l*1000*dups/currentCount);
        if (fpr > 0d) {
            // only if log.shouldLog(Log.DEBUG) ...
            long exponent = (long) Math.log10(fpr);
            _context.statManager().addRateData("router.decayingBloomFilter." + _name + ".log10(falsePos)",
                                               exponent);
        }
    }

    /** Reschedules itself every _durationMs until stopDecaying() is called. */
    private class DecayEvent implements SimpleTimer.TimedEvent {
        public void timeReached() {
            if (_keepDecaying) {
                decay();
                SimpleTimer.getInstance().addEvent(DecayEvent.this, _durationMs);
            }
        }
    }

    /** @since 0.8.11 moved from DecayingHashSet */
    protected void getReadLock() {
        _reorganizeLock.readLock().lock();
    }

    /** @since 0.8.11 moved from DecayingHashSet */
    protected void releaseReadLock() {
        _reorganizeLock.readLock().unlock();
    }

    /**
     * Best-effort write-lock acquisition: waits up to 5 seconds, logs an
     * error and returns false on timeout or interrupt rather than blocking
     * forever.
     *
     * @return true if the lock was acquired
     * @since 0.8.11 moved from DecayingHashSet
     */
    protected boolean getWriteLock() {
        try {
            boolean rv = _reorganizeLock.writeLock().tryLock(5000, TimeUnit.MILLISECONDS);
            if (!rv)
                _log.error("no lock, size is: " + _reorganizeLock.getQueueLength(), new Exception("rats"));
            return rv;
        } catch (InterruptedException ie) {}
        return false;
    }

    /** @since 0.8.11 moved from DecayingHashSet */
    protected void releaseWriteLock() {
        _reorganizeLock.writeLock().unlock();
    }

    /**
     * This filter is used only for participants and OBEPs, not
     * IBGWs, so depending on your assumptions of avg. tunnel length,
     * the performance is somewhat better than the gross share BW
     * would indicate.
     *
     *<pre>
     * Following stats for m=23, k=11:
     * Theoretical false positive rate for  16 KBps: 1.17E-21
     * Theoretical false positive rate for  24 KBps: 9.81E-20
     * Theoretical false positive rate for  32 KBps: 2.24E-18
     * Theoretical false positive rate for 256 KBps: 7.45E-9
     * Theoretical false positive rate for 512 KBps: 5.32E-6
     * Theoretical false positive rate for 1024 KBps: 1.48E-3
     * Then it gets bad: 1280 .67%; 1536 2.0%; 1792 4.4%; 2048 8.2%.
     *
     * Following stats for m=24, k=10:
     * 1280 4.5E-5; 1792 5.6E-4; 2048 0.14%
     *
     * Following stats for m=25, k=10:
     * 1792 2.4E-6; 4096 0.14%; 5120 0.6%; 6144 1.7%; 8192 6.8%; 10240 15%
     *</pre>
     */
    public static void main(String args[]) {
        System.out.println("Usage: DecayingBloomFilter [kbps [m [iterations]]] (default 256 23 10)");
        int kbps = 256;
        if (args.length >= 1) {
            try {
                kbps = Integer.parseInt(args[0]);
            } catch (NumberFormatException nfe) {}
        }
        int m = DEFAULT_M;
        if (args.length >= 2) {
            try {
                m = Integer.parseInt(args[1]);
            } catch (NumberFormatException nfe) {}
        }
        int iterations = 10;
        if (args.length >= 3) {
            try {
                iterations = Integer.parseInt(args[2]);
            } catch (NumberFormatException nfe) {}
        }
        testByLong(kbps, m, iterations);
        testByBytes(kbps, m, iterations);
    }

    /** Simulate kbps worth of 8-byte entries per 10-minute period, count false positives. */
    private static void testByLong(int kbps, int m, int numRuns) {
        int messages = 60 * 10 * kbps;
        Random r = new Random();
        DecayingBloomFilter filter = new DecayingBloomFilter(I2PAppContext.getGlobalContext(), 600*1000, 8, "test", m);
        int falsePositives = 0;
        long totalTime = 0;
        double fpr = 0d;
        for (int j = 0; j < numRuns; j++) {
            long start = System.currentTimeMillis();
            for (int i = 0; i < messages; i++) {
                if (filter.add(r.nextLong())) {
                    falsePositives++;
                    //System.out.println("False positive " + falsePositives + " (testByLong j=" + j + " i=" + i + ")");
                }
            }
            totalTime += System.currentTimeMillis() - start;
            fpr = filter.getFalsePositiveRate();
            filter.clear();
        }
        filter.stopDecaying();
        System.out.println("False postive rate should be " + fpr);
        System.out.println("After " + numRuns + " runs pushing " + messages + " entries in "
                           + DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
                           + falsePositives + " false positives");

    }

    /** Simulate kbps worth of 16-byte IVs per 10-minute period, count false positives. */
    private static void testByBytes(int kbps, int m, int numRuns) {
        byte iv[][] = new byte[60*10*kbps][16];
        Random r = new Random();
        for (int i = 0; i < iv.length; i++)
            r.nextBytes(iv[i]);

        DecayingBloomFilter filter = new DecayingBloomFilter(I2PAppContext.getGlobalContext(), 600*1000, 16, "test", m);
        int falsePositives = 0;
        long totalTime = 0;
        double fpr = 0d;
        for (int j = 0; j < numRuns; j++) {
            long start = System.currentTimeMillis();
            for (int i = 0; i < iv.length; i++) {
                if (filter.add(iv[i])) {
                    falsePositives++;
                    //System.out.println("False positive " + falsePositives + " (testByBytes j=" + j + " i=" + i + ")");
                }
            }
            totalTime += System.currentTimeMillis() - start;
            fpr = filter.getFalsePositiveRate();
            filter.clear();
        }
        filter.stopDecaying();
        System.out.println("False postive rate should be " + fpr);
        System.out.println("After " + numRuns + " runs pushing " + iv.length + " entries in "
                           + DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
                           + falsePositives + " false positives");
        //System.out.println("inserted: " + bloom.size() + " with " + bloom.capacity()
        //                   + " (" + bloom.falsePositives()*100.0d + "% false positive)");
    }
}
|
335
router/java/src/net/i2p/router/util/DecayingHashSet.java
Normal file
335
router/java/src/net/i2p/router/util/DecayingHashSet.java
Normal file
@ -0,0 +1,335 @@
|
||||
package net.i2p.router.util;

import java.util.Random;

import net.i2p.I2PAppContext;
import net.i2p.data.DataHelper;
import net.i2p.util.ConcurrentHashSet;
import net.i2p.util.Log;


/**
 * Double buffered hash set.
 * Since DecayingBloomFilter was instantiated 4 times for a total memory usage
 * of 8MB, it seemed like we could do a lot better, given these usage stats
 * on a class L router:
 *
 * ./router/java/src/net/i2p/router/tunnel/BuildMessageProcessor.java:
 *    32 bytes, peak 10 entries in 1m
 *    (320 peak entries seen on fast router)
 *
 * ./router/java/src/net/i2p/router/transport/udp/InboundMessageFragments.java:
 *    4 bytes, peak 150 entries in 10s
 *    (1600 peak entries seen on fast router)
 *
 * ./router/java/src/net/i2p/router/MessageValidator.java:
 *    8 bytes, peak 1K entries in 2m
 *    (36K peak entries seen on fast router)
 *
 * ./router/java/src/net/i2p/router/tunnel/BloomFilterIVValidator.java:
 *    16 bytes, peak 15K entries in 10m
 *
 * If the ArrayWrapper object in the HashSet is 50 bytes, and BloomSHA1(23, 11) is 1MB,
 * then for less than 20K entries this is smaller.
 * And this uses space proportional to traffic, so it doesn't penalize small routers
 * with a fixed 8MB.
 * So let's try it for the first 2 or 3, for now.
 *
 * Also, DBF is synchronized, and uses SimpleTimer.
 * Here we use a read/write lock, with synchronization only
 * when switching double buffers, and we use SimpleScheduler.
 *
 * Yes, we could stare at stats all day, and try to calculate an acceptable
 * false-positive rate for each of the above uses, then estimate the DBF size
 * required to meet that rate for a given usage. Or even start adjusting the
 * Bloom filter m and k values on a per-DBF basis. But it's a whole lot easier
 * to implement something with a zero false positive rate, and uses less memory
 * for almost all bandwidth classes.
 *
 * This has a strictly zero false positive rate for <= 8 byte keys.
 * For larger keys, it is 1 / (2**64) ~= 5E-20, which is better than
 * DBF for any entry count greater than about 14K.
 *
 * DBF has a zero false negative rate over the period
 * 2 * durationMs. And a 100% false negative rate beyond that period.
 * This has the same properties.
 *
 * This performs about twice as fast as DBF in the test below.
 *
 * @author zzz
 */
public class DecayingHashSet extends DecayingBloomFilter {
    // Double buffer of 8-byte hash wrappers; these shadow the superclass's
    // BloomSHA1 pair (which the protected super constructor leaves null).
    private ConcurrentHashSet<ArrayWrapper> _current;
    private ConcurrentHashSet<ArrayWrapper> _previous;

    /**
     * Create a double-buffered hash set that will decay its entries over time.
     *
     * @param durationMs entries last for at least this long, but no more than twice this long
     * @param entryBytes how large are the entries to be added? 1 to 32 bytes
     */
    public DecayingHashSet(I2PAppContext context, int durationMs, int entryBytes) {
        this(context, durationMs, entryBytes, "DHS");
    }

    /**
     * @param name just for logging / debugging / stats
     * @throws IllegalArgumentException if entryBytes is not in 1..32
     */
    public DecayingHashSet(I2PAppContext context, int durationMs, int entryBytes, String name) {
        super(durationMs, entryBytes, name, context);
        if (entryBytes <= 0 || entryBytes > 32)
            throw new IllegalArgumentException("Bad size");
        _current = new ConcurrentHashSet(128);
        _previous = new ConcurrentHashSet(128);
        if (_log.shouldLog(Log.WARN))
            _log.warn("New DHS " + name + " entryBytes = " + entryBytes +
                      " cycle (s) = " + (durationMs / 1000));
        // try to get a handle on memory usage vs. false positives
        context.statManager().createRateStat("router.decayingHashSet." + name + ".size",
                                             "Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
        context.statManager().createRateStat("router.decayingHashSet." + name + ".dups",
                                             "1000000 * Duplicates/Size", "Router", new long[] { 10 * Math.max(60*1000, durationMs) });
    }

    /** unsynchronized but only used for logging elsewhere */
    @Override
    public int getInsertedCount() {
        return _current.size() + _previous.size();
    }

    /** pointless, only used for logging elsewhere */
    @Override
    public double getFalsePositiveRate() {
        // exact for keys that fit the 8-byte hash; collision chance otherwise
        if (_entryBytes <= 8)
            return 0d;
        return 1d / Math.pow(2d, 64d);  // 5.4E-20
    }

    /**
     * @return true if the entry added is a duplicate
     * @throws IllegalArgumentException if entry is null or len != entryBytes
     */
    @Override
    public boolean add(byte entry[], int off, int len) {
        if (entry == null)
            throw new IllegalArgumentException("Null entry");
        if (len != _entryBytes)
            throw new IllegalArgumentException("Bad entry [" + len + ", expected "
                                               + _entryBytes + "]");
        ArrayWrapper w = new ArrayWrapper(entry, off, len);
        // read lock: concurrent adds allowed; write lock only for decay()
        getReadLock();
        try {
            return locked_add(w, true);
        } finally { releaseReadLock(); }
    }

    /**
     * @return true if the entry added is a duplicate. the number of low order
     * bits used is determined by the entryBytes parameter used on creation of the
     * filter.
     *
     */
    @Override
    public boolean add(long entry) {
        return add(entry, true);
    }

    /**
     * @return true if the entry is already known. this does NOT add the
     * entry however.
     *
     */
    @Override
    public boolean isKnown(long entry) {
        return add(entry, false);
    }

    /** Common path for add(long)/isKnown(long). */
    private boolean add(long entry, boolean addIfNew) {
        ArrayWrapper w = new ArrayWrapper(entry);
        getReadLock();
        try {
            return locked_add(w, addIfNew);
        } finally { releaseReadLock(); }
    }

    /**
     * @param addIfNew if true, add the element to current if it is not already there or in previous;
     *                 if false, only check
     * @return if the element is in either the current or previous set
     */
    private boolean locked_add(ArrayWrapper w, boolean addIfNew) {
        boolean seen = _previous.contains(w);
        // only access _current once.
        if (!seen) {
            if (addIfNew)
                seen = !_current.add(w);
            else
                seen = _current.contains(w);
        }
        if (seen) {
            // why increment if addIfNew == false?  Only used for stats...
            _currentDuplicates++;
        }
        return seen;
    }

    /** Empty both buffers and reset the duplicate counter; no locking here. */
    @Override
    public void clear() {
        _current.clear();
        _previous.clear();
        _currentDuplicates = 0;
    }

    /** super doesn't call clear, but neither do the users, so it seems like we should here */
    @Override
    public void stopDecaying() {
        _keepDecaying = false;
        clear();
    }

    /**
     * Swap the double buffers under the write lock (entries survive one to
     * two periods, as in the superclass) and report size/dups stats.
     */
    @Override
    protected void decay() {
        int currentCount = 0;
        long dups = 0;
        if (!getWriteLock())
            return;
        try {
            ConcurrentHashSet<ArrayWrapper> tmp = _previous;
            currentCount = _current.size();
            _previous = _current;
            _current = tmp;
            _current.clear();
            dups = _currentDuplicates;
            _currentDuplicates = 0;
        } finally { releaseWriteLock(); }

        if (_log.shouldLog(Log.DEBUG))
            _log.debug("Decaying the filter " + _name + " after inserting " + currentCount
                       + " elements and " + dups + " false positives");
        _context.statManager().addRateData("router.decayingHashSet." + _name + ".size",
                                           currentCount);
        if (currentCount > 0)
            _context.statManager().addRateData("router.decayingHashSet." + _name + ".dups",
                                               1000l*1000*dups/currentCount);
    }

    /**
     * This saves the data as-is if the length is <= 8 bytes,
     * otherwise it stores an 8-byte hash.
     * Hash function is from DataHelper, modded to get
     * the maximum entropy given the length of the data.
     */
    private static class ArrayWrapper {
        private final long _longhashcode;

        public ArrayWrapper(byte[] b, int offset, int len) {
            int idx = offset;
            // spread each byte across the 64-bit accumulator; shift shrinks
            // as len grows so all bytes contribute
            int shift = Math.min(8, 64 / len);
            long lhc = 0;
            for (int i = 0; i < len; i++) {
                // xor better than + in tests
                lhc ^= (((long) b[idx++]) << (i * shift));
            }
            _longhashcode = lhc;
        }

        /** faster version for when storing <= 8 bytes */
        public ArrayWrapper(long b) {
            _longhashcode = b;
        }

        public int hashCode() {
            return (int) _longhashcode;
        }

        public long longHashCode() {
            return _longhashcode;
        }

        // NOTE: equality is on the 64-bit hash only, which is what makes
        // >8-byte keys have the ~5E-20 collision rate documented above
        public boolean equals(Object o) {
            if (o == null || !(o instanceof ArrayWrapper))
                return false;
            return ((ArrayWrapper) o).longHashCode() == _longhashcode;
        }
    }

    /**
     * vs. DBF, this measures 1.93x faster for testByLong and 2.46x faster for testByBytes.
     */
    public static void main(String args[]) {
        /** KBytes per sec, 1 message per KByte */
        int kbps = 256;
        int iterations = 10;
        //testSize();
        testByLong(kbps, iterations);
        testByBytes(kbps, iterations);
    }

    /** and the answer is: 49.9 bytes. The ArrayWrapper alone measured 16, so that's 34 for the HashSet entry. */
    /*****
    private static void testSize() {
        int qty = 256*1024;
        byte b[] = new byte[8];
        Random r = new Random();
        long old = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
        ConcurrentHashSet foo = new ConcurrentHashSet(qty);
        for (int i = 0; i < qty; i++) {
            r.nextBytes(b);
            foo.add(new ArrayWrapper(b, 0, 8));
        }
        long used = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
        System.out.println("Memory per ArrayWrapper: " + (((double) (used - old)) / qty));
    }
    *****/

    /** 8 bytes, simulate the router message validator */
    private static void testByLong(int kbps, int numRuns) {
        int messages = 60 * 10 * kbps;
        Random r = new Random();
        DecayingBloomFilter filter = new DecayingHashSet(I2PAppContext.getGlobalContext(), 600*1000, 8);
        int falsePositives = 0;
        long totalTime = 0;
        for (int j = 0; j < numRuns; j++) {
            long start = System.currentTimeMillis();
            for (int i = 0; i < messages; i++) {
                if (filter.add(r.nextLong())) {
                    falsePositives++;
                    System.out.println("False positive " + falsePositives + " (testByLong j=" + j + " i=" + i + ")");
                }
            }
            totalTime += System.currentTimeMillis() - start;
            filter.clear();
        }
        System.out.println("False postive rate should be " + filter.getFalsePositiveRate());
        filter.stopDecaying();
        System.out.println("After " + numRuns + " runs pushing " + messages + " entries in "
                           + DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
                           + falsePositives + " false positives");

    }

    /** 16 bytes, simulate the tunnel IV validator */
    private static void testByBytes(int kbps, int numRuns) {
        byte iv[][] = new byte[60*10*kbps][16];
        Random r = new Random();
        for (int i = 0; i < iv.length; i++)
            r.nextBytes(iv[i]);

        DecayingBloomFilter filter = new DecayingHashSet(I2PAppContext.getGlobalContext(), 600*1000, 16);
        int falsePositives = 0;
        long totalTime = 0;
        for (int j = 0; j < numRuns; j++) {
            long start = System.currentTimeMillis();
            for (int i = 0; i < iv.length; i++) {
                if (filter.add(iv[i])) {
                    falsePositives++;
                    System.out.println("False positive " + falsePositives + " (testByBytes j=" + j + " i=" + i + ")");
                }
            }
            totalTime += System.currentTimeMillis() - start;
            filter.clear();
        }
        System.out.println("False postive rate should be " + filter.getFalsePositiveRate());
        filter.stopDecaying();
        System.out.println("After " + numRuns + " runs pushing " + iv.length + " entries in "
                           + DataHelper.formatDuration(totalTime/numRuns) + " per run, there were "
                           + falsePositives + " false positives");
    }
}
|
343
router/java/src/org/xlattice/crypto/filters/BloomSHA1.java
Normal file
343
router/java/src/org/xlattice/crypto/filters/BloomSHA1.java
Normal file
@ -0,0 +1,343 @@
|
||||
package org.xlattice.crypto.filters;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
/**
|
||||
* A Bloom filter for sets of SHA1 digests. A Bloom filter uses a set
|
||||
* of k hash functions to determine set membership. Each hash function
|
||||
* produces a value in the range 0..M-1. The filter is of size M. To
|
||||
* add a member to the set, apply each function to the new member and
|
||||
* set the corresponding bit in the filter. For M very large relative
|
||||
* to k, this will normally set k bits in the filter. To check whether
|
||||
* x is a member of the set, apply each of the k hash functions to x
|
||||
* and check whether the corresponding bits are set in the filter. If
|
||||
* any are not set, x is definitely not a member. If all are set, x
|
||||
* may be a member. The probability of error (the false positive rate)
|
||||
* is f = (1 - e^(-kN/M))^k, where N is the number of set members.
|
||||
*
|
||||
* This class takes advantage of the fact that SHA1 digests are good-
|
||||
* quality pseudo-random numbers. The k hash functions are the values
|
||||
* of distinct sets of bits taken from the 20-byte SHA1 hash. The
|
||||
* number of bits in the filter, M, is constrained to be a power of
|
||||
* 2; M == 2^m. The number of bits in each hash function may not
|
||||
* exceed floor(m/k).
|
||||
*
|
||||
* This class is designed to be thread-safe, but this has not been
|
||||
* exhaustively tested.
|
||||
*
|
||||
* @author < A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
|
||||
*
|
||||
* BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
|
||||
* app - http://xlattice.sourceforge.net/
|
||||
*
|
||||
* minor tweaks by jrandom, exposing unsynchronized access and
|
||||
* allowing larger M and K. changes released into the public domain.
|
||||
*
|
||||
* Note that this is used only by DecayingBloomFilter, which uses only
|
||||
* the unsynchronized locked_foo() methods.
|
||||
* Deprecated for use outside of the router; to be moved to router.jar.
|
||||
*
|
||||
* As of 0.8.11, the locked_foo() methods are thread-safe, in that they work,
|
||||
* but there is a minor risk of false-negatives if two threads are
|
||||
* accessing the same bloom filter integer.
|
||||
*/
|
||||
|
||||
public class BloomSHA1 {
|
||||
protected final int m;
|
||||
protected final int k;
|
||||
protected int count;
|
||||
|
||||
protected final int[] filter;
|
||||
protected final KeySelector ks;
|
||||
|
||||
// convenience variables
|
||||
protected final int filterBits;
|
||||
protected final int filterWords;
|
||||
|
||||
private final BlockingQueue<int[]> buf;
|
||||
|
||||
/* (24,11) too big - see KeySelector
|
||||
|
||||
public static void main(String args[]) {
|
||||
BloomSHA1 b = new BloomSHA1(24, 11);
|
||||
for (int i = 0; i < 100; i++) {
|
||||
byte v[] = new byte[32];
|
||||
v[0] = (byte)i;
|
||||
b.insert(v);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Creates a filter with 2^m bits and k 'hash functions', where
|
||||
* each hash function is portion of the 160-bit SHA1 hash.
|
||||
|
||||
* @param m determines number of bits in filter
|
||||
* @param k number of hash functionsx
|
||||
*
|
||||
* See KeySelector for important restriction on max m and k
|
||||
*/
|
||||
public BloomSHA1( int m, int k) {
|
||||
// XXX need to devise more reasonable set of checks
|
||||
//if ( m < 2 || m > 20) {
|
||||
// throw new IllegalArgumentException("m out of range");
|
||||
//}
|
||||
//if ( k < 1 || ( k * m > 160 )) {
|
||||
// throw new IllegalArgumentException(
|
||||
// "too many hash functions for filter size");
|
||||
//}
|
||||
this.m = m;
|
||||
this.k = k;
|
||||
filterBits = 1 << m;
|
||||
filterWords = (filterBits + 31)/32; // round up
|
||||
filter = new int[filterWords];
|
||||
ks = new KeySelector(m, k);
|
||||
buf = new LinkedBlockingQueue(16);
|
||||
|
||||
// DEBUG
|
||||
//System.out.println("Bloom constructor: m = " + m + ", k = " + k
|
||||
// + "\n filterBits = " + filterBits
|
||||
// + ", filterWords = " + filterWords);
|
||||
// END
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a filter of 2^m bits, with the number of 'hash functions"
|
||||
* k defaulting to 8.
|
||||
* @param m determines size of filter
|
||||
*/
|
||||
public BloomSHA1 (int m) {
|
||||
this(m, 8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a filter of 2^20 bits with k defaulting to 8.
|
||||
*/
|
||||
public BloomSHA1 () {
|
||||
this (20, 8);
|
||||
}
|
||||
/** Clear the filter, unsynchronized */
|
||||
protected void doClear() {
|
||||
Arrays.fill(filter, 0);
|
||||
count = 0;
|
||||
}
|
||||
/** Synchronized version */
|
||||
public void clear() {
|
||||
synchronized (this) {
|
||||
doClear();
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Returns the number of keys which have been inserted. This
|
||||
* class (BloomSHA1) does not guarantee uniqueness in any sense; if the
|
||||
* same key is added N times, the number of set members reported
|
||||
* will increase by N.
|
||||
*
|
||||
* @return number of set members
|
||||
*/
|
||||
public final int size() {
|
||||
synchronized (this) {
|
||||
return count;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @return number of bits in filter
|
||||
*/
|
||||
public final int capacity () {
|
||||
return filterBits;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a key to the set represented by the filter.
|
||||
*
|
||||
* XXX This version does not maintain 4-bit counters, it is not
|
||||
* a counting Bloom filter.
|
||||
*
|
||||
* @param b byte array representing a key (SHA1 digest)
|
||||
*/
|
||||
public void insert (byte[]b) { insert(b, 0, b.length); }
|
||||
|
||||
public void insert (byte[]b, int offset, int len) {
|
||||
synchronized(this) {
|
||||
locked_insert(b, offset, len);
|
||||
}
|
||||
}
|
||||
|
||||
public final void locked_insert(byte[]b) { locked_insert(b, 0, b.length); }
|
||||
|
||||
public final void locked_insert(byte[]b, int offset, int len) {
|
||||
int[] bitOffset = acquire();
|
||||
int[] wordOffset = acquire();
|
||||
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
|
||||
for (int i = 0; i < k; i++) {
|
||||
filter[wordOffset[i]] |= 1 << bitOffset[i];
|
||||
}
|
||||
count++;
|
||||
buf.offer(bitOffset);
|
||||
buf.offer(wordOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is a key in the filter. Sets up the bit and word offset arrays.
|
||||
*
|
||||
* @param b byte array representing a key (SHA1 digest)
|
||||
* @return true if b is in the filter
|
||||
*/
|
||||
protected final boolean isMember(byte[] b) { return isMember(b, 0, b.length); }
|
||||
|
||||
protected final boolean isMember(byte[] b, int offset, int len) {
|
||||
int[] bitOffset = acquire();
|
||||
int[] wordOffset = acquire();
|
||||
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
|
||||
for (int i = 0; i < k; i++) {
|
||||
if (! ((filter[wordOffset[i]] & (1 << bitOffset[i])) != 0) ) {
|
||||
buf.offer(bitOffset);
|
||||
buf.offer(wordOffset);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
buf.offer(bitOffset);
|
||||
buf.offer(wordOffset);
|
||||
return true;
|
||||
}
|
||||
|
||||
public final boolean locked_member(byte[]b) { return isMember(b); }
|
||||
public final boolean locked_member(byte[]b, int offset, int len) { return isMember(b, offset, len); }
|
||||
|
||||
/**
|
||||
* Is a key in the filter. External interface, internally synchronized.
|
||||
*
|
||||
* @param b byte array representing a key (SHA1 digest)
|
||||
* @return true if b is in the filter
|
||||
*/
|
||||
public final boolean member(byte[]b) { return member(b, 0, b.length); }
|
||||
public final boolean member(byte[]b, int offset, int len) {
|
||||
synchronized (this) {
|
||||
return isMember(b, offset, len);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the bloom filter offsets for reuse.
|
||||
* Caller should call release(rv) when done with it.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public FilterKey getFilterKey(byte[] b, int offset, int len) {
|
||||
int[] bitOffset = acquire();
|
||||
int[] wordOffset = acquire();
|
||||
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
|
||||
return new FilterKey(bitOffset, wordOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the key to the filter.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public void locked_insert(FilterKey fk) {
|
||||
for (int i = 0; i < k; i++) {
|
||||
filter[fk.wordOffset[i]] |= 1 << fk.bitOffset[i];
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Is the key in the filter.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public boolean locked_member(FilterKey fk) {
|
||||
for (int i = 0; i < k; i++) {
|
||||
if (! ((filter[fk.wordOffset[i]] & (1 << fk.bitOffset[i])) != 0) )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @since 0.8.11
|
||||
*/
|
||||
private int[] acquire() {
|
||||
int[] rv = buf.poll();
|
||||
if (rv != null)
|
||||
return rv;
|
||||
return new int[k];
|
||||
}
|
||||
|
||||
/**
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public void release(FilterKey fk) {
|
||||
buf.offer(fk.bitOffset);
|
||||
buf.offer(fk.wordOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store the (opaque) bloom filter offsets for reuse.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public static class FilterKey {
|
||||
|
||||
private final int[] bitOffset;
|
||||
private final int[] wordOffset;
|
||||
|
||||
private FilterKey(int[] bitOffset, int[] wordOffset) {
|
||||
this.bitOffset = bitOffset;
|
||||
this.wordOffset = wordOffset;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param n number of set members
|
||||
* @return approximate false positive rate
|
||||
*/
|
||||
public final double falsePositives(int n) {
|
||||
// (1 - e(-kN/M))^k
|
||||
return java.lang.Math.pow (
|
||||
(1l - java.lang.Math.exp(0d- ((double)k) * (long)n / filterBits)), k);
|
||||
}
|
||||
|
||||
public final double falsePositives() {
|
||||
return falsePositives(count);
|
||||
}
|
||||
|
||||
/*****
|
||||
// DEBUG METHODS
|
||||
public static String keyToString(byte[] key) {
|
||||
StringBuilder sb = new StringBuilder().append(key[0]);
|
||||
for (int i = 1; i < key.length; i++) {
|
||||
sb.append(".").append(Integer.toString(key[i], 16));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
*****/
|
||||
|
||||
/** convert 64-bit integer to hex String */
|
||||
/*****
|
||||
public static String ltoh (long i) {
|
||||
StringBuilder sb = new StringBuilder().append("#")
|
||||
.append(Long.toString(i, 16));
|
||||
return sb.toString();
|
||||
}
|
||||
*****/
|
||||
|
||||
/** convert 32-bit integer to String */
|
||||
/*****
|
||||
public static String itoh (int i) {
|
||||
StringBuilder sb = new StringBuilder().append("#")
|
||||
.append(Integer.toString(i, 16));
|
||||
return sb.toString();
|
||||
}
|
||||
*****/
|
||||
|
||||
/** convert single byte to String */
|
||||
/*****
|
||||
public static String btoh (byte b) {
|
||||
int i = 0xff & b;
|
||||
return itoh(i);
|
||||
}
|
||||
*****/
|
||||
}
|
||||
|
279
router/java/src/org/xlattice/crypto/filters/KeySelector.java
Normal file
279
router/java/src/org/xlattice/crypto/filters/KeySelector.java
Normal file
@ -0,0 +1,279 @@
|
||||
package org.xlattice.crypto.filters;
|
||||
|
||||
/**
 * Given a key, populates arrays determining word and bit offsets into
 * a Bloom filter.
 *
 * @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
 *
 * BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
 * app - http://xlattice.sourceforge.net/
 *
 * minor tweaks by jrandom, exposing unsynchronized access and
 * allowing larger M and K.  changes released into the public domain.
 *
 * As of 0.8.11, bitoffset and wordoffset out parameters moved from fields
 * to selector arguments, to allow concurrency.
 * All methods are now thread-safe.
 */
public class KeySelector {

    /** log2 of the filter size in bits */
    private final int m;
    /** number of 'hash functions' */
    private final int k;
    private final BitSelector bitSel;
    private final WordSelector wordSel;

    public interface BitSelector {
        /**
         * @param bitOffset Out parameter of length k
         * @since 0.8.11 out parameter added
         */
        public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset);
    }

    public interface WordSelector {
        /**
         * @param wordOffset Out parameter of length k
         * @since 0.8.11 out parameter added
         */
        public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset);
    }

    /** AND with a value to expose the low index-many bits */
    public static final int[] UNMASK = {
    //  0  1  2  3   4   5   6    7    8    9    10    11    12    13     14     15
        0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767};
    /** AND with a value to zero out the low index-many bits */
    public static final int[] MASK = {
        ~0,~1,~3,~7,~15,~31,~63,~127,~255,~511,~1023,~2047,~4095,~8191,~16383,~32767};

    public static final int TWO_UP_15 = 32 * 1024;

    /**
     * Creates a key selector for a Bloom filter.  When a key is presented
     * to the getOffsets() method, the k 'hash function' values are
     * extracted and used to populate bitOffset and wordOffset arrays which
     * specify the k flags to be set or examined in the filter.
     *
     * @param m size of the filter as a power of 2
     * @param k number of 'hash functions'
     *
     * Note that if k and m are too big, the GenericWordSelector blows up -
     * The max for 32-byte keys is m=23 and k=11.
     * The precise restriction appears to be:
     * ((5k + (k-1)(m-5)) / 8) + 2 < keySizeInBytes
     *
     * It isn't clear how to fix this.
     */
    public KeySelector(int m, int k) {
        this.m = m;
        this.k = k;
        bitSel = new GenericBitSelector();
        wordSel = new GenericWordSelector();
    }

    /**
     * Extracts the k bit offsets from a key, suitable for general values
     * of m and k.  Each selector is 5 consecutive bits of the key
     * (5 = log2 of the 32 bits in a filter word).
     */
    public class GenericBitSelector implements BitSelector {
        /** Do the extraction */
        public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset) {
            int curBit = 8 * offset;
            int curByte;
            for (int j = 0; j < k; j++) {
                curByte = curBit / 8;
                int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte
                if (bitsUnused > 5) {
                    // the 5 bits lie entirely within the current byte
                    bitOffset[j] = ((0xff & b[curByte])
                                    >> (bitsUnused - 5)) & UNMASK[5];
                } else if (bitsUnused == 5) {
                    // the 5 bits are exactly the low bits of the current byte
                    bitOffset[j] = b[curByte] & UNMASK[5];
                } else {
                    // the 5 bits straddle this byte and the next
                    bitOffset[j] = (b[curByte] & UNMASK[bitsUnused])
                                   | (((0xff & b[curByte + 1]) >> 3)
                                      & MASK[bitsUnused]);
                }
                curBit += 5;
            }
        }
    }

    /**
     * Extracts the k word offsets from a key.  Suitable for general
     * values of m and k.  See above for formula for max m and k.
     * Each selector is (m-5) consecutive bits of the key, taken after
     * the k*5 bits consumed by the bit selectors.
     */
    public class GenericWordSelector implements WordSelector {
        /** Extract the k offsets into the word offset array */
        public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset) {
            int stride = m - 5;
            //assert true: stride<16;
            int curBit = (k * 5) + (offset * 8);
            int curByte;
            for (int j = 0; j < k; j++) {
                curByte = curBit / 8;
                int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte

                if (bitsUnused > stride) {
                    // the value is entirely within the current byte
                    wordOffset[j] = ((0xff & b[curByte])
                                        >> (bitsUnused - stride))
                                    & UNMASK[stride];
                } else if (bitsUnused == stride) {
                    // the value fills the current byte
                    wordOffset[j] = b[curByte] & UNMASK[stride];
                } else { // bitsUnused < stride
                    // value occupies more than one byte
                    // bits from first byte, right-aligned in result
                    wordOffset[j] = b[curByte] & UNMASK[bitsUnused];
                    // bits from second byte
                    int bitsToGet = stride - bitsUnused;
                    if (bitsToGet >= 8) {
                        // 8 bits from second byte
                        wordOffset[j] |= (0xff & b[curByte + 1]) << bitsUnused;
                        // bits from third byte
                        bitsToGet -= 8;
                        if (bitsToGet > 0) {
                            // AIOOBE here if m and k too big (23,11 is the max)
                            // for a 32-byte key - see above
                            wordOffset[j] |=
                                ((0xff & b[curByte + 2]) >> (8 - bitsToGet))
                                    << (stride - bitsToGet) ;
                        }
                    } else {
                        // all remaining bits are within second byte
                        wordOffset[j] |= ((b[curByte + 1] >> (8 - bitsToGet))
                                            & UNMASK[bitsToGet])
                                         << bitsUnused;
                    }
                }
                curBit += stride;
            }
        }
    }

    /**
     * Given a key, populate the word and bit offset arrays, each
     * of which has k elements.
     *
     * @param key cryptographic key used in populating the arrays
     * @param bitOffset Out parameter of length k
     * @param wordOffset Out parameter of length k
     * @since 0.8.11 out parameters added
     */
    public void getOffsets(byte[] key, int[] bitOffset, int[] wordOffset) {
        getOffsets(key, 0, key.length, bitOffset, wordOffset);
    }

    /**
     * Given a key, populate the word and bit offset arrays, each
     * of which has k elements.
     *
     * @param key cryptographic key used in populating the arrays
     * @param bitOffset Out parameter of length k
     * @param wordOffset Out parameter of length k
     * @since 0.8.11 out parameters added
     */
    public void getOffsets(byte[] key, int off, int len, int[] bitOffset, int[] wordOffset) {
        // null / length-20 argument checks deliberately skipped for speed
        bitSel.getBitSelectors(key, off, len, bitOffset);
        wordSel.getWordSelectors(key, off, len, wordOffset);
    }
}
|
||||
|
||||
|
Reference in New Issue
Block a user