This commit is contained in:
zzz
2013-05-13 20:43:30 +00:00
parent e0b25cdcf9
commit a19140e186
2 changed files with 399 additions and 32 deletions

View File

@ -48,8 +48,12 @@ class GeoIP {
private final Map<String, String> _codeToName;
/** code to itself to prevent String proliferation */
private final Map<String, String> _codeCache;
// In the following structures, an IPv4 IP is stored as a non-negative long, 0 to 2**32 - 1,
// and the first 8 bytes of an IPv6 IP are stored as a signed long.
private final Map<Long, String> _IPToCountry;
private final Set<Long> _pendingSearch;
private final Set<Long> _pendingIPv6Search;
private final Set<Long> _notFound;
private final AtomicBoolean _lock;
private int _lookupRunCount;
@ -58,10 +62,11 @@ class GeoIP {
public GeoIP(RouterContext context) {
_context = context;
_log = context.logManager().getLog(GeoIP.class);
_codeToName = new ConcurrentHashMap(256);
_codeCache = new ConcurrentHashMap(256);
_codeToName = new ConcurrentHashMap(512);
_codeCache = new ConcurrentHashMap(512);
_IPToCountry = new ConcurrentHashMap();
_pendingSearch = new ConcurrentHashSet();
_pendingIPv6Search = new ConcurrentHashSet();
_notFound = new ConcurrentHashSet();
_lock = new AtomicBoolean();
readCountryFile();
@ -81,6 +86,7 @@ class GeoIP {
_codeCache.clear();
_IPToCountry.clear();
_pendingSearch.clear();
_pendingIPv6Search.clear();
_notFound.clear();
}
@ -107,6 +113,7 @@ class GeoIP {
public void blockingLookup() {
if (! _context.getBooleanPropertyDefaultTrue(PROP_GEOIP_ENABLED)) {
_pendingSearch.clear();
_pendingIPv6Search.clear();
return;
}
int pri = Thread.currentThread().getPriority();
@ -132,18 +139,29 @@ class GeoIP {
// clear the negative cache every few runs, to prevent it from getting too big
if (((++_lookupRunCount) % CLEAR) == 0)
_notFound.clear();
// IPv4
// Snapshot and drain the pending IPv4 queue. An empty snapshot must not
// return early: the IPv6 queue still has to be processed afterwards.
Long[] search = _pendingSearch.toArray(new Long[_pendingSearch.size()]);
_pendingSearch.clear();
if (search.length > 0) {
    // readGeoIPFile() steps through the search array with a forward-only
    // index while reading the file, so hand it the keys in ascending order.
    Arrays.sort(search);
    String[] countries = readGeoIPFile(search);
    // countries[] is parallel to search[]; it is zero-length if the file is missing
    for (int i = 0; i < countries.length; i++) {
        if (countries[i] != null)
            _IPToCountry.put(search[i], countries[i]);
        else
            _notFound.add(search[i]);
    }
}
// IPv6
search = _pendingSearch.toArray(new Long[_pendingIPv6Search.size()]);
_pendingIPv6Search.clear();
if (search.length > 0) {
String[] countries = GeoIPv6.readGeoIPFile(_context, search, _codeCache);
for (int i = 0; i < countries.length; i++) {
if (countries[i] != null)
_IPToCountry.put(search[i], countries[i]);
else
_notFound.add(search[i]);
}
}
} finally {
_lock.set(false);
@ -169,16 +187,16 @@ class GeoIP {
*
*/
private void readCountryFile() {
File GeoFile = new File(_context.getBaseDir(), GEOIP_DIR_DEFAULT);
GeoFile = new File(GeoFile, COUNTRY_FILE_DEFAULT);
if (!GeoFile.exists()) {
File geoFile = new File(_context.getBaseDir(), GEOIP_DIR_DEFAULT);
geoFile = new File(geoFile, COUNTRY_FILE_DEFAULT);
if (!geoFile.exists()) {
if (_log.shouldLog(Log.WARN))
_log.warn("Country file not found: " + GeoFile.getAbsolutePath());
_log.warn("Country file not found: " + geoFile.getAbsolutePath());
return;
}
FileInputStream in = null;
try {
in = new FileInputStream(GeoFile);
in = new FileInputStream(geoFile);
BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
String line = null;
while ( (line = br.readLine()) != null) {
@ -228,11 +246,11 @@ class GeoIP {
*
*/
private String[] readGeoIPFile(Long[] search) {
File GeoFile = new File(_context.getBaseDir(), GEOIP_DIR_DEFAULT);
GeoFile = new File(GeoFile, GEOIP_FILE_DEFAULT);
if (!GeoFile.exists()) {
File geoFile = new File(_context.getBaseDir(), GEOIP_DIR_DEFAULT);
geoFile = new File(geoFile, GEOIP_FILE_DEFAULT);
if (!geoFile.exists()) {
if (_log.shouldLog(Log.WARN))
_log.warn("GeoIP file not found: " + GeoFile.getAbsolutePath());
_log.warn("GeoIP file not found: " + geoFile.getAbsolutePath());
return new String[0];
}
String[] rv = new String[search.length];
@ -240,7 +258,7 @@ class GeoIP {
long start = _context.clock().now();
FileInputStream in = null;
try {
in = new FileInputStream(GeoFile);
in = new FileInputStream(geoFile);
String buf = null;
BufferedReader br = new BufferedReader(new InputStreamReader(in, "ISO-8859-1"));
while ((buf = br.readLine()) != null && idx < search.length) {
@ -268,7 +286,7 @@ class GeoIP {
}
} catch (IOException ioe) {
if (_log.shouldLog(Log.ERROR))
_log.error("Error reading the GeoFile", ioe);
_log.error("Error reading the geoFile", ioe);
} finally {
if (in != null) try { in.close(); } catch (IOException ioe) {}
}
@ -307,6 +325,7 @@ class GeoIP {
/**
* Add to the list needing lookup
* @param ip IPv4 or IPv6
*/
public void add(String ip) {
byte[] pib = Addresses.getIP(ip);
@ -314,20 +333,28 @@ class GeoIP {
add(pib);
}
/**
 * Add to the list needing lookup
 * @param ip IPv4 (4 bytes) or IPv6 (16 bytes); null or any other length is ignored
 */
public void add(byte ip[]) {
    // accept both address families, as documented; reject anything toLong() cannot map
    if (ip == null || (ip.length != 4 && ip.length != 16))
        return;
    add(toLong(ip));
}
/**
 * Queue an address for a later lookup, unless it is already in the
 * country cache or the negative cache.
 * See above for ip-to-long mapping.
 *
 * @param ip the address in long form
 */
private void add(long ip) {
    Long li = Long.valueOf(ip);
    if (_IPToCountry.containsKey(li) || _notFound.contains(li))
        return;
    // values in 0 .. 2**32 - 1 are IPv4; everything else is the top half of an IPv6 address
    if (ip >= 0 && ip < (1L << 32))
        _pendingSearch.add(li);
    else
        _pendingIPv6Search.add(li);
}
/**
* Get the country for an IP from the cache.
* @param ip IPv4 or IPv6
* @return lower-case code, generally two letters, or null.
*/
public String get(String ip) {
@ -338,23 +365,30 @@ class GeoIP {
/**
 * Get the country for an IP from the cache.
 * @param ip IPv4 (4 bytes) or IPv6 (16 bytes)
 * @return lower-case code, generally two letters, or null
 *         (also null if ip is null or has any other length).
 */
public String get(byte ip[]) {
    // accept both address families, as documented; reject anything toLong() cannot map
    if (ip == null || (ip.length != 4 && ip.length != 16))
        return null;
    return get(toLong(ip));
}
/**
 * Look up the cached country for an address in long form.
 * See above for ip-to-long mapping.
 *
 * @return the cached value for this key, or null if there is none
 */
private String get(long ip) {
    final Long key = Long.valueOf(ip);
    return _IPToCountry.get(key);
}
/**
 * Convert a raw address to its long key.
 * See above for ip-to-long mapping: a 16-byte (IPv6) address yields its
 * first 8 bytes as a signed big-endian long; anything else is treated as
 * IPv4 and yields its first 4 bytes as a non-negative long, 0 to 2**32 - 1.
 *
 * @param ip 4 or 16 bytes
 */
private static long toLong(byte ip[]) {
    // Must accumulate in a long: "intVar |= longExpr" compiles but silently
    // narrows, which would discard the upper 32 bits of an IPv6 key.
    long rv = 0;
    if (ip.length == 16) {
        for (int i = 0; i < 8; i++)
            rv |= (ip[i] & 0xffL) << ((7-i)*8);
    } else {
        // long mask keeps the result non-negative without a final & 0xffffffff
        for (int i = 0; i < 4; i++)
            rv |= (ip[i] & 0xffL) << ((3-i)*8);
    }
    return rv;
}
/**

View File

@ -0,0 +1,333 @@
package net.i2p.router.transport;
/*
* free (adj.): unencumbered; not under the control of others
* Use at your own risk.
*/
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import net.i2p.I2PAppContext;
import net.i2p.data.DataHelper;
import net.i2p.util.Log;
/**
 * Generate compressed geoipv6.dat.gz file, and
 * lookup entries in it.
 *
 * Only the first 8 bytes of each IPv6 address are used, held as a signed long.
 * All methods are static.
 */
class GeoIPv6 {

    private static final String GEOIP_DIR_DEFAULT = "geoip";
    private static final String GEOIP_FILE_DEFAULT = "geoipv6.dat.gz";
    /** 10 byte magic number, 2 byte version, 4 byte flags; 16 bytes total */
    private static final String MAGIC = "I2PGeoIPv6\0\001\0\0\0\0";
    private static final String COMMENT = "I2P compressed geoipv6 file. See GeoIPv6.java for format.";
    /** includes magic */
    private static final int HEADER_LEN = 256;
    /** 8 byte from, 8 byte to, 2 byte country code */
    private static final int RECORD_LEN = 18;

    /**
     * Lookup search items in the geoip file.
     * See below for format.
     *
     * @param context used for the logger and the base directory holding geoip/geoipv6.dat.gz
     * @param search the first 8 bytes of each address as a signed long;
     *               must be sorted ascending (signed), as the file is read in one pass
     * @param codeCache country code to itself, used to reuse existing String instances
     * @return array parallel to search, null entry if not found;
     *         zero-length array if the file does not exist
     */
    public static String[] readGeoIPFile(I2PAppContext context, Long[] search, Map<String, String> codeCache) {
        Log log = context.logManager().getLog(GeoIPv6.class);
        File geoFile = new File(context.getBaseDir(), GEOIP_DIR_DEFAULT);
        geoFile = new File(geoFile, GEOIP_FILE_DEFAULT);
        if (!geoFile.exists()) {
            if (log.shouldLog(Log.WARN))
                log.warn("GeoIP file not found: " + geoFile.getAbsolutePath());
            return new String[0];
        }
        return readGeoIPFile(geoFile, search, codeCache, log);
    }

    /**
     * Lookup search items in the geoip file.
     * See below for format.
     *
     * Read errors are logged, not thrown; entries not resolved before the
     * error remain null.
     *
     * @param search must be sorted ascending (signed)
     * @return array parallel to search, null entry if not found
     */
    private static String[] readGeoIPFile(File geoFile, Long[] search, Map<String, String> codeCache, Log log) {
        String[] rv = new String[search.length];
        int idx = 0;
        long start = System.currentTimeMillis();
        InputStream in = null;
        try {
            in = new GZIPInputStream(new BufferedInputStream(new FileInputStream(geoFile)));
            byte[] magic = new byte[MAGIC.length()];
            DataHelper.read(in, magic);
            if (!DataHelper.eq(magic, DataHelper.getASCII(MAGIC)))
                throw new IOException("Not a IPv6 geoip data file");
            // skip timestamp and comments
            skipFully(in, HEADER_LEN - MAGIC.length());
            byte[] buf = new byte[RECORD_LEN];
            // Single pass: records and search keys are both in ascending signed order.
            while (idx < search.length && DataHelper.read(in, buf) == RECORD_LEN) {
                long ip1 = readLong(buf, 0);
                long ip2 = readLong(buf, 8);
                // keys below this record's range are not in the file; leave them null
                while (idx < search.length && search[idx].longValue() < ip1) {
                    idx++;
                }
                while (idx < search.length && search[idx].longValue() >= ip1 && search[idx].longValue() <= ip2) {
                    // written in lower case
                    String lc = new String(buf, 16, 2, "ISO-8859-1");
                    // replace the new string with the identical one from the cache
                    String cached = codeCache.get(lc);
                    if (cached == null)
                        cached = lc;
                    rv[idx++] = cached;
                }
            }
        } catch (IOException ioe) {
            if (log.shouldLog(Log.ERROR))
                log.error("Error reading the geoFile", ioe);
        } finally {
            if (in != null) try { in.close(); } catch (IOException ioe) {}
        }
        if (log.shouldLog(Log.INFO))
            log.info("GeoIPv6 processing finished, time: " + (System.currentTimeMillis() - start));
        return rv;
    }

    /**
     * Skip exactly n bytes. InputStream.skip() is allowed to skip fewer bytes
     * than requested, so its return value must be checked.
     *
     * @throws IOException if the stream ends before n bytes were skipped
     */
    private static void skipFully(InputStream in, long n) throws IOException {
        long remaining = n;
        while (remaining > 0) {
            long skipped = in.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
            } else {
                // no progress; a one-byte read tells EOF apart from a stall
                if (in.read() < 0)
                    throw new IOException("Truncated IPv6 geoip header");
                remaining--;
            }
        }
    }

    /**
     * Read in and parse multiple IPv6 geoip CSV files,
     * merge them, and write out a gzipped binary IPv6 geoip file.
     *
     * Acceptable input formats (IPv6 only):
     *<pre>
     *   #comment (# must be in column 1)
     *   "text IP", "text IP", "bigint IP", "bigint IP", "country code", "country name"
     *</pre>
     * Quotes and spaces optional. Sorting not required.
     * Country code case-insensitive.
     * Fields 1, 2, and 5 are used; fields 3, 4, and 6 are ignored.
     * This is identical to the format of the MaxMind GeoLite IPv6 file.
     * Empty lines are skipped; other unparseable lines are reported on stderr and skipped.
     *
     * Example:
     *<pre>
     *   "2001:200::", "2001:200:ffff:ffff:ffff:ffff:ffff:ffff", "42540528726795050063891204319802818560", "42540528806023212578155541913346768895", "JP", "Japan"
     *</pre>
     *
     *<pre>
     *  Output format:
     *    Bytes 0-9: Magic number "I2PGeoIPv6"
     *    Bytes 10-11: version (0x0001)
     *    Bytes 12-15 flags (0)
     *    Bytes 16-23: Date (long)
     *    Bytes 24-xx: Comment (UTF-8)
     *    Bytes xx-255: null padding
     *    Bytes 256-: 18 byte records:
     *        8 byte from (/64)
     *        8 byte to (/64)
     *        2 byte country code LOWER case (ASCII)
     *    Data must be sorted (SIGNED twos complement), no overlap
     *</pre>
     *
     * SLOW. For preprocessing only!
     *
     * @return success; false on an I/O error reading any input or writing the output
     */
    private static boolean compressGeoIPv6CSVFiles(List<File> inFiles, File outFile) {
        boolean DEBUG = false;
        List<V6Entry> entries = new ArrayList<V6Entry>(20000);
        for (File geoFile : inFiles) {
            int count = 0;
            InputStream in = null;
            try {
                in = new FileInputStream(geoFile);
                String buf = null;
                BufferedReader br = new BufferedReader(new InputStreamReader(in, "ISO-8859-1"));
                while ((buf = br.readLine()) != null) {
                    try {
                        // skip blank lines (charAt(0) would throw) and comments
                        if (buf.length() == 0 || buf.charAt(0) == '#') {
                            continue;
                        }
                        String[] s = buf.split(",");
                        String ips1 = s[0].replace("\"", "").trim();
                        String ips2 = s[1].replace("\"", "").trim();
                        byte[] ip1 = InetAddress.getByName(ips1).getAddress();
                        byte[] ip2 = InetAddress.getByName(ips2).getAddress();
                        String country = s[4].replace("\"", "").trim().toLowerCase(Locale.US);
                        entries.add(new V6Entry(ip1, ip2, country));
                        count++;
                    } catch (UnknownHostException uhe) {
                        uhe.printStackTrace();
                    } catch (RuntimeException re) {
                        // too few fields, non-IPv6 address, bad country code, or to < from
                        re.printStackTrace();
                    }
                }
                System.err.println("Read " + count + " entries from " + geoFile);
            } catch (IOException ioe) {
                ioe.printStackTrace();
                return false;
            } finally {
                if (in != null) try { in.close(); } catch (IOException ioe) {}
            }
        }
        Collections.sort(entries);
        // merge
        // NOTE(review): if an entry has the same 'from' as the previous one but a
        // larger 'to', the truncation below builds an inverted range and the
        // V6Entry constructor throws IllegalArgumentException - confirm the
        // input never contains such a pair.
        V6Entry old = null;
        for (int i = 0; i < entries.size(); i++) {
            V6Entry e = entries.get(i);
            if (DEBUG)
                System.out.println("proc " + e.toString());
            if (old != null) {
                if (e.from == old.from && e.to == old.to) {
                    // dup
                    if (DEBUG)
                        System.out.println("remove dup " + e);
                    entries.remove(i);
                    // stay on the same index, which now holds the next entry
                    i--;
                    continue;
                }
                if (e.from <= old.to) {
                    // overlap
                    // truncate old
                    if (e.from < old.to) {
                        V6Entry rewrite = new V6Entry(old.from, e.from - 1, old.cc);
                        if (DEBUG)
                            System.out.println("rewrite old to " + rewrite);
                        entries.set(i - 1, rewrite);
                    }
                    if (e.to < old.to) {
                        // e inside old, add new after e
                        V6Entry insert = new V6Entry(e.to + 1, old.to, old.cc);
                        if (DEBUG)
                            System.out.println("insert " + insert);
                        // keep the list sorted: find the insertion point after e
                        int j = i + 1;
                        while (j < entries.size() && insert.compareTo(entries.get(j)) > 0) {
                            j++;
                        }
                        entries.add(j, insert);
                    }
                }
            }
            old = e;
        }
        OutputStream out = null;
        try {
            out = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)));
            out.write(DataHelper.getASCII(MAGIC));
            writeLong(out, System.currentTimeMillis());
            byte[] comment = DataHelper.getUTF8(COMMENT);
            out.write(comment);
            // null padding up to the fixed header length; 8 is the timestamp
            out.write(new byte[HEADER_LEN - (MAGIC.length() + 8 + comment.length)]);
            for (V6Entry e : entries) {
                writeLong(out, e.from);
                writeLong(out, e.to);
                out.write(DataHelper.getASCII(e.cc));
            }
            System.err.println("Wrote " + entries.size() + " entries to " + outFile);
        } catch (IOException ioe) {
            ioe.printStackTrace();
            return false;
        } finally {
            if (out != null) try { out.close(); } catch (IOException ioe) {}
        }
        return true;
    }

    /**
     * Used to temporarily hold, sort, and merge entries before compressing
     */
    private static class V6Entry implements Comparable<V6Entry> {
        /** first 8 bytes of the range bounds, as signed longs, inclusive */
        public final long from, to;
        /** two-character country code */
        public final String cc;

        /**
         * @param f 16 bytes
         * @param t 16 bytes
         * @param c two characters
         * @throws IllegalArgumentException on a wrong length or if to &lt; from (signed)
         */
        public V6Entry(byte[] f, byte[] t, String c) {
            if (f.length != 16 || t.length != 16 || c.length() != 2)
                throw new IllegalArgumentException();
            from = toLong(f);
            to = toLong(t);
            cc = c;
            if (to < from)
                throw new IllegalArgumentException(toString());
        }

        /**
         * @throws IllegalArgumentException if t &lt; f (signed)
         */
        public V6Entry(long f, long t, String c) {
            from = f;
            to = t;
            cc = c;
            if (t < f)
                throw new IllegalArgumentException(toString());
        }

        /** twos complement; orders by from, then by to */
        public int compareTo(V6Entry r) {
            if (from < r.from) return -1;
            if (r.from < from) return 1;
            if (to < r.to) return -1;
            if (r.to < to) return 1;
            return 0;
        }

        @Override
        public String toString() {
            return "0x" + Long.toHexString(from) + " -> 0x" + Long.toHexString(to) + " : " + cc;
        }
    }

    /**
     * @param ip at least 8 bytes
     * @return the first 8 bytes as a signed big-endian long
     */
    private static long toLong(byte ip[]) {
        long rv = 0;
        for (int i = 0; i < 8; i++)
            rv |= (ip[i] & 0xffL) << ((7-i)*8);
        return rv;
    }

    /** like DataHelper.writeLong(rawStream, 8, value) but allows negative values */
    private static void writeLong(OutputStream rawStream, long value) throws IOException {
        // big-endian, most significant byte first
        for (int i = 56; i >= 0; i -= 8) {
            byte cur = (byte) (value >> i);
            rawStream.write(cur);
        }
    }

    /** like DataHelper.readLong(src, offset, 8) but allows negative values */
    private static long readLong(byte[] src, int offset) throws IOException {
        long rv = 0;
        int limit = offset + 8;
        for (int i = offset; i < limit; i++) {
            rv <<= 8;
            rv |= src[i] & 0xFF;
        }
        return rv;
    }

    /**
     * Command line entry point: merge one or more CSV files into a
     * compressed data file, then read the result back once as a check.
     * Exits with status 1 on bad arguments or failure.
     */
    public static void main(String args[]) {
        if (args.length < 2) {
            System.err.println("Usage: GeoIPv6 infile1.csv [infile2.csv...] outfile.dat.gz");
            System.exit(1);
        }
        List<File> infiles = new ArrayList<File>();
        for (int i = 0; i < args.length - 1; i++) {
            infiles.add(new File(args[i]));
        }
        File outfile = new File(args[args.length - 1]);
        boolean success = compressGeoIPv6CSVFiles(infiles, outfile);
        if (!success) {
            System.err.println("Failed");
            System.exit(1);
        }
        // readback for testing
        readGeoIPFile(outfile, new Long[] { Long.MAX_VALUE }, Collections.<String, String>emptyMap(), new Log(GeoIPv6.class));
    }
}