Added anonbib for use in papers list

Source: https://gitweb.torproject.org/anonbib.git
Commit: b478fc493d4be2115185d94e077bf06196495417
Author: str4d
Date: 2013-08-11 11:14:00 +00:00
parent e38ef21b71
commit 7c42ce8329
23 changed files with 11209 additions and 0 deletions

1268
i2p2www/anonbib/BibTeX.py Normal file

File diff suppressed because it is too large.

39
i2p2www/anonbib/Makefile Normal file

@@ -0,0 +1,39 @@
PYTHON=python
VERSION=0.3-dev

all:
	$(PYTHON) writeHTML.py anonbib.cfg

clean:
	rm -f *~ */*~ *.pyc *.pyo

update:
	$(PYTHON) updateCache.py anonbib.cfg
	$(PYTHON) rank.py anonbib.cfg

suggest:
	$(PYTHON) rank.py suggest anonbib.cfg

test:
	$(PYTHON) test.py

veryclean: clean
	rm -f author.html date.html topic.html bibtex.html tmp.bib

TEMPLATES=_template_.html _template_bibtex.html
CSS=css/main.css css/pubs.css
BIBTEX=anonbib.bib
SOURCE=BibTeX.py config.py metaphone.py reconcile.py updateCache.py \
	writeHTML.py rank.py tests.py
EXTRAS=TODO README Makefile ChangeLog anonbib.cfg gold.gif silver.gif \
	upb.gif ups.gif
DISTFILES=$(TEMPLATES) $(CSS) $(BIBTEX) $(SOURCE) $(EXTRAS)

dist: clean
	rm -rf anonbib-$(VERSION)
	mkdir anonbib-$(VERSION)
	tar cf - $(DISTFILES) | (cd anonbib-$(VERSION); tar xf -)
	mkdir anonbib-$(VERSION)/cache
	tar czf anonbib-$(VERSION).tar.gz anonbib-$(VERSION)
	rm -rf anonbib-$(VERSION)

52
i2p2www/anonbib/README Normal file

@@ -0,0 +1,52 @@
anonbib 0.3 -- Code to generate the anonymity bibliography
Copyright (c) 2003-2008 Nick Mathewson
Based on 'PDOSBib' perl code by Eddie Kohler
This software is licensed under the GNU GPL, version 2 or later.
To use this software, you need to understand BibTeX and Python a
little. If it breaks, you get to keep both pieces. You will need
Python 2.2 or later.
To use this package:
- Get a good BibTeX file. You may want to mark it up with some of the
  extra keys used in our "anonbib.bib" file. All of the additional
  BibTeX keys we use have the prefix "www_"; check out anonbib.bib
  for their usage, or see the small example entry right after this list.
- Edit anonbib.cfg and _template_.html and _template_bibtex.html so they
  refer to your files, authors, topics, and so on.
- Run 'python updateCache.py anonbib.cfg' to create a local cache of the
  papers in your bibliography based on their www_*_url entries. (By
  default, the script will ignore any entries you have already cached. To
  force a fresh download of a cached file, delete it.)
- Run 'python rank.py anonbib.cfg' to download Google Scholar rankings of
  all the papers.
- Run 'python writeHTML.py anonbib.cfg'. Fix any errors you care about.
- Re-run these scripts when you change the bibliography.
- If you want to merge in big BibTeX files, try using the reconcile.py
  script. See the comment at the start of the file for usage info.
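
For example, a marked-up entry might look roughly like this (an
illustrative sketch only: the "www_" key names below are ones the scripts
look for, but the entry itself is made up; see anonbib.bib for real
entries):

  @inproceedings{example-entry,
    title = {An Example Anonymity Paper},
    author = {Jane Doe and John Smith},
    booktitle = {Proceedings of Some Workshop},
    year = {2003},
    www_section = {Mix Networks: Design},
    www_tags = {selected},
    www_pdf_url = {http://example.com/example-paper.pdf},
  }

Here www_section picks the topic heading the entry is listed under,
www_tags selects which generated bibliographies include it, and
www_pdf_url tells updateCache.py what to fetch into the local cache.
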
New in 0.3:
- Support for Google Scholar rankings to denote hot/rising papers.
Implemented by George Danezis.
- Make reconcile script generate more useful output.
- Add support for multiple bibliographies generated from a single bibtex
source. This is done via 'tags' on bibtex entries. If an entry is
tagged, it appears in the corresponding bibliographies. This is good
for generating a master bibliography and one or more selected readings
lists from the same source.
- Handle more errors when downloading files.
- When fetching a paper with a .ps url, generate the .ps.gz file
automatically.
- Note an error when a crossref overrides an existing field in an entry.
- Handle the Proceedings type correctly.
- Enforce proper encoding on pages: it must be number--number.
-

33
i2p2www/anonbib/TODO Normal file

@@ -0,0 +1,33 @@
- More general tasks
. Know about @book
. Write unit tests for everything
. Make name parsing vaguely sane
- Maybe uncrossref in tmp.bib
- Maybe pull important papers to the start of their sections?
. Clean \{}~ when going from note to url; add \{}~ when making
note from url.
. Also clean \_ to _ and back
- Look for urls in wherepublished.
. Forgive newlines in wherepublished, note.
- When sorting by date, entries with unknown months go into a magic
"month zero" before January. Is this right?
- Strip unused features.
o Take a configuration file on the command line instead of just
importing config.py.
- Cache tasks
- Generate a list of broken links
- Re-download all cached items if requested
- Clear dead items from cache
- Use HTTP HEAD requests to decide whether to update stale
elements in cache.
- Add ability to honor a "www_no_cache={1}" option for entries
if the authors ask us not to cache them.
- Maybe, add ability to cache images from an HTML page.
- Reconcile tasks
- Document it.
- Notice when there is new or different information of certain kinds
(pages, dates, etc) in the new information.

74
i2p2www/anonbib/_template_.html Normal file

@@ -0,0 +1,74 @@
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME!
*** EDIT THE .bib FILE or _template_.html INSTEAD!
Generated by `%(command_line)s'
(c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->
<!-- anon bib anonymity bibliography -->
<title>%(title)s</title>
<link rel="stylesheet" type="text/css" href="%(root)s/css/main.css" />
<link rel="stylesheet" type="text/css" href="%(root)s/css/pubs.css" />
</head>
<body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099"
alink="#ff9900" >
<h1 align="center">%(short_title)s</h1>
<p align="center">%(otherbibs)s</p>
<p align="center">%(choices)s</p>
<!-- Table 1: contains the sidebar and the body. -->
<table cellspacing="3" cellpadding="0" border="0" align="center" width="100%%">
<tr valign="top">
<td>
<!-- Table 2: The sidebar-->
<table align="right" cellspacing="0" cellpadding="5" width="100"
class="sidebar">
<tr valign="top"><td><p class="l1"><strong>%(sectiontypes)s:</strong><br /></p>
%(sections)s
</td>
</tr>
</table><!-- End of table 2 -->
</td>
<td width="85%%">
<h2>Publications by %(field)s</h2>
<ul class="sections">
%(entries)s
</ul>
</td>
<td width="5%%"><br /></td>
</tr>
</table><!-- End of table 1 -->
<p class="contact">Please send new or corrected entries to
<a name="contact">
<span class="email">&lt;anonbib&#64;free<!-- -->&#104;aven.net&gt;</span></a>.
<br />
If you can, please format them as BibTeX; see our
<a href="bibtex.html">BibTeX source page</a> for examples.<br />
Remember to include URLs if possible:
<a href="http://citeseer.ist.psu.edu/online-nature01/">offline papers are
less useful</a>.
</p>
<p>The source code to anonbib is now in Git. See <a href="https://gitweb.torproject.org/anonbib.git">the anonbib gitweb page</a> for more information.
</p>
</body>
</html>

28
i2p2www/anonbib/_template_bibtex.html Normal file

@@ -0,0 +1,28 @@
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME!
*** EDIT THE .bib FILE or _template_.html INSTEAD!
Generated by `%(command_line)s'
(c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->
<title>%(title)s: BibTeX</title>
<link rel="stylesheet" type="text/css" href="%(root)s/css/main.css" />
<link rel="stylesheet" type="text/css" href="%(root)s/css/pubs.css" />
</head>
<body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099"
alink="#ff9900" >
<table cellspacing="15" border="0" align="center" width="100%%">
%(entries)s
</table>
</body>
</html>

6293
i2p2www/anonbib/anonbib.bib Normal file

File diff suppressed because it is too large.

163
i2p2www/anonbib/anonbib.cfg Normal file

@@ -0,0 +1,163 @@
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
# Our input filename.
MASTER_BIB = "./anonbib.bib"
# Where do we put generated HTML?
OUTPUT_DIR = "."
# Where do we put cached papers (relative to OUTPUT_DIR)
CACHE_DIR = "cache"
# Where do we cache citation data for papers (relative to OUTPUT_DIR)
CITE_CACHE_DIR = "cite_cache"
# Are there subsections for cached papers? This is useful for putting
# different Apache permissions on different directories.
CACHE_SECTIONS = [ ]
# Only include entries that have this key. This is one way to
# generate multiple bibliographies from the same source. Currently
# deprecated in favor of tags.
#
#example: REQUIRE_KEY = "www_selected"
#
REQUIRE_KEY = None
# Timeout when downloading from a server while caching, in seconds.
DOWNLOAD_CONNECT_TIMEOUT = 15
# Template files.
TEMPLATE_FILE = "./_template_.html"
BIBTEX_TEMPLATE_FILE = "./_template_bibtex.html"
# Map from author name regex to author homepage.
AUTHOR_URLS = {
'Ross.*Anderson' : 'http://www.cl.cam.ac.uk/users/rja14/',
'Alessandro.*Acquisti' : 'http://www.heinz.cmu.edu/~acquisti/index.html',
'Agrawal' : 'http://www.research.ibm.com/people/a/agrawal/',
'Adam.*Back' : 'http://www.cypherspace.org/~adam/',
'Berthold' : 'http://page.inf.fu-berlin.de/~berthold/',
'Borisov' : 'http://hatswitch.org/~nikita/',
'Bettati' : 'http://faculty.cs.tamu.edu/bettati/',
'Miguel.*Castro' : 'http://research.microsoft.com/users/mcastro/',
'Chaum' : 'http://www.chaum.com/',
'J.*Claessens' : 'http://www.esat.kuleuven.be/~joclaess/',
'R.*Clayton' : 'http://www.cl.cam.ac.uk/~rnc1/',
'Wei Dai' : 'http://www.eskimo.com/~weidai/',
'Danezis' : 'http://homes.esat.kuleuven.be/~gdanezis/',
'Claudia.*az' : 'http://www.esat.kuleuven.be/~cdiaz/',
'Dingledine' : 'http://www.freehaven.net/~arma/cv.html',
'Desmedt' : 'http://www.cs.fsu.edu/~desmedt/',
'Douceur' : 'http://research.microsoft.com/~johndo/',
'N.*Hopper' : 'http://www-users.cs.umn.edu/~hopper/',
'Michael.*Freedman' : 'http://www.scs.cs.nyu.edu/~mfreed/',
'Gergely' : 'http://www.planeforge.com/home/tgm',
'Ian.*Goldberg' : 'http://www.cs.uwaterloo.ca/~iang/',
'Christian.*Grothoff' : 'http://grothoff.org/christian/',
'D.*Hopwood' : 'http://www.users.zetnet.co.uk/hopwood/',
'Jakobsson' : 'http://www2.parc.com/csl/members/mjakobss/markus-jakobsson.htm',
'Juels' : 'http://www.rsasecurity.com/rsalabs/staff/bios/ajuels/',
'Kaashoek' : 'http://pdos.csail.mit.edu/~kaashoek/',
'K.*Kurosawa' : 'http://kuro.cis.ibaraki.ac.jp/~kurosawa/',
'H.*Langos' : 'http://www.wh9.tu-dresden.de/~heinrich/',
'B.*Liskov' : 'http://www.pmg.lcs.mit.edu/barbara_liskov.html',
'Mathewson' : 'http://www.wangafu.net/~nickm/',
'Mazi&egrave;res' : 'http://www.scs.cs.nyu.edu/~dm/',
'B.*M&ouml;ller' : ('http://www.informatik.tu-darmstadt.de/TI/'
'Mitarbeiter/moeller.html'),
'U.*M&ouml;ller' : 'http://www.ulfm.de/',
'D.*Molnar' : 'http://www.cs.berkeley.edu/~dmolnar/',
'R.*Morris' : 'http://www.pdos.lcs.mit.edu/~rtm/',
'S.*Murdoch' : 'http://www.cl.cam.ac.uk/users/sjm217/',
'A.*Pashalidis' : 'http://www.xrtc.com/',
'A.*Pfitzmann' : 'http://dud.inf.tu-dresden.de/~pfitza/',
'B.*Pfitzmann' : 'http://www.zurich.ibm.com/~bpf/',
'B.*Preneel' : 'http://www.esat.kuleuven.be/~preneel/',
'Daniel.*Simon' : 'http://research.microsoft.com/crypto/dansimon/me.htm',
'Rackoff' : 'http://www.cs.toronto.edu/DCS/People/Faculty/rackoff.html',
'Jean F' : 'http://www.geocities.com/j_f_raymond/',
'M.*Rennhard' : 'http://www.tik.ee.ethz.ch/~rennhard/',
'M.*Reiter' : 'http://www.ece.cmu.edu/~reiter/',
'Rivest' : 'http://theory.lcs.mit.edu/~rivest/',
'Avi.*Rubin' : 'http://avirubin.com/',
'Sassaman' : 'http://homes.esat.kuleuven.be/~lsassama/',
'Serjantov' : 'http://home.arachsys.com/~aas/',
'S.*Seys' : 'http://www.esat.kuleuven.be/~sseys/',
'Shoup' : 'http://www.shoup.net/',
'Syverson' : 'http://www.syverson.org/',
'Tsudik' : 'http://www.ics.uci.edu/~gts/c.html',
'M.*Waidner' : 'http://www.zurich.ibm.com/~wmi/',
'David.*Wagner' : 'http://www.cs.berkeley.edu/~daw/',
'M.*Waldman' : 'http://cs1.cs.nyu.edu/~waldman/',
'B.*Waters' : 'http://www.cs.utexas.edu/~bwaters/',
'Chenxi.*Wang' : 'http://www.ece.cmu.edu/~chenxi/',
'M.*Wright' : 'http://ranger.uta.edu/~mwright/',
'B.*Levine' : 'http://prisms.cs.umass.edu/brian/',
'T.*Benjamin' : 'http://www.cs.umass.edu/~tshb/',
'B.*Defend' : 'http://www.cs.umass.edu/~defend/',
'K.*Fu' : 'http://www.cs.umass.edu/~kevinfu/',
'J.*Camenisch' : 'http://www.zurich.ibm.com/~jca/',
'S.*Hohenberger' : 'http://www.cs.jhu.edu/~susan/',
'M.*Kohlweiss' : 'http://homes.esat.kuleuven.be/~mkohlwei/',
'A.*Lysyanskaya' : 'http://www.cs.brown.edu/~anna/',
'M.*Meyerovich' : 'http://www.cs.brown.edu/~mira/',
'P.*Zieli.*ski' : 'http://www.cl.cam.ac.uk/~pz215/',
'S.*Zander' : 'http://caia.swin.edu.au/cv/szander/'
}
# List of patterns for author names _not_ to do an initial-tolerant
# match on when building section list. E.g., if "J\\. Smith" is in
# this list, he won't be folded into "John Smith".
NO_COLLAPSE_AUTHORS = [
]
# Map from LaTeX-style name of author to collapse to canonical name.
COLLAPSE_AUTHORS = {
"Nicholas Mathewson": "Nick Mathewson",
}
# Map from author pattern to collation key.
# This keeps 'Zero Knowledge Systems' from getting alphabetized as "Systems,
# Zero Knowledge."
ALPHABETIZE_AUTHOR_AS = {
"Zero.*Knowledge.*Systems": "Zero Knowledge Systems",
"Carlos.*Aguilar.*Melchor": "Aguilar Melchor Carlos",
}
# Map of strings to initialize BibTeX parsing with.
INITIAL_STRINGS = {
# SECTIONS
'sec_mix' : "Mix Networks: Design",
'sec_mixattacks' : "Mix Networks: Attacks",
'sec_stream' : "Stream-based anonymity",
'sec_traffic' : "Traffic analysis",
'sec_pub' : "Anonymous publication",
'sec_pir' : "Private Information Retrieval",
'sec_nym' : "Pseudonymity"
}
# Don't put in any entries of this type.
OMIT_ENTRIES = ("proceedings", "journal")
# List of all recognized values for www_tags.
ALL_TAGS = ("selected", )
# Titles of page, by tag.
TAG_TITLES = { "": "The Free Haven Anonymity Bibliography",
"selected": "Free Haven's Selected Papers in Anonymity"
}
# As TAG_TITLES, but shorter.
TAG_SHORT_TITLES = { "": "Anonymity Bibliography",
"selected": "Selected Papers in Anonymity",
}
# Directories where tag pages get generated.
TAG_DIRECTORIES = { '': "full",
"selected": "" }
# Make cached stuff group-writable. Make sure that your cache directories
# are sticky!
CACHE_UMASK = 002

56
i2p2www/anonbib/config.py Normal file

@@ -0,0 +1,56 @@
# Copyright 2003-2006, Nick Mathewson. See LICENSE for licensing info.
import re
_KEYS = [ "ALL_TAGS",
"ALPHABETIZE_AUTHOR_AS","AUTHOR_URLS","CACHE_DIR","CACHE_SECTIONS",
"CACHE_UMASK",
"CITE_CACHE_DIR",
"COLLAPSE_AUTHORS",
"DOWNLOAD_CONNECT_TIMEOUT","INITIAL_STRINGS",
"MASTER_BIB", "NO_COLLAPSE_AUTHORS", "OMIT_ENTRIES",
"OUTPUT_DIR", "TEMPLATE_FILE", "BIBTEX_TEMPLATE_FILE",
"REQUIRE_KEY", "TAG_TITLES", "TAG_DIRECTORIES", "TAG_SHORT_TITLES",
]
for _k in _KEYS:
    globals()[_k]=None
del _k

def load(cfgFile):
    mod = {}
    execfile(cfgFile, mod)
    for _k in _KEYS:
        try:
            globals()[_k]=mod[_k]
        except KeyError:
            raise KeyError("Configuration option %s is missing"%_k)
    INITIAL_STRINGS.update(_EXTRA_INITIAL_STRINGS)
    AUTHOR_RE_LIST[:] = [
        (re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items()
    ]
    NO_COLLAPSE_AUTHORS_RE_LIST[:] = [
        re.compile(pat, re.I) for pat in NO_COLLAPSE_AUTHORS
    ]
    ALPHABETIZE_AUTHOR_AS_RE_LIST[:] = [
        (re.compile(k, re.I), v,) for k,v in ALPHABETIZE_AUTHOR_AS.items()
    ]
_EXTRA_INITIAL_STRINGS = {
# MONTHS
'jan' : 'January', 'feb' : 'February',
'mar' : 'March', 'apr' : 'April',
'may' : 'May', 'jun' : 'June',
'jul' : 'July', 'aug' : 'August',
'sep' : 'September', 'oct' : 'October',
'nov' : 'November', 'dec' : 'December',
}
AUTHOR_RE_LIST = []
NO_COLLAPSE_AUTHORS_RE_LIST = []
ALPHABETIZE_AUTHOR_AS_RE_LIST = []

111
i2p2www/anonbib/css/main.css Normal file

@@ -0,0 +1,111 @@
img {
border: 0px;
}
BODY {
background-color: #FFF;
color: #000;
margin: 0px;
}
FORM {
margin-top: 0.5em;
margin-bottom: 0.5em;
}
P, TD {
font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
}
P.contact {
text-align: center;
}
P.contact A {
font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
font-weight: normal;
}
SPAN.email {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
font-weight: bold;
}
P IMG {
vertical-align: text-bottom;
}
P.crumbbreadth {
margin-top: 0.25em;
}
.compact {
margin-top: -0.5em;
text-indent: 0em;
}
SPAN.biblio {
font-style: italic;
}
SPAN.biblio A {
font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
font-weight: normal;
text-decoration: underline;
}
SPAN.availability {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
font-weight: normal;
}
UL {
list-style: outside;
}
UL.expand {
margin-bottom: 1em;
}
UL.sections {
list-style: none;
}
/* Font-level properties */
PRE {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
}
STRONG, A {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Rockwell", "Lucida Sans Unicode", monospace;
font-weight: bold;
}
A:link {
color: #B00;
}
A:visited {
color: #903;
}
H1, H2, H3, H4, H5, H6 {
font-family: lucidatypewriter, "Lucida Typewriter", "Lucida Console", Monaco, monospace;
}
H1 A, H2 A, H3 A, H4 A, H5 A, H6 A {
font-family: lucidatypewriter, "Lucida Typewriter", "Lucida Console", Monaco, monospace;
}
H1 {
color: #00B;
}
H2 {
color: #006;
}
H3 {
color: #006;
}

121
i2p2www/anonbib/css/pubs.css Normal file

@@ -0,0 +1,121 @@
SPAN.title {
font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
font-weight: bold;
}
SPAN.author {
font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
font-weight: normal;
}
SPAN.availability {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
font-weight: normal;
}
SPAN.author A {
font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
font-weight: normal;
}
SPAN.biblio {
font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
font-style: italic;
}
SPAN.biblio A {
text-decoration: underline;
}
SPAN.bad {
text-decoration: underline;
color: #000;
background-color: #FDF;
}
P.remarks {
font-family: serif;
margin-top: 0.3em;
margin-bottom: 0;
margin-left: 5em;
padding-left: 0.5em;
border-width: 0 0 0 5px;
border-color: black;
border-style: solid;
}
P.remarks A {
text-decoration: underline;
}
P.l1 {
margin-left: 0.5em;
}
P.l2 {
margin-left: 1em;
margin-top: 0.3em;
margin-bottom: 0.3em;
}
P.entry {
margin-top: 0.7em;
margin-bottom: 0;
}
DIV.impEntry {
border-width: 1px;
border-color: black;
border-style: solid;
background-color: #FFE;
padding: 0.3em;
margin-top: 0.7em;
margin-bottom: 0;
}
P.impEntry {
background-color: #FFE;
padding: 0;
margin-top: 0;
margin-bottom: 0;
}
DIV.draftEntry {
/*
border-width: 1px;
border-color: black;
border-style: solid;
padding: 0.3em;
margin-top: 0.7em;
margin-bottom: 0;
*/
}
P.draftEntry {
color: #555;
padding: 0;
margin-top: 0;
margin-bottom: 0;
}
TABLE.sidebar {
border-width: 2px;
border-color: black;
border-style: solid;
background-color: #CFF;
}
TD.bibtex {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
border-width: 2px;
font-weight: normal;
border-color: black;
border-style: solid;
background-color: #DFF;
}
PRE.bibtex {
font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
font-size: smaller;
}

BIN
i2p2www/anonbib/gold.gif Normal file

Binary file not shown (size: 540 B).

193
i2p2www/anonbib/metaphone.py

@@ -0,0 +1,193 @@
#!/usr/bin/python2
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
"""metaphone.py -- Pure-python metaphone implementation.
(This is not guaranteed to match the real metaphone algorithm; I
haven't tested it thoroughly enough. Let me know if you find bugs.
Based on the original C++ metaphone implementation.)
"""
TRIPLES = {
'dge': 'j',
'dgi': 'j',
'dgy': 'j',
'sia': '+x',
'sio': '+x',
'tia': '+x',
'tio': '+x',
'tch': '',
'tha': '0',
'the': '0',
'thi': '0',
'tho': '0',
'thu': '0',
}
DOUBLES = {
'ph' : 'f',
'sh' : 'x'
}
SINGLETONS = {
'd': 't',
'f': 'f',
'j': 'j',
'l': 'l',
'm': 'm',
'n': 'n',
'r': 'r',
'p': 'p',
'q': 'k',
'v': 'f',
'x': 'ks',
'z': 's',
}
ALLCHARS = "".join(map(chr, range(256)))
NONLCCHARS = "".join([c for c in ALLCHARS if not c.islower()])
def metaphone(s):
"""Return the metaphone equivalent of a provided string"""
s = s.lower()
s = s.translate(ALLCHARS, NONLCCHARS)
if not s: return ""
# If ae, gn, kn, pn, wr then drop the first letter.
if s[:2] in ("ae", "gn", "kn", "pn", "wr"):
s = s[1:]
# Change "x" to "s"
if s[0] == 'x':
s = "s%s" % s[1:]
# Get rid of "h" in "wh".
if s[:2] == 'wh':
s = "w%s" % s[1:]
# Get rid of s from end.
if s[-1] == 's':
s = s[:-1]
result = []
prevLtr = ' '
vowelBefore = 0
lastChar = len(s)-1
for idx in range(len(s)):
curLtr = s[idx]
# If first char is a vowel, keep it.
if curLtr in "aeiou":
if idx == 0:
result.append(curLtr)
continue
# Skip double letters.
if idx < lastChar:
if curLtr == s[idx+1]:
continue
try:
r = TRIPLES[s[idx:idx+3]]
if r == "+x":
if idx > 1:
result.append("x")
continue
else:
result.append(r)
continue
except KeyError:
pass
try:
r = DOUBLES[s[idx:idx+2]]
result.append(r)
continue
except KeyError:
pass
try:
r = SINGLETONS[s[idx]]
result.append(r)
continue
except KeyError:
pass
if idx > 0:
prevLtr = s[idx-1]
vowelBefore = prevLtr in "aeiou"
curLtr = s[idx]
nextLtr2 = ' '
if idx < lastChar:
nextLtr = s[idx+1]
vowelAfter = nextLtr in "aeiou"
frontvAfter = nextLtr in "eiy"
if idx+1 < lastChar:
nextLtr2 = s[idx+2]
else:
nextLtr = ' '
vowelAfter = frontvAfter = 0
if curLtr == 'b':
if idx == lastChar and prevLtr == 'm':
pass
else:
result.append(curLtr)
elif curLtr == 'c':
# silent 'sci', 'sce', 'scy', etc. OK.
if not (prevLtr == 's' and frontvAfter):
if nextLtr in 'ia':
result.append("x")
elif frontvAfter:
result.append("s")
elif prevLtr == 's' and nextLtr == 'h':
result.append('k')
elif nextLtr == 'h':
if idx == 0 and nextLtr2 in "aeiou":
result.append('k')
else:
result.append('x')
elif prevLtr == 'c':
result.append('c')
else:
result.append('k')
elif curLtr == 'g':
if (idx < lastChar-1) and nextLtr == 'h':
pass
elif s[idx:] == 'gned':
pass
elif s[idx:] == 'gn':
pass
elif prevLtr == 'd' and frontvAfter:
pass
else:
hard = (prevLtr == 'g')
if frontvAfter and not hard:
result.append('j')
else:
result.append('k')
elif curLtr == 'h':
if prevLtr in 'csptg':
pass
elif vowelBefore and not vowelAfter:
pass
else:
result.append('h')
elif curLtr == 'k':
if prevLtr != 'c': result.append('k')
elif curLtr in 'wy':
if vowelAfter:
result.append(curLtr)
return "".join(result)
def demo(a):
print a, "=>", metaphone(a)
if __name__ == '__main__':
demo("Nick. Mathewson")
demo("joe schmidt")
demo("Beethoven")
demo("Because the world is round")

202
i2p2www/anonbib/rank.py Normal file

@@ -0,0 +1,202 @@
# Make rankings of papers and authors for automatic classification of content hotness
# Google Scholar address
# http://scholar.google.com/scholar?as_epq=
# Take care of the caching setup
cache_expire = 60*60*24*30 # 30 days
# Checks
import config
import os
import sys
from os.path import exists, isdir, join, getmtime
from os import listdir, remove
def remove_old():
# Remove all old cached files
filenames = listdir(cache_folder())
from time import time
now = time()
for f in filenames:
pf = join(cache_folder(), f)
time_mt = getmtime(pf)
if now - time_mt > cache_expire: # 30 days
remove(pf)
def cache_folder():
r = join(config.OUTPUT_DIR, config.CITE_CACHE_DIR)
if not exists(r):
os.makedirs(r)
assert isdir(r)
return r
import re
from urllib2 import urlopen, build_opener
from urllib import quote
from datetime import date
import hashlib
# A more handy hash
def md5h(s):
m = hashlib.md5()
m.update(s)
return m.hexdigest()
format_tested = 0
def getPageForTitle(title, cache=True, update=True, save=True):
#Returns (citation-count, scholar url) tuple, or (None,None)
global format_tested
if not format_tested and update:
format_tested = 1
TestScholarFormat()
# Do not assume that the title is clean
title = re.sub("\s+", " ", title)
title = re.sub("[^'a-zA-Z0-9\. \-\/:]", "", title)
title = re.sub("'\/", " ", title)
# We rely on google scholar to return the article with this exact title
gurl = "http://scholar.google.com/scholar?as_q=&as_epq=%s&as_occt=title"
url = gurl % quote(title)
# Access cache or network
if exists(join(cache_folder(), md5h(url))) and cache:
return url, file(join(cache_folder(), md5h(url)),'r').read()
elif update:
print "Downloading rank for %r."%title
# Make a custom user agent (so that we are not filtered by Google)!
opener = build_opener()
opener.addheaders = [('User-agent', 'Anon.Bib.0.1')]
print "connecting..."
connection = opener.open(url)
print "reading"
page = connection.read()
print "done"
if save:
file(join(cache_folder(), md5h(url)),'w').write(page)
return url, page
else:
return url, None
def getCite(title, cache=True, update=True, save=True):
url, page = getPageForTitle(title, cache=cache, update=update, save=save)
if not page:
return None,None
# Check if it finds any articles
if len(re.findall("did not match any articles", page)) > 0:
return (None, None)
# Kill all tags!
cpage = re.sub("<[^>]*>", "", page)
# Add up all citations
s = sum([int(x) for x in re.findall("Cited by ([0-9]*)", cpage)])
return (s, url)
def getPaperURLs(title, cache=True, update=True, save=True):
url, page = getPageForTitle(title, cache=cache, update=update, save=save)
if not page:
return []
pages = re.findall(r'\&\#x25ba\;.*class=fl href="([^"]*)"', page)
return pages
def get_rank_html(title, years=None, base_url=".", update=True,
velocity=False):
s,url = getCite(title, update=update)
# Paper cannot be found
if s is None:
return ''
html = ''
url = url.replace("&","&amp;")
# Hotness: gold badge at H or more citations, silver at h or more.
H,h = 50,5
if s >= H:
html += '<a href="%s"><img src="%s/gold.gif" alt="More than %s citations on Google Scholar" title="More than %s citations on Google Scholar" /></a>' % (url,base_url,H,H)
elif s >= h:
html += '<a href="%s"><img src="%s/silver.gif" alt="More than %s citations on Google Scholar" title="More than %s citations on Google Scholar" /></a>' % (url,base_url,h,h)
# Only include the velocity if asked.
if velocity:
# Velocity
d = date.today().year - int(years)
if d >= 0:
if 2 < s / (d +1) < 10:
html += '<img src="%s/ups.gif" />' % base_url
if 10 <= s / (d +1):
html += '<img src="%s/upb.gif" />' % base_url
return html
def TestScholarFormat():
# We need to ensure that Google Scholar does not change its page format under our feet
# Use some cases to check if all is good
print "Checking google scholar formats..."
stopAndGoCites = getCite("Stop-and-Go MIXes: Providing Probabilistic Anonymity in an Open System", False)[0]
dragonCites = getCite("Mixes protected by Dragons and Pixies: an empirical study", False, save=False)[0]
if stopAndGoCites in (0, None):
print """OOPS.\n
It looks like Google Scholar changed their URL format or their output format.
I went to count the cites for the Stop-and-Go MIXes paper, and got nothing."""
sys.exit(1)
if dragonCites != None:
print """OOPS.\n
It looks like Google Scholar changed their URL format or their output format.
I went to count the cites for a fictitious paper, and found some."""
sys.exit(1)
def urlIsUseless(u):
if u.find("freehaven.net/anonbib/") >= 0:
# Our own cache is not the primary citation for anything.
return True
elif u.find("owens.mit.edu") >= 0:
# These citations only work for 'members of the MIT community'.
return True
else:
return False
URLTYPES=[ "pdf", "ps", "txt", "ps_gz", "html" ]
if __name__ == '__main__':
# First download the bibliography file.
import BibTeX
suggest = False
if sys.argv[1] == 'suggest':
suggest = True
del sys.argv[1]
config.load(sys.argv[1])
if config.CACHE_UMASK != None:
os.umask(config.CACHE_UMASK)
bib = BibTeX.parseFile(config.MASTER_BIB)
remove_old()
print "Downloading missing ranks."
for ent in bib.entries:
getCite(ent['title'], cache=True, update=True)
if suggest:
for ent in bib.entries:
haveOne = False
for utype in URLTYPES:
if ent.has_key("www_%s_url"%utype):
haveOne = True
break
if haveOne:
continue
print ent.key, "has no URLs given."
urls = [ u for u in getPaperURLs(ent['title']) if not urlIsUseless(u) ]
for u in urls:
print "\t", u

292
i2p2www/anonbib/reconcile.py

@@ -0,0 +1,292 @@
#!/usr/bin/python2
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
"""Code to determine which entries are new and which are old.
To scan a new file, run "python reconcile.py anonbib.cfg new-file.bib". This
will generate a new bibtex file called 'tmp.bib', with all the new entries
cleaned up a little, and all the duplicate entries commented out.
"""
import sys
import re
assert sys.version_info[:3] >= (2,2,0)
import BibTeX
import config
import metaphone
_MPCACHE = {}
def soundsLike(s1, s2):
c = _MPCACHE
s1 = clean(s1)
s2 = clean(s2)
try:
m1 = c[s1]
except KeyError:
m1 = c[s1] = metaphone.metaphone(s1)
try:
m2 = c[s2]
except KeyError:
m2 = c[s2] = metaphone.metaphone(s2)
return m1 == m2
def mphone(s):
c = _MPCACHE
s = clean(s)
try:
return c[s]
except:
m = c[s] = metaphone.metaphone(s)
return m
def clean(s):
s = re.sub(r'\s+', ' ', s)
s = s.strip()
return s
class MasterBibTeX(BibTeX.BibTeX):
def __init__(self):
BibTeX.BibTeX.__init__(self)
def buildIndex(self):
self.byTitle = {}
for ent in self.entries:
for t in self._titleForms(ent['title']):
self.byTitle.setdefault(t, []).append(ent)
def _titleForms(self, title):
title = title.lower()
title = re.sub(r'\b(an|a|the|of)\b', "", title)
title = clean(title)
res = [ mphone(title) ]
if ':' in title:
for t in title.split(":"):
res.append(mphone(t.strip()))
#print "%r\n => %s" % (title,res)
return res
def _titlesAlike(self, t1, t2):
t1 = clean(t1)
t2 = clean(t2)
if t1 == t2:
return 2
tf1 = self._titleForms(t1)
tf2 = self._titleForms(t2)
for t in tf1:
if t in tf2: return 1
return 0
def _authorsAlike(self, a1, a2):
if not soundsLike(" ".join(a1.last)," ".join(a2.last)):
return 0
if (a1.first == a2.first and a1.von == a2.von
and a1.jr == a2.jr):
return 2
if soundsLike(" ".join(a1.first), " ".join(a2.first)):
return 1
if not a1.first or not a2.first:
return 1
if self._initialize(a1.first) == self._initialize(a2.first):
return 1
return 0
def _initialize(self, name):
name = " ".join(name).lower()
name = re.sub(r'([a-z])[a-z\.]*', r'\1', name)
name = clean(name)
return name
def _authorListsAlike(self, a1, a2):
if len(a1) != len(a2):
return 0
a1 = [ (a.last, a) for a in a1 ]
a2 = [ (a.last, a) for a in a2 ]
a1.sort()
a2.sort()
if len(a1) != len(a2):
return 0
r = 2
for (_, a1), (_, a2) in zip(a1,a2):
x = self._authorsAlike(a1,a2)
if not x:
return 0
elif x == 1:
r = 1
return r
def _entryDatesAlike(self, e1, e2):
try:
if clean(e1['year']) == clean(e2['year']):
return 2
else:
return 0
except KeyError:
return 1
def includes(self, ent, all=0):
title = ent['title']
candidates = []
for form in self._titleForms(title):
try:
candidates.extend(self.byTitle[form])
except KeyError:
pass
goodness = []
for knownEnt in candidates:
match = (self._entryDatesAlike(ent, knownEnt) *
self._titlesAlike(ent['title'], knownEnt['title']) *
self._authorListsAlike(ent.parsedAuthor,
knownEnt.parsedAuthor) )
if match:
goodness.append((match, knownEnt))
goodness.sort()
if all:
return goodness
if goodness:
return goodness[-1]
else:
return None, None
def demo(self):
for e in self.entries:
matches = self.includes(e, 1)
m2 = []
mids = []
for g,m in matches:
if id(m) not in mids:
mids.append(id(m))
m2.append((g,m))
matches = m2
if not matches:
print "No match for %s"%e.key
if matches[-1][1] is e:
print "%s matches for %s: OK."%(len(matches), e.key)
else:
print "%s matches for %s: %s is best!" %(len(matches), e.key,
matches[-1][1].key)
if len(matches) > 1:
for g, m in matches:
print "%%%% goodness", g
print m
def noteToURL(note):
" returns tp, url "
note = note.replace("\n", " ")
m = re.match(r'\s*(?:\\newline\s*)*\s*\\url{(.*)}\s*(?:\\newline\s*)*',
note)
if not m:
return None
url = m.group(1)
for suffix, tp in ((".html", "html"),
(".ps", "ps"),
(".ps.gz", "ps_gz"),
(".pdf", "pdf"),
(".txt", "txt")):
if url.endswith(suffix):
return tp,url
return "???", url
all_ok = 1
def emit(f,ent):
global all_ok
errs = ent._check()
if master.byKey.has_key(ent.key.strip().lower()):
errs.append("ERROR: Key collision with master file")
if errs:
all_ok = 0
note = ent.get("note")
if ent.getURL() and not note:
ent['note'] = "\url{%s}"%ent.getURL()
elif note:
m = re.match(r'\\url{(.*)}', note)
if m:
url = m.group(0)
tp = None
if url.endswith(".txt"):
tp = "txt"
elif url.endswith(".ps.gz"):
tp = "ps_gz"
elif url.endswith(".ps"):
tp = "ps_gz"
elif url.endswith(".pdf"):
tp = "pdf"
elif url.endswith(".html"):
tp = "html"
if tp:
ent['www_%s_url'%tp] = url
if errs:
all_ok = 0
for e in errs:
print >>f, "%%%%", e
print >>f, ent.format(77, 4, v=1, invStrings=invStrings)
def emitKnown(f, ent, matches):
print >>f, "%% Candidates are:", ", ".join([e.key for g,e in matches])
print >>f, "%%"
print >>f, "%"+(ent.format(77,4,1,invStrings).replace("\n", "\n%"))
if __name__ == '__main__':
if len(sys.argv) != 3:
print "reconcile.py expects 2 arguments"
sys.exit(1)
config.load(sys.argv[1])
print "========= Scanning master =========="
master = MasterBibTeX()
master = BibTeX.parseFile(config.MASTER_BIB, result=master)
master.buildIndex()
print "========= Scanning new file ========"
try:
fn = sys.argv[2]
input = BibTeX.parseFile(fn)
except BibTeX.ParseError, e:
print "Error parsing %s: %s"%(fn,e)
sys.exit(1)
f = open('tmp.bib', 'w')
keys = input.newStrings.keys()
keys.sort()
for k in keys:
v = input.newStrings[k]
print >>f, "@string{%s = {%s}}"%(k,v)
invStrings = input.invStrings
for e in input.entries:
if not (e.get('title') and e.get('author')):
print >>f, "%%\n%%%% Not enough information to search for a match: need title and author.\n%%"
emit(f, e)
continue
matches = master.includes(e, all=1)
if not matches:
print >>f, "%%\n%%%% This entry is probably new: No match found.\n%%"
emit(f, e)
else:
print >>f, "%%"
print >>f, "%%%% Possible match found for this entry; max goodness",\
matches[-1][0], "\n%%"
emitKnown(f, e, matches)
if not all_ok:
print >>f, "\n\n\nErrors remain; not finished.\n"
f.close()

BIN
i2p2www/anonbib/silver.gif Normal file

Binary file not shown (size: 539 B).

File diff suppressed because it is too large.

86
i2p2www/anonbib/tests.py Normal file

@@ -0,0 +1,86 @@
#!/usr/bin/python2
# Copyright 2004-2008, Nick Mathewson. See LICENSE for licensing info.
"""Unit tests for anonbib."""
import BibTeX
import metaphone
#import reconcile
#import writeHTML
#import updateCache
import unittest
class MetaphoneTests(unittest.TestCase):
def testMetaphone(self):
pass
class BibTeXTests(unittest.TestCase):
def testTranslation(self):
ut = BibTeX.url_untranslate
self.assertEquals(ut("Fred"),"Fred")
self.assertEquals(ut("Hello, World."), "Hello_2c_20World.")
te = BibTeX.TeXescapeURL
ute = BibTeX.unTeXescapeURL
self.assertEquals(te("http://example/~me/my_file"),
r"http://example/\{}~me/my\_file")
self.assertEquals(ute(r"http:{}//example/\{}~me/my\_file"),
"http://example/~me/my_file")
h = BibTeX.htmlize
self.assertEquals(h("Hello, world"), "Hello, world")
self.assertEquals(h(r"\'a\`e\'{i}(\'\i)\"o&\^u"),
"&aacute;&egrave;&iacute;(&iacute;)&ouml;&amp;"
"&ucirc;")
self.assertEquals(h(r"\~n and \c{c}"), "&ntilde; and &ccedil;")
self.assertEquals(h(r"\AE---a ligature"), "&AElig;&mdash;a ligature")
self.assertEquals(h(r"{\it 33}"), " 33")
self.assertEquals(h(r"Pages 33--99 or vice--versa?"),
"Pages 33-99 or vice&ndash;versa?")
t = BibTeX.txtize
self.assertEquals(t("Hello, world"), "Hello, world")
self.assertEquals(t(r"\'a\`e\'{i}(\'\i)\"o&\^u"),
"aei(i)o&u")
self.assertEquals(t(r"\~n and \c{c}"), "n and c")
self.assertEquals(t(r"\AE---a ligature"), "AE---a ligature")
self.assertEquals(t(r"{\it 33}"), " 33")
self.assertEquals(t(r"Pages 33--99 or vice--versa?"),
"Pages 33--99 or vice--versa?")
def authorsParseTo(self,authors,result):
pa = BibTeX.parseAuthor(authors)
self.assertEquals(["|".join(["+".join(item) for item in
[a.first,a.von,a.last,a.jr]])
for a in pa],
result)
def testAuthorParsing(self):
pa = BibTeX.parseAuthor
PA = BibTeX.ParsedAuthor
apt = self.authorsParseTo
apt("Nick A. Mathewson and Roger Dingledine",
["Nick+A.||Mathewson|", "Roger||Dingledine|"])
apt("John van Neumann", ["John|van|Neumann|"])
apt("P. Q. Z. de la Paz", ["P.+Q.+Z.|de+la|Paz|"])
apt("Cher", ["||Cher|"])
apt("Smith, Bob", ["Bob||Smith|"])
apt("de Smith, Bob", ["Bob|de|Smith|"])
apt("de Smith, Bob Z", ["Bob+Z|de|Smith|"])
#XXXX Fix this.
#apt("Roberts Smith Wilkins, Bob Z", ["Bob+Z||Smith+Wilkins|"])
apt("Smith, Jr, Bob", ["Bob||Smith|Jr"])
#XXXX Fix this.
#apt("R Jones, Jr.", ["R||Jones|Jr."])
apt("Smith, Bob and John Smith and Last,First",
["Bob||Smith|", "John||Smith|", "First||Last|"])
apt("Bob Smith and John Smith and John Doe",
["Bob||Smith|", "John||Smith|", "John||Doe|"])
if __name__ == '__main__':
unittest.main()

BIN
i2p2www/anonbib/upb.gif Normal file

Binary file not shown (size: 555 B).

169
i2p2www/anonbib/updateCache.py Executable file

@@ -0,0 +1,169 @@
#!/usr/bin/python
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
"""Download files in bibliography into a local cache.
"""
import os
import sys
import signal
import time
import gzip
import BibTeX
import config
import urllib2
import getopt
import socket
import errno
import httplib
FILE_TYPES = [ "txt", "html", "pdf", "ps", "ps.gz", "abstract" ]
BIN_FILE_TYPES = [ 'pdf', 'ps.gz' ]
class UIError(Exception):
pass
def tryUnlink(fn):
try:
os.unlink(fn)
except OSError:
pass
def getCacheFname(key, ftype, section):
return BibTeX.smartJoin(config.OUTPUT_DIR,config.CACHE_DIR,
section,
"%s.%s"%(key,ftype))
def downloadFile(key, ftype, section, url,timeout=None):
if timeout is None:
timeout = config.DOWNLOAD_CONNECT_TIMEOUT
fname = getCacheFname(key, ftype, section)
parent = os.path.split(fname)[0]
if not os.path.exists(parent):
os.makedirs(parent)
fnameTmp = fname+".tmp"
fnameURL = fname+".url"
tryUnlink(fnameTmp)
def sigalrmHandler(sig,_):
pass
signal.signal(signal.SIGALRM, sigalrmHandler)
signal.alarm(timeout)
try:
try:
infile = urllib2.urlopen(url)
except httplib.InvalidURL, e:
raise UIError("Invalid URL %s: %s"%(url,e))
except IOError, e:
raise UIError("Cannot connect to url %s: %s"%(url,e))
except socket.error, e:
if getattr(e,"errno",-1) == errno.EINTR:
raise UIError("Connection timed out to url %s"%url)
else:
raise UIError("Error connecting to %s: %s"%(url, e))
finally:
signal.alarm(0)
mode = 'w'
if ftype in BIN_FILE_TYPES:
mode = 'wb'
outfile = open(fnameTmp, mode)
try:
while 1:
s = infile.read(1<<16)
if not s: break
outfile.write(s)
finally:
infile.close()
outfile.close()
urlfile = open(fnameURL, 'w')
print >>urlfile, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
if "\n" in url: url = url.replace("\n", " ")
print >>urlfile, url
urlfile.close()
os.rename(fnameTmp, fname)
def getURLs(entry):
r = {}
for ftype in FILE_TYPES:
ftype2 = ftype.replace(".", "_")
url = entry.get("www_%s_url"%ftype2)
if url:
r[ftype] = url.strip().replace("\n", " ")
return r
def getCachedURL(key, ftype, section):
fname = getCacheFname(key, ftype, section)
urlFname = fname+".url"
if not os.path.exists(fname) or not os.path.exists(urlFname):
return None
f = open(urlFname, 'r')
lines = f.readlines()
f.close()
if len(lines) != 2:
print >>sys.stderr, "ERROR: unexpected number of lines in", urlFname
return lines[1].strip()
def downloadAll(bibtex, missingOnly=0):
"""returns list of tuples of key, ftype, url, error"""
errors = []
for e in bibtex.entries:
urls = getURLs(e)
key = e.key
section = e.get("www_cache_section", ".")
for ftype, url in urls.items():
if missingOnly:
cachedURL = getCachedURL(key, ftype, section)
if cachedURL == url:
print >>sys.stderr,"Skipping",url
continue
elif cachedURL is not None:
print >>sys.stderr,"URL for %s.%s has changed"%(key,ftype)
else:
print >>sys.stderr,"I have no copy of %s.%s"%(key,ftype)
try:
downloadFile(key, ftype, section, url)
print "Downloaded",url
except UIError, e:
print >>sys.stderr, str(e)
errors.append((key,ftype,url,str(e)))
except (IOError, socket.error), e:
msg = "Error downloading %s: %s"%(url,str(e))
print >>sys.stderr, msg
errors.append((key,ftype,url,msg))
if urls.has_key("ps") and not urls.has_key("ps.gz"):
# Say, this is something we'd like to have gzipped locally.
psFname = getCacheFname(key, "ps", section)
psGzFname = getCacheFname(key, "ps.gz", section)
if os.path.exists(psFname) and not os.path.exists(psGzFname):
# This is something we haven't gzipped yet.
print "Compressing a copy of",psFname
outf = gzip.GzipFile(psGzFname, "wb")
inf = open(psFname, "rb")
while 1:
s = inf.read(4096)
if not s:
break
outf.write(s)
outf.close()
inf.close()
return errors
if __name__ == '__main__':
if len(sys.argv) == 2:
print "Loading from %s"%sys.argv[1]
else:
print >>sys.stderr, "Expected a single configuration file as an argument"
sys.exit(1)
config.load(sys.argv[1])
if config.CACHE_UMASK != None:
os.umask(config.CACHE_UMASK)
bib = BibTeX.parseFile(config.MASTER_BIB)
downloadAll(bib,missingOnly=1)

BIN
i2p2www/anonbib/ups.gif Normal file

Binary file not shown (size: 536 B).


@@ -0,0 +1,41 @@
This file is to keep track of which volumes of which publications have
been combed for anonymity papers and which we still have to add.
=== DONE:
ExampleConference (through 2008)
PETS 2000-2003
=== CLAIMED:
PETS 2000-2010 -- Nick (claimed 6/16)
ESORICS 1990-2010 -- Nick (claimed 6/16)
CCS -- George (claimed 6/17)
USENIX Security ("Oakland") -- George (claimed 6/17)
=== SHOULD DO:
Infohiding
IEEE Security and privacy
NDSS
WPES
WEIS
Financial Crypto
Eurocrypt
Asiacrypt
Search: Papers that cite Chaum's paper
Search: Papers that cite the Tor paper
Search: Papers that cite the original onion routing papers
Search: Papers mentioning "anonymity" or "anonymous"
Search: Papers mentioning "mixnet" or "mix-net"
=== UNDERSERVED CONTENT; PLEASE SUGGEST SEARCHES AND VENUES
Private information retrieval; PIR
Anti-censorship; censorship
Location privacy
Anonymous credentials
Anonymizing data
Secure multiparty computation

246
i2p2www/anonbib/writeHTML.py Executable file

@@ -0,0 +1,246 @@
#!/usr/bin/python
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
"""Generate indices by author, topic, date, and BibTeX key."""
import sys
import re
import os
import json
assert sys.version_info[:3] >= (2,2,0)
os.umask(022)
import BibTeX
import config
def getTemplate(name):
f = open(name)
template = f.read()
f.close()
template_s, template_e = template.split("%(entries)s")
return template_s, template_e
def pathLength(s):
n = 0
while s:
parent, leaf = os.path.split(s)
if leaf != '' and leaf != '.':
n += 1
s = parent
return n
def writeBody(f, sections, section_urls, cache_path, base_url):
'''f: an open file
sections: list of (sectionname, [list of BibTeXEntry])
section_urls: map from sectionname to external url'''
for s, entries in sections:
u = section_urls.get(s)
sDisp = re.sub(r'\s+', ' ', s.strip())
sDisp = sDisp.replace(" ", "&nbsp;")
if u:
print >>f, ('<li><h3><a name="%s"></a><a href="%s">%s</a></h3>'%(
(BibTeX.url_untranslate(s), u, sDisp)))
else:
print >>f, ('<li><h3><a name="%s">%s</a></h3>'%(
BibTeX.url_untranslate(s),sDisp))
print >>f, "<ul class='expand'>"
for e in entries:
print >>f, e.to_html(cache_path=cache_path, base_url=base_url)
print >>f, "</ul></li>"
def writeHTML(f, sections, sectionType, fieldName, choices,
tag, config, cache_url_path, section_urls={}):
"""sections: list of (sectionname, [list of BibTeXEntry])'''
sectionType: str
fieldName: str
choices: list of (choice, url)"""
title = config.TAG_TITLES[tag]
short_title = config.TAG_SHORT_TITLES[tag]
#
secStr = []
for s, _ in sections:
hts = re.sub(r'\s+', ' ', s.strip())
hts = s.replace(" ", "&nbsp;")
secStr.append("<p class='l2'><a href='#%s'>%s</a></p>\n"%
((BibTeX.url_untranslate(s),hts)))
secStr = "".join(secStr)
#
tagListStr = []
st = config.TAG_SHORT_TITLES.keys()
st.sort()
root = "../"*pathLength(config.TAG_DIRECTORIES[tag])
if root == "": root = "."
for t in st:
name = config.TAG_SHORT_TITLES[t]
if t == tag:
tagListStr.append(name)
else:
url = BibTeX.smartJoin(root, config.TAG_DIRECTORIES[t], "date.html")
tagListStr.append("<a href='%s'>%s</a>"%(url, name))
tagListStr = "&nbsp;|&nbsp;".join(tagListStr)
#
choiceStr = []
for choice, url in choices:
if url:
choiceStr.append("<a href='%s'>%s</a>"%(url, choice))
else:
choiceStr.append(choice)
choiceStr = ("&nbsp;|&nbsp;".join(choiceStr))
fields = { 'command_line' : "",
'sectiontypes' : sectionType,
'choices' : choiceStr,
'field': fieldName,
'sections' : secStr,
'otherbibs' : tagListStr,
'title': title,
'short_title': short_title,
"root" : root,
}
header, footer = getTemplate(config.TEMPLATE_FILE)
print >>f, header%fields
writeBody(f, sections, section_urls, cache_path=cache_url_path,
base_url=root)
print >>f, footer%fields
def jsonDumper(obj):
if isinstance(obj, BibTeX.BibTeXEntry):
e = obj.entries.copy()
e['key'] = obj.key
return e
else:
raise TypeError("Do not know how to serialize %s"%(obj.__class__,))
def writePageSet(config, bib, tag):
if tag:
bib_entries = [ b for b in bib.entries
if tag in b.get('www_tags', "").split() ]
else:
bib_entries = bib.entries[:]
if not bib_entries:
print >>sys.stderr, "No entries with tag %r; skipping"%tag
return
tagdir = config.TAG_DIRECTORIES[tag]
outdir = os.path.join(config.OUTPUT_DIR, tagdir)
cache_url_path = BibTeX.smartJoin("../"*pathLength(tagdir),
config.CACHE_DIR)
if not os.path.exists(outdir):
os.makedirs(outdir, 0755)
##### Sorted views:
## By topic.
entries = BibTeX.sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ")
entries = BibTeX.splitSortedEntriesBy(entries, "www_section")
if entries[-1][0].startswith("<span class='bad'>"):
entries[-1] = ("Miscellaneous", entries[-1][1])
entries = [ (s, BibTeX.sortEntriesByDate(ents))
for s, ents in entries
]
f = open(os.path.join(outdir,"topic.html"), 'w')
writeHTML(f, entries, "Topics", "topic",
(("By topic", None),
("By date", "./date.html"),
("By author", "./author.html")
),
tag=tag, config=config,
cache_url_path=cache_url_path)
f.close()
## By date.
entries = BibTeX.sortEntriesByDate(bib_entries)
entries = BibTeX.splitSortedEntriesBy(entries, 'year')
for idx in -1, -2:
if entries[idx][0].startswith("<span class='bad'>"):
entries[idx] = ("Unknown", entries[idx][1])
elif entries[idx][0].startswith("forthcoming"):
entries[idx] = ("Forthcoming", entries[idx][1])
sections = [ ent[0] for ent in entries ]
first_year = int(entries[0][1][0]['year'])
try:
last_year = int(entries[-1][1][0].get('year'))
except ValueError:
last_year = int(entries[-2][1][0].get('year'))
years = map(str, range(first_year, last_year+1))
if entries[-1][0] == 'Unknown':
years.append("Unknown")
f = open(os.path.join(outdir,"date.html"), 'w')
writeHTML(f, entries, "Years", "date",
(("By topic", "./topic.html"),
("By date", None),
("By author", "./author.html")
),
tag=tag, config=config,
cache_url_path=cache_url_path)
f.close()
## By author
entries, url_map = BibTeX.splitEntriesByAuthor(bib_entries)
f = open(os.path.join(outdir,"author.html"), 'w')
writeHTML(f, entries, "Authors", "author",
(("By topic", "./topic.html"),
("By date", "./date.html"),
("By author", None),
),
tag=tag, config=config,
cache_url_path=cache_url_path,
section_urls=url_map)
f.close()
## The big BibTeX file
entries = bib_entries[:]
entries = [ (ent.key, ent) for ent in entries ]
entries.sort()
entries = [ ent[1] for ent in entries ]
## Finding the root directory is done by writeHTML(), but
## the BibTeX file doesn't use that, so repeat the code here
root = "../"*pathLength(config.TAG_DIRECTORIES[tag])
if root == "": root = "."
header,footer = getTemplate(config.BIBTEX_TEMPLATE_FILE)
f = open(os.path.join(outdir,"bibtex.html"), 'w')
print >>f, header % { 'command_line' : "",
'title': config.TAG_TITLES[tag],
'root': root }
for ent in entries:
print >>f, (
("<tr><td class='bibtex'><a name='%s'>%s</a>"
"<pre class='bibtex'>%s</pre></td></tr>")
%(BibTeX.url_untranslate(ent.key), ent.key, ent.format(90,8,1)))
print >>f, footer
f.close()
f = open(os.path.join(outdir,"bibtex.json"), 'w')
json.dump(entries, f, default=jsonDumper)
f.close()
if __name__ == '__main__':
if len(sys.argv) == 2:
print "Loading from %s"%sys.argv[1]
else:
print >>sys.stderr, "Expected a single configuration file as an argument"
sys.exit(1)
config.load(sys.argv[1])
bib = BibTeX.parseFile(config.MASTER_BIB)
for tag in config.TAG_DIRECTORIES.keys():
writePageSet(config, bib, tag)