Added anonbib for use in papers list
Source: https://gitweb.torproject.org/anonbib.git Commit: b478fc493d4be2115185d94e077bf06196495417
i2p2www/anonbib/BibTeX.py (new file, 1268 lines): diff suppressed because it is too large.
i2p2www/anonbib/Makefile (new file, 39 lines)
@@ -0,0 +1,39 @@
PYTHON=python
VERSION=0.3-dev

all:
	$(PYTHON) writeHTML.py anonbib.cfg

clean:
	rm -f *~ */*~ *.pyc *.pyo

update:
	$(PYTHON) updateCache.py anonbib.cfg
	$(PYTHON) rank.py anonbib.cfg

suggest:
	$(PYTHON) rank.py suggest anonbib.cfg

test:
	$(PYTHON) tests.py

veryclean: clean
	rm -f author.html date.html topic.html bibtex.html tmp.bib

TEMPLATES=_template_.html _template_bibtex.html
CSS=css/main.css css/pubs.css
BIBTEX=anonbib.bib
SOURCE=BibTeX.py config.py metaphone.py reconcile.py updateCache.py \
	writeHTML.py rank.py tests.py
EXTRAS=TODO README Makefile ChangeLog anonbib.cfg gold.gif silver.gif \
	upb.gif ups.gif

DISTFILES=$(TEMPLATES) $(CSS) $(BIBTEX) $(SOURCE) $(EXTRAS)

dist: clean
	rm -rf anonbib-$(VERSION)
	mkdir anonbib-$(VERSION)
	tar cf - $(DISTFILES) | (cd anonbib-$(VERSION); tar xf -)
	mkdir anonbib-$(VERSION)/cache
	tar czf anonbib-$(VERSION).tar.gz anonbib-$(VERSION)
	rm -rf anonbib-$(VERSION)
i2p2www/anonbib/README (new file, 52 lines)
@@ -0,0 +1,52 @@
anonbib 0.3 -- Code to generate the anonymity bibliography

Copyright (c) 2003-2008 Nick Mathewson
Based on 'PDOSBib' perl code by Eddie Kohler

This software is licensed under the GNU GPL, version 2 or later.

To use this software, you need to understand BibTeX and Python a
little.  If it breaks, you get to keep both pieces.  You will need
Python 2.2 or later.

To use this package:
  - Get a good BibTeX file.  You may want to mark it up with some of the
    extra keys used in our "anonbib.bib" file.  All of the additional
    Bibtex keys we use have the prefix "www_"; check out anonbib.bib
    for their usage.

  - Edit anonbib.cfg and _template_.html and _template_bibtex.html so they
    refer to your files, authors, topics, and so on.

  - Run 'python updateCache.py anonbib.cfg' to create a local cache of the
    papers in your bibliography based on their www_*_url entries.  (By
    default, the script will ignore any entries you have already cached.  To
    force a fresh download of a cached file, delete it.)

  - Run 'python rank.py anonbib.cfg' to download Google Scholar rankings of
    all the papers.

  - Run 'python writeHTML.py anonbib.cfg'.  Fix any errors you care about.

  - Re-run these scripts when you change the bibliography.

  - If you want to merge in big BibTeX files, try using the reconcile.py
    script.  See the comment at the start of the file for usage info.


New in 0.3:
  - Support for Google Scholar rankings to denote hot/rising papers.
    Implemented by George Danezis.
  - Make reconcile script generate more useful output.
  - Add support for multiple bibliographies generated from a single bibtex
    source.  This is done via 'tags' on bibtex entries.  If an entry is
    tagged, it appears in the corresponding bibliographies.  This is good
    for generating a master bibliography and one or more selected readings
    lists from the same source.
  - Handle more errors when downloading files.
  - When fetching a paper with a .ps url, generate the .ps.gz file
    automatically.
  - Note an error when a crossref overrides an existing field in an entry.
  - Handle the Proceedings type correctly.
  - Enforce proper encoding on pages: it must be number--number.
  -
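For reference, the README workflow can also be driven from a Python 2 prompt using only functions that appear in this commit (config.load, BibTeX.parseFile, updateCache.downloadAll); a minimal sketch, assuming the anonbib.cfg above is in the working directory:

    import config
    import BibTeX
    import updateCache

    config.load("anonbib.cfg")                 # populate config.* globals
    bib = BibTeX.parseFile(config.MASTER_BIB)  # parse the master .bib file
    # Fetch only papers not cached yet; downloadAll() returns
    # (key, ftype, url, error) tuples for anything that failed.
    errors = updateCache.downloadAll(bib, missingOnly=1)
    for key, ftype, url, msg in errors:
        print "failed %s.%s: %s" % (key, ftype, msg)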
i2p2www/anonbib/TODO (new file, 33 lines)
@@ -0,0 +1,33 @@


- More general tasks
  . Know about @book
  . Write unit tests for everything
  . Make name parsing vaguely sane
  - Maybe uncrossref in tmp.bib
  - Maybe pull important papers to the start of their sections?
  . Clean \{}~ when going from note to url; add \{}~ when making
    note from url.
  . Also clean \_ to _ and back
  - Look for urls in wherepublished.
  . Forgive newlines in wherepublished, note.
  - When sorting by date, entries with unknown months go into a magic
    "month zero" before January.  Is this right?
  - Strip unused features.
  o Take a configuration file on the command line instead of just
    importing config.py.

- Cache tasks
  - Generate a list of broken links
  - Re-download all cached items if requested
  - Clear dead items from cache
  - Use HTTP HEAD requests to decide whether to update stale
    elements in cache.
  - Add ability to honor a "www_no_cache={1}" option for entries
    if the authors ask us not to cache them.
  - Maybe, add ability to cache images from an HTML page.

- Reconcile tasks
  - Document it.
  - Notice when there is new or different information of certain kinds
    (pages, dates, etc) in the new information.
i2p2www/anonbib/_template_.html (new file, 74 lines)
@@ -0,0 +1,74 @@
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />

<!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME!
     *** EDIT THE .bib FILE or _template_.html INSTEAD!

     Generated by `%(command_line)s'
     (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->

<!-- anon bib anonymity bibliography -->
<title>%(title)s</title>

<link rel="stylesheet" type="text/css" href="%(root)s/css/main.css" />
<link rel="stylesheet" type="text/css" href="%(root)s/css/pubs.css" />

</head>
<body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099"
      alink="#ff9900" >

<h1 align="center">%(short_title)s</h1>
<p align="center">%(otherbibs)s</p>
<p align="center">%(choices)s</p>

<!-- Table 1: contains the sidebar and the body. -->
<table cellspacing="3" cellpadding="0" border="0" align="center" width="100%%">
<tr valign="top">
<td>

<!-- Table 2: The sidebar-->
<table align="right" cellspacing="0" cellpadding="5" width="100"
       class="sidebar">
<tr valign="top"><td><p class="l1"><strong>%(sectiontypes)s:</strong><br /></p>
%(sections)s
</td>
</tr>
</table><!-- End of table 2 -->
</td>

<td width="85%%">

<h2>Publications by %(field)s</h2>

<ul class="sections">
%(entries)s
</ul>

</td>

<td width="5%%"><br /></td>

</tr>
</table><!-- End of table 1 -->


<p class="contact">Please send new or corrected entries to
<a name="contact">
<span class="email">&lt;anonbib@free<!-- -->haven.net&gt;</span></a>.
<br />
If you can, please format them as BibTeX; see our
<a href="bibtex.html">BibTeX source page</a> for examples.<br />
Remember to include URLs if possible:
<a href="http://citeseer.ist.psu.edu/online-nature01/">offline papers are
less useful</a>.
</p>
<p>The source code to anonbib is now in Git. See <a href="https://gitweb.torproject.org/anonbib.git">the anonbib gitweb page</a> for more information.
</p>

</body>
</html>
i2p2www/anonbib/_template_bibtex.html (new file, 28 lines)
@@ -0,0 +1,28 @@
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
<meta http-equiv="Content-Style-Type" content="text/css" />

<!-- *** I AM MACHINE GENERATED! DO NOT EDIT ME!
     *** EDIT THE .bib FILE or _template_.html INSTEAD!

     Generated by `%(command_line)s'
     (c) Eddie Kohler 1999-2000, Nick Mathewson 2003 -->

<title>%(title)s: BibTeX</title>

<link rel="stylesheet" type="text/css" href="%(root)s/css/main.css" />
<link rel="stylesheet" type="text/css" href="%(root)s/css/pubs.css" />

</head>
<body bgcolor="#ffffff" text="#000000" link="#bb0000" vlink="#990099"
      alink="#ff9900" >

<table cellspacing="15" border="0" align="center" width="100%%">
%(entries)s
</table>
</body>
</html>
i2p2www/anonbib/anonbib.bib (new file, 6293 lines): diff suppressed because it is too large.
i2p2www/anonbib/anonbib.cfg (new file, 163 lines)
@@ -0,0 +1,163 @@
# Copyright 2003-2008, Nick Mathewson.  See LICENSE for licensing info.

# Our input filename.
MASTER_BIB = "./anonbib.bib"

# Where do we put generated HTML?
OUTPUT_DIR = "."

# Where do we put cached papers (relative to OUTPUT_DIR)
CACHE_DIR = "cache"

# Where do we cache citations papers (relative to OUTPUT_DIR)
CITE_CACHE_DIR = "cite_cache"

# Are there subsections for cached papers?  This is useful for putting
# different Apache permission on different directories.
CACHE_SECTIONS = [ ]

# Only include entries that have this key.  This is one way to
# generate multiple bibliographies from the same source.  Currently
# deprecated in favor of tags.
#
#example: REQUIRE_KEY = "www_selected"
#
REQUIRE_KEY = None

# Timeout when downloading from a server while caching, in seconds.
DOWNLOAD_CONNECT_TIMEOUT = 15

# Template files.
TEMPLATE_FILE = "./_template_.html"
BIBTEX_TEMPLATE_FILE = "./_template_bibtex.html"

# Map from author name regex to author homepage.
AUTHOR_URLS = {
    'Ross.*Anderson' : 'http://www.cl.cam.ac.uk/users/rja14/',
    'Alessandro.*Acquisti' : 'http://www.heinz.cmu.edu/~acquisti/index.html',
    'Agrawal' : 'http://www.research.ibm.com/people/a/agrawal/',
    'Adam.*Back' : 'http://www.cypherspace.org/~adam/',
    'Berthold' : 'http://page.inf.fu-berlin.de/~berthold/',
    'Borisov' : 'http://hatswitch.org/~nikita/',
    'Bettati' : 'http://faculty.cs.tamu.edu/bettati/',
    'Miguel.*Castro' : 'http://research.microsoft.com/users/mcastro/',
    'Chaum' : 'http://www.chaum.com/',
    'J.*Claessens' : 'http://www.esat.kuleuven.be/~joclaess/',
    'R.*Clayton' : 'http://www.cl.cam.ac.uk/~rnc1/',
    'Wei Dai' : 'http://www.eskimo.com/~weidai/',
    'Danezis' : 'http://homes.esat.kuleuven.be/~gdanezis/',
    'Claudia.*az' : 'http://www.esat.kuleuven.be/~cdiaz/',
    'Dingledine' : 'http://www.freehaven.net/~arma/cv.html',
    'Desmedt' : 'http://www.cs.fsu.edu/~desmedt/',
    'Douceur' : 'http://research.microsoft.com/~johndo/',
    'N.*Hopper' : 'http://www-users.cs.umn.edu/~hopper/',
    'Michael.*Freedman' : 'http://www.scs.cs.nyu.edu/~mfreed/',
    'Gergely' : 'http://www.planeforge.com/home/tgm',
    'Ian.*Goldberg' : 'http://www.cs.uwaterloo.ca/~iang/',
    'Christian.*Grothoff' : 'http://grothoff.org/christian/',
    'D.*Hopwood' : 'http://www.users.zetnet.co.uk/hopwood/',
    'Jakobsson' : 'http://www2.parc.com/csl/members/mjakobss/markus-jakobsson.htm',
    'Juels' : 'http://www.rsasecurity.com/rsalabs/staff/bios/ajuels/',
    'Kaashoek' : 'http://pdos.csail.mit.edu/~kaashoek/',
    'K.*Kurosawa' : 'http://kuro.cis.ibaraki.ac.jp/~kurosawa/',
    'H.*Langos' : 'http://www.wh9.tu-dresden.de/~heinrich/',
    'B.*Liskov' : 'http://www.pmg.lcs.mit.edu/barbara_liskov.html',
    'Mathewson' : 'http://www.wangafu.net/~nickm/',
    'Mazières' : 'http://www.scs.cs.nyu.edu/~dm/',
    'B.*Möller' : ('http://www.informatik.tu-darmstadt.de/TI/'
                   'Mitarbeiter/moeller.html'),
    'U.*Möller' : 'http://www.ulfm.de/',
    'D.*Molnar' : 'http://www.cs.berkeley.edu/~dmolnar/',
    'R.*Morris' : 'http://www.pdos.lcs.mit.edu/~rtm/',
    'S.*Murdoch' : 'http://www.cl.cam.ac.uk/users/sjm217/',
    'A.*Pashalidis' : 'http://www.xrtc.com/',
    'A.*Pfitzmann' : 'http://dud.inf.tu-dresden.de/~pfitza/',
    'B.*Pfitzmann' : 'http://www.zurich.ibm.com/~bpf/',
    'B.*Preneel' : 'http://www.esat.kuleuven.be/~preneel/',
    'Daniel.*Simon' : 'http://research.microsoft.com/crypto/dansimon/me.htm',
    'Rackoff' : 'http://www.cs.toronto.edu/DCS/People/Faculty/rackoff.html',
    'Jean F' : 'http://www.geocities.com/j_f_raymond/',
    'M.*Rennhard' : 'http://www.tik.ee.ethz.ch/~rennhard/',
    'M.*Reiter' : 'http://www.ece.cmu.edu/~reiter/',
    'Rivest' : 'http://theory.lcs.mit.edu/~rivest/',
    'Avi.*Rubin' : 'http://avirubin.com/',
    'Sassaman' : 'http://homes.esat.kuleuven.be/~lsassama/',
    'Serjantov' : 'http://home.arachsys.com/~aas/',
    'S.*Seys' : 'http://www.esat.kuleuven.be/~sseys/',
    'Shoup' : 'http://www.shoup.net/',
    'Syverson' : 'http://www.syverson.org/',
    'Tsudik' : 'http://www.ics.uci.edu/~gts/c.html',
    'M.*Waidner' : 'http://www.zurich.ibm.com/~wmi/',
    'David.*Wagner' : 'http://www.cs.berkeley.edu/~daw/',
    'M.*Waldman' : 'http://cs1.cs.nyu.edu/~waldman/',
    'B.*Waters' : 'http://www.cs.utexas.edu/~bwaters/',
    'Chenxi.*Wang' : 'http://www.ece.cmu.edu/~chenxi/',
    'M.*Wright' : 'http://ranger.uta.edu/~mwright/',
    'B.*Levine' : 'http://prisms.cs.umass.edu/brian/',
    'T.*Benjamin' : 'http://www.cs.umass.edu/~tshb/',
    'B.*Defend' : 'http://www.cs.umass.edu/~defend/',
    'K.*Fu' : 'http://www.cs.umass.edu/~kevinfu/',
    'J.*Camenisch' : 'http://www.zurich.ibm.com/~jca/',
    'S.*Hohenberger' : 'http://www.cs.jhu.edu/~susan/',
    'M.*Kohlweiss' : 'http://homes.esat.kuleuven.be/~mkohlwei/',
    'A.*Lysyanskaya' : 'http://www.cs.brown.edu/~anna/',
    'M.*Meyerovich' : 'http://www.cs.brown.edu/~mira/',
    'P.*Zieli.*ski' : 'http://www.cl.cam.ac.uk/~pz215/',
    'S.*Zander' : 'http://caia.swin.edu.au/cv/szander/'
    }

# List of patterns for author names _not_ to do an initial-tolerant
# match on when building section list.  E.g., if "J\\. Smith" is in
# this list, he won't be folded into "John Smith".
NO_COLLAPSE_AUTHORS = [

    ]

# Map from LaTeX-style name of author to collapse to canonical name.
COLLAPSE_AUTHORS = {
    "Nicholas Mathewson": "Nick Mathewson",
    }

# Map from author pattern to collation key.
# This keeps 'Zero Knowledge Systems' from getting alphabetized as "Systems,
# Zero Knowledge."
ALPHABETIZE_AUTHOR_AS = {
    "Zero.*Knowledge.*Systems": "Zero Knowledge Systems",
    "Carlos.*Aguilar.*Melchor": "Aguilar Melchor Carlos",
    }

# Map of strings to initialize BibTeX parsing with.
INITIAL_STRINGS = {
    # SECTIONS
    'sec_mix'        : "Mix Networks: Design",
    'sec_mixattacks' : "Mix Networks: Attacks",
    'sec_stream'     : "Stream-based anonymity",
    'sec_traffic'    : "Traffic analysis",
    'sec_pub'        : "Anonymous publication",
    'sec_pir'        : "Private Information Retrieval",
    'sec_nym'        : "Pseudonymity"
    }

# Don't put in any entries of this type.
OMIT_ENTRIES = ("proceedings", "journal")

# List of all recognized values for www_tags.
ALL_TAGS = ("selected", )

# Titles of page, by tag.
TAG_TITLES = { "": "The Free Haven Anonymity Bibliography",
               "selected": "Free Haven's Selected Papers in Anonymity"
               }

# As TAG_TITLES, but shorter.
TAG_SHORT_TITLES = { "": "Anonymity Bibliography",
                     "selected": "Selected Papers in Anonymity",
                     }

# Directories where tag pages get generated.
TAG_DIRECTORIES = { '': "full",
                    "selected": "" }

# Make cached stuff group-writable.  Make sure that your cache directories
# are sticky!
CACHE_UMASK = 002
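The tag maps above are how one BibTeX source yields several bibliographies. A hypothetical extension (not part of this commit) adding a third "readings" page would only need to touch the tag settings:

    ALL_TAGS = ("selected", "readings")   # values recognized in www_tags
    TAG_TITLES["readings"] = "A Selected Readings List"
    TAG_SHORT_TITLES["readings"] = "Selected Readings"
    TAG_DIRECTORIES["readings"] = "readings"   # pages land in ./readings/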
i2p2www/anonbib/config.py (new file, 56 lines)
@@ -0,0 +1,56 @@
# Copyright 2003-2006, Nick Mathewson.  See LICENSE for licensing info.

import re

_KEYS = [ "ALL_TAGS",
    "ALPHABETIZE_AUTHOR_AS","AUTHOR_URLS","CACHE_DIR","CACHE_SECTIONS",
    "CACHE_UMASK",
    "CITE_CACHE_DIR",
    "COLLAPSE_AUTHORS",
    "DOWNLOAD_CONNECT_TIMEOUT","INITIAL_STRINGS",
    "MASTER_BIB", "NO_COLLAPSE_AUTHORS", "OMIT_ENTRIES",
    "OUTPUT_DIR", "TEMPLATE_FILE", "BIBTEX_TEMPLATE_FILE",
    "REQUIRE_KEY", "TAG_TITLES", "TAG_DIRECTORIES", "TAG_SHORT_TITLES",
    ]

for _k in _KEYS:
    globals()[_k]=None
del _k

def load(cfgFile):
    mod = {}
    execfile(cfgFile, mod)
    for _k in _KEYS:
        try:
            globals()[_k]=mod[_k]
        except KeyError:
            raise KeyError("Configuration option %s is missing"%_k)

    INITIAL_STRINGS.update(_EXTRA_INITIAL_STRINGS)
    AUTHOR_RE_LIST[:] = [
        (re.compile(k, re.I), v,) for k, v in AUTHOR_URLS.items()
        ]

    NO_COLLAPSE_AUTHORS_RE_LIST[:] = [
        re.compile(pat, re.I) for pat in NO_COLLAPSE_AUTHORS
        ]

    ALPHABETIZE_AUTHOR_AS_RE_LIST[:] = [
        (re.compile(k, re.I), v,) for k,v in ALPHABETIZE_AUTHOR_AS.items()
        ]

_EXTRA_INITIAL_STRINGS = {
    # MONTHS
    'jan' : 'January', 'feb' : 'February',
    'mar' : 'March', 'apr' : 'April',
    'may' : 'May', 'jun' : 'June',
    'jul' : 'July', 'aug' : 'August',
    'sep' : 'September', 'oct' : 'October',
    'nov' : 'November', 'dec' : 'December',
    }

AUTHOR_RE_LIST = []

NO_COLLAPSE_AUTHORS_RE_LIST = []

ALPHABETIZE_AUTHOR_AS_RE_LIST = []
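Note that a configuration file is itself Python: load() runs it with execfile() and copies every name in _KEYS into this module's globals, raising KeyError naming any option the file forgot to define. A short sketch of the resulting behavior:

    import config

    config.load("anonbib.cfg")            # KeyError if an option is missing
    print config.MASTER_BIB               # "./anonbib.bib"
    print config.INITIAL_STRINGS['jan']   # month strings merged in: "January"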
i2p2www/anonbib/css/main.css (new file, 111 lines)
@@ -0,0 +1,111 @@
img {
  border: 0px;
}

BODY {
  background-color: #FFF;
  color: #000;
  margin: 0px;
}

FORM {
  margin-top: 0.5em;
  margin-bottom: 0.5em;
}

P, TD {
  font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
}

P.contact {
  text-align: center;
}

P.contact A {
  font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
  font-weight: normal;
}

SPAN.email {
  font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
  font-weight: bold;
}

P IMG {
  vertical-align: text-bottom;
}

P.crumbbreadth {
  margin-top: 0.25em;
}

.compact {
  margin-top: -0.5em;
  text-indent: 0em;
}

SPAN.biblio {
  font-style: italic;
}

SPAN.biblio A {
  font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
  font-weight: normal;
  text-decoration: underline;
}

SPAN.availability {
  font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
  font-weight: normal;
}

UL {
  list-style: outside;
}

UL.expand {
  margin-bottom: 1em;
}

UL.sections {
  list-style: none;
}

/* Font-level properties */

PRE {
  font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
}

STRONG, A {
  font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Rockwell", "Lucida Sans Unicode", monospace;
  font-weight: bold;
}

A:link {
  color: #B00;
}

A:visited {
  color: #903;
}

H1, H2, H3, H4, H5, H6 {
  font-family: lucidatypewriter, "Lucida Typewriter", "Lucida Console", Monaco, monospace;
}

H1 A, H2 A, H3 A, H4 A, H5 A, H6 A {
  font-family: lucidatypewriter, "Lucida Typewriter", "Lucida Console", Monaco, monospace;
}

H1 {
  color: #00B;
}

H2 {
  color: #006;
}

H3 {
  color: #006;
}
i2p2www/anonbib/css/pubs.css (new file, 121 lines)
@@ -0,0 +1,121 @@
SPAN.title {
  font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
  font-weight: bold;
}

SPAN.author {
  font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
  font-weight: normal;
}

SPAN.availability {
  font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
  font-weight: normal;
}

SPAN.author A {
  font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
  font-weight: normal;
}

SPAN.biblio {
  font-family: lucida, "Lucida Sans Unicode", Geneva, sans-serif;
  font-style: italic;
}

SPAN.biblio A {
  text-decoration: underline;
}

SPAN.bad {
  text-decoration: underline;
  color: #000;
  background-color: #FDF;
}

P.remarks {
  font-family: serif;
  margin-top: 0.3em;
  margin-bottom: 0;
  margin-left: 5em;
  padding-left: 0.5em;
  border-width: 0 0 0 5px;
  border-color: black;
  border-style: solid;
}

P.remarks A {
  text-decoration: underline;
}

P.l1 {
  margin-left: 0.5em;
}

P.l2 {
  margin-left: 1em;
  margin-top: 0.3em;
  margin-bottom: 0.3em;
}

P.entry {
  margin-top: 0.7em;
  margin-bottom: 0;
}

DIV.impEntry {
  border-width: 1px;
  border-color: black;
  border-style: solid;
  background-color: #FFE;
  padding: 0.3em;
  margin-top: 0.7em;
  margin-bottom: 0;
}

P.impEntry {
  background-color: #FFE;
  padding: 0;
  margin-top: 0;
  margin-bottom: 0;
}

DIV.draftEntry {
  /*
  border-width: 1px;
  border-color: black;
  border-style: solid;
  padding: 0.3em;
  margin-top: 0.7em;
  margin-bottom: 0;
  */
}

P.draftEntry {
  color: #555;
  padding: 0;
  margin-top: 0;
  margin-bottom: 0;
}

TABLE.sidebar {
  border-width: 2px;
  border-color: black;
  border-style: solid;
  background-color: #CFF;
}

TD.bibtex {
  font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
  border-width: 2px;
  font-weight: normal;
  border-color: black;
  border-style: solid;
  background-color: #DFF;
}

PRE.bibtex {
  font-family: lucidatypewriter, "Lucida Typewriter", Monaco, "Lucida Sans Unicode", monospace;
  font-size: smaller;
}
i2p2www/anonbib/gold.gif (new binary file, 540 B): binary file not shown.
i2p2www/anonbib/metaphone.py (new file, 193 lines)
@@ -0,0 +1,193 @@
#!/usr/bin/python2
# Copyright 2003-2008, Nick Mathewson.  See LICENSE for licensing info.

"""metaphone.py -- Pure-python metaphone implementation.

   (This is not guaranteed to match the real metaphone algorithm; I
   haven't tested it thoroughly enough.  Let me know if you find bugs.

   Based on the original C++ metaphone implementation.)
"""

TRIPLES = {
    'dge': 'j',
    'dgi': 'j',
    'dgy': 'j',
    'sia': '+x',
    'sio': '+x',
    'tia': '+x',
    'tio': '+x',
    'tch': '',
    'tha': '0',
    'the': '0',
    'thi': '0',
    'tho': '0',
    'thu': '0',
}

DOUBLES = {
    'ph' : 'f',
    'sh' : 'x'
}

SINGLETONS = {
    'd': 't',
    'f': 'f',
    'j': 'j',
    'l': 'l',
    'm': 'm',
    'n': 'n',
    'r': 'r',
    'p': 'p',
    'q': 'k',
    'v': 'f',
    'x': 'ks',
    'z': 's',
}

ALLCHARS = "".join(map(chr, range(256)))
NONLCCHARS = "".join([c for c in ALLCHARS if not c.islower()])
def metaphone(s):
    """Return the metaphone equivalent of a provided string"""
    s = s.lower()
    s = s.translate(ALLCHARS, NONLCCHARS)

    if not s: return ""

    # If ae, gn, kn, pn, wr then drop the first letter.
    if s[:2] in ("ae", "gn", "kn", "pn", "wr"):
        s = s[1:]

    # Change "x" to "s"
    if s[0] == 'x':
        s = "s%s" % s[1:]

    # Get rid of "h" in "wh".
    if s[:2] == 'wh':
        s = "w%s" % s[1:]

    # Get rid of s from end.
    if s[-1] == 's':
        s = s[:-1]

    result = []
    prevLtr = ' '
    vowelBefore = 0
    lastChar = len(s)-1
    for idx in range(len(s)):
        curLtr = s[idx]
        # If first char is a vowel, keep it.
        if curLtr in "aeiou":
            if idx == 0:
                result.append(curLtr)
            continue

        # Skip double letters.
        if idx < lastChar:
            if curLtr == s[idx+1]:
                continue

        try:
            r = TRIPLES[s[idx:idx+3]]
            if r == "+x":
                if idx > 1:
                    result.append("x")
                continue
            else:
                result.append(r)
                continue
        except KeyError:
            pass
        try:
            r = DOUBLES[s[idx:idx+2]]
            result.append(r)
            continue
        except KeyError:
            pass
        try:
            r = SINGLETONS[s[idx]]
            result.append(r)
            continue
        except KeyError:
            pass

        if idx > 0:
            prevLtr = s[idx-1]
            vowelBefore = prevLtr in "aeiou"
        curLtr = s[idx]

        nextLtr2 = ' '
        if idx < lastChar:
            nextLtr = s[idx+1]
            vowelAfter = nextLtr in "aeiou"
            frontvAfter = nextLtr in "eiy"
            if idx+1 < lastChar:
                nextLtr2 = s[idx+2]
        else:
            nextLtr = ' '
            vowelAfter = frontvAfter = 0


        if curLtr == 'b':
            if idx == lastChar and prevLtr == 'm':
                pass
            else:
                result.append(curLtr)
        elif curLtr == 'c':
            # silent 'sci', 'sce, 'scy', 'sci', etc OK.
            if not (prevLtr == 's' and frontvAfter):
                if nextLtr in 'ia':
                    result.append("x")
                elif frontvAfter:
                    result.append("s")
                elif prevLtr == 's' and nextLtr == 'h':
                    result.append('k')
                elif nextLtr == 'h':
                    if idx == 0 and nextLtr2 in "aeiou":
                        result.append('k')
                    else:
                        result.append('x')
                elif prevLtr == 'c':
                    result.append('c')
                else:
                    result.append('k')
        elif curLtr == 'g':
            if (idx < lastChar-1) and nextLtr == 'h':
                pass
            elif s[idx:] == 'gned':
                pass
            elif s[idx:] == 'gn':
                pass
            elif prevLtr == 'd' and frontvAfter:
                pass
            else:
                hard = (prevLtr == 'g')
                if frontvAfter and not hard:
                    result.append('j')
                else:
                    result.append('k')
        elif curLtr == 'h':
            if prevLtr in 'csptg':
                pass
            elif vowelBefore and not vowelAfter:
                pass
            else:
                result.append('h')
        elif curLtr == 'k':
            if prevLtr != 'c': result.append('k')
        elif curLtr in 'wy':
            if vowelAfter:
                result.append(curLtr)

    return "".join(result)

def demo(a):
    print a, "=>", metaphone(a)

if __name__ == '__main__':
    demo("Nick. Mathewson")

    demo("joe schmidt")
    demo("Beethoven")

    demo("Because the world is round")
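reconcile.py uses metaphone() to match author names that are spelled differently but sound alike. A quick illustration; per my reading of the tables above, double letters are skipped and 'th' maps to '0', so both spellings below should print the same code ("m0n"), though that is worth verifying:

    from metaphone import metaphone

    print metaphone("Mathewson")
    print metaphone("Matthewson")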
i2p2www/anonbib/rank.py (new file, 202 lines)
@@ -0,0 +1,202 @@
# Make rankings of papers and authors for automatic classification of content hotness

# Google Scholar address
# http://scholar.google.com/scholar?as_epq=

# Take care of the caching setup
cache_expire = 60*60*24*30 # 30 days

# Checks
import config
import os
import sys
from os.path import exists, isdir, join, getmtime
from os import listdir, remove

def remove_old():
    # Remove all old cached files
    filenames = listdir(cache_folder())
    from time import time
    now = time()
    for f in filenames:
        pf = join(cache_folder(), f)
        time_mt = getmtime(pf)
        if now - time_mt > cache_expire: # 30 days
            remove(pf)

def cache_folder():
    r = join(config.OUTPUT_DIR, config.CITE_CACHE_DIR)
    if not exists(r):
        os.makedirs(r)
    assert isdir(r)
    return r

import re
from urllib2 import urlopen, build_opener
from urllib import quote
from datetime import date
import hashlib

# A more handy hash
def md5h(s):
    m = hashlib.md5()
    m.update(s)
    return m.hexdigest()

format_tested = 0

def getPageForTitle(title, cache=True, update=True, save=True):
    #Returns (citation-count, scholar url) tuple, or (None,None)
    global format_tested
    if not format_tested and update:
        format_tested = 1
        TestScholarFormat()

    # Do not assume that the title is clean
    title = re.sub("\s+", " ", title)
    title = re.sub("[^'a-zA-Z0-9\. \-\/:]", "", title)
    title = re.sub("'\/", " ", title)

    # We rely on google scholar to return the article with this exact title
    gurl = "http://scholar.google.com/scholar?as_q=&as_epq=%s&as_occt=title"

    url = gurl % quote(title)

    # Access cache or network
    if exists(join(cache_folder(), md5h(url))) and cache:
        return url, file(join(cache_folder(), md5h(url)),'r').read()
    elif update:
        print "Downloading rank for %r."%title

        # Make a custom user agent (so that we are not filtered by Google)!
        opener = build_opener()
        opener.addheaders = [('User-agent', 'Anon.Bib.0.1')]

        print "connecting..."
        connection = opener.open(url)
        print "reading"
        page = connection.read()
        print "done"
        if save:
            file(join(cache_folder(), md5h(url)),'w').write(page)
        return url, page
    else:
        return url, None

def getCite(title, cache=True, update=True, save=True):
    url, page = getPageForTitle(title, cache=cache, update=update, save=save)
    if not page:
        return None,None

    # Check if it finds any articles
    if len(re.findall("did not match any articles", page)) > 0:
        return (None, None)

    # Kill all tags!
    cpage = re.sub("<[^>]*>", "", page)

    # Add up all citations
    s = sum([int(x) for x in re.findall("Cited by ([0-9]*)", cpage)])
    return (s, url)

def getPaperURLs(title, cache=True, update=True, save=True):
    url, page = getPageForTitle(title, cache=cache, update=update, save=save)
    if not page:
        return []
    pages = re.findall(r'\&\#x25ba\;.*class=fl href="([^"]*)"', page)
    return pages

def get_rank_html(title, years=None, base_url=".", update=True,
                  velocity=False):
    s,url = getCite(title, update=update)

    # Paper cannot be found
    if s is None:
        return ''

    html = ''

    url = url.replace("&","&amp;")

    # Hotness
    H,h = 50,5
    if s >= H:
        html += '<a href="%s"><img src="%s/gold.gif" alt="More than %s citations on Google Scholar" title="More than %s citations on Google Scholar" /></a>' % (url,base_url,H,H)
    elif s >= h:
        html += '<a href="%s"><img src="%s/silver.gif" alt="More than %s citations on Google Scholar" title="More than %s citations on Google Scholar" /></a>' % (url,base_url,h,h)

    # Only include the velocity if asked.
    if velocity:
        # Velocity
        d = date.today().year - int(years)
        if d >= 0:
            if 2 < s / (d +1) < 10:
                html += '<img src="%s/ups.gif" />' % base_url
            if 10 <= s / (d +1):
                html += '<img src="%s/upb.gif" />' % base_url

    return html

def TestScholarFormat():
    # We need to ensure that Google Scholar does not change its page format under our feet
    # Use some cases to check if all is good
    print "Checking google scholar formats..."
    stopAndGoCites = getCite("Stop-and-Go MIXes: Providing Probabilistic Anonymity in an Open System", False)[0]
    dragonCites = getCite("Mixes protected by Dragons and Pixies: an empirical study", False, save=False)[0]

    if stopAndGoCites in (0, None):
        print """OOPS.\n
It looks like Google Scholar changed their URL format or their output format.
I went to count the cites for the Stop-and-Go MIXes paper, and got nothing."""
        sys.exit(1)

    if dragonCites != None:
        print """OOPS.\n
It looks like Google Scholar changed their URL format or their output format.
I went to count the cites for a fictitious paper, and found some."""
        sys.exit(1)

def urlIsUseless(u):
    if u.find("freehaven.net/anonbib/") >= 0:
        # Our own cache is not the primary citation for anything.
        return True
    elif u.find("owens.mit.edu") >= 0:
        # These citations only work for 'members of the MIT community'.
        return True
    else:
        return False

URLTYPES=[ "pdf", "ps", "txt", "ps_gz", "html" ]

if __name__ == '__main__':
    # First download the bibliography file.
    import BibTeX
    suggest = False
    if sys.argv[1] == 'suggest':
        suggest = True
        del sys.argv[1]

    config.load(sys.argv[1])
    if config.CACHE_UMASK != None:
        os.umask(config.CACHE_UMASK)
    bib = BibTeX.parseFile(config.MASTER_BIB)
    remove_old()

    print "Downloading missing ranks."
    for ent in bib.entries:
        getCite(ent['title'], cache=True, update=True)

    if suggest:
        for ent in bib.entries:
            haveOne = False
            for utype in URLTYPES:
                if ent.has_key("www_%s_url"%utype):
                    haveOne = True
                    break
            if haveOne:
                continue
            print ent.key, "has no URLs given."
            urls = [ u for u in getPaperURLs(ent['title']) if not urlIsUseless(u) ]
            for u in urls:
                print "\t", u
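In short: getCite() returns a (citation-count, scholar-url) pair, and get_rank_html() turns the count into a gold badge at 50 or more cites or a silver badge at 5 or more. A sketch of offline use, with update=False so only the local cite_cache is consulted and no Google traffic is sent; the title is just an example entry:

    import config, rank

    config.load("anonbib.cfg")
    count, url = rank.getCite("Tor: The Second-Generation Onion Router",
                              update=False)   # (None, None) if not cached
    if count is not None:
        print "%d citations (%s)" % (count, url)
    print rank.get_rank_html("Tor: The Second-Generation Onion Router",
                             update=False)    # '' when no count is available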
i2p2www/anonbib/reconcile.py (new file, 292 lines)
@@ -0,0 +1,292 @@
#!/usr/bin/python2
# Copyright 2003-2008, Nick Mathewson.  See LICENSE for licensing info.

"""Code to determine which entries are new and which are old.

To scan a new file, run "python reconcile.py anonbib.cfg new-file.bib".  This
will generate a new bibtex file called 'tmp.bib', with all the new entries
cleaned up a little, and all the duplicate entries commented out.
"""

import sys
import re

assert sys.version_info[:3] >= (2,2,0)

import BibTeX
import config
import metaphone

_MPCACHE = {}
def soundsLike(s1, s2):
    c = _MPCACHE
    s1 = clean(s1)
    s2 = clean(s2)
    try:
        m1 = c[s1]
    except KeyError:
        m1 = c[s1] = metaphone.metaphone(s1)
    try:
        m2 = c[s2]
    except KeyError:
        m2 = c[s2] = metaphone.metaphone(s2)

    return m1 == m2

def mphone(s):
    c = _MPCACHE
    s = clean(s)
    try:
        return c[s]
    except:
        m = c[s] = metaphone.metaphone(s)
        return m

def clean(s):
    s = re.sub(r'\s+', ' ', s)
    s = s.strip()
    return s

class MasterBibTeX(BibTeX.BibTeX):
    def __init__(self):
        BibTeX.BibTeX.__init__(self)

    def buildIndex(self):
        self.byTitle = {}
        for ent in self.entries:
            for t in self._titleForms(ent['title']):
                self.byTitle.setdefault(t, []).append(ent)

    def _titleForms(self, title):
        title = title.lower()
        title = re.sub(r'\b(an|a|the|of)\b', "", title)
        title = clean(title)
        res = [ mphone(title) ]
        if ':' in title:
            for t in title.split(":"):
                res.append(mphone(t.strip()))
        #print "%r\n => %s" % (title,res)
        return res

    def _titlesAlike(self, t1, t2):
        t1 = clean(t1)
        t2 = clean(t2)
        if t1 == t2:
            return 2
        tf1 = self._titleForms(t1)
        tf2 = self._titleForms(t2)
        for t in tf1:
            if t in tf2: return 1
        return 0

    def _authorsAlike(self, a1, a2):
        if not soundsLike(" ".join(a1.last)," ".join(a2.last)):
            return 0

        if (a1.first == a2.first and a1.von == a2.von
            and a1.jr == a2.jr):
            return 2


        if soundsLike(" ".join(a1.first), " ".join(a2.first)):
            return 1

        if not a1.first or not a2.first:
            return 1

        if self._initialize(a1.first) == self._initialize(a2.first):
            return 1

        return 0

    def _initialize(self, name):
        name = " ".join(name).lower()
        name = re.sub(r'([a-z])[a-z\.]*', r'\1', name)
        name = clean(name)
        return name

    def _authorListsAlike(self, a1, a2):
        if len(a1) != len(a2):
            return 0
        a1 = [ (a.last, a) for a in a1 ]
        a2 = [ (a.last, a) for a in a2 ]
        a1.sort()
        a2.sort()
        if len(a1) != len(a2):
            return 0
        r = 2
        for (_, a1), (_, a2) in zip(a1,a2):
            x = self._authorsAlike(a1,a2)
            if not x:
                return 0
            elif x == 1:
                r = 1
        return r

    def _entryDatesAlike(self, e1, e2):
        try:
            if clean(e1['year']) == clean(e2['year']):
                return 2
            else:
                return 0
        except KeyError:
            return 1

    def includes(self, ent, all=0):
        title = ent['title']
        candidates = []
        for form in self._titleForms(title):
            try:
                candidates.extend(self.byTitle[form])
            except KeyError:
                pass
        goodness = []
        for knownEnt in candidates:
            match = (self._entryDatesAlike(ent, knownEnt) *
                     self._titlesAlike(ent['title'], knownEnt['title']) *
                     self._authorListsAlike(ent.parsedAuthor,
                                            knownEnt.parsedAuthor) )
            if match:
                goodness.append((match, knownEnt))
        goodness.sort()
        if all:
            return goodness
        if goodness:
            return goodness[-1]
        else:
            return None, None

    def demo(self):
        for e in self.entries:
            matches = self.includes(e, 1)
            m2 = []
            mids = []
            for g,m in matches:
                if id(m) not in mids:
                    mids.append(id(m))
                    m2.append((g,m))
            matches = m2

            if not matches:
                print "No match for %s"%e.key
            elif matches[-1][1] is e:
                print "%s matches for %s: OK."%(len(matches), e.key)
            else:
                print "%s matches for %s: %s is best!" %(len(matches), e.key,
                                                         matches[-1][1].key)
            if len(matches) > 1:
                for g, m in matches:
                    print "%%%% goodness", g
                    print m


def noteToURL(note):
    " returns tp, url "
    note = note.replace("\n", " ")
    m = re.match(r'\s*(?:\\newline\s*)*\s*\\url{(.*)}\s*(?:\\newline\s*)*',
                 note)
    if not m:
        return None
    url = m.group(1)
    for suffix, tp in ((".html", "html"),
                       (".ps", "ps"),
                       (".ps.gz", "ps_gz"),
                       (".pdf", "pdf"),
                       (".txt", "txt")):
        if url.endswith(suffix):
            return tp,url
    return "???", url

all_ok = 1
def emit(f,ent):
    global all_ok

    errs = ent._check()
    if master.byKey.has_key(ent.key.strip().lower()):
        errs.append("ERROR: Key collision with master file")

    if errs:
        all_ok = 0

    note = ent.get("note")
    if ent.getURL() and not note:
        ent['note'] = "\url{%s}"%ent.getURL()
    elif note:
        m = re.match(r'\\url{(.*)}', note)
        if m:
            url = m.group(0)
            tp = None
            if url.endswith(".txt"):
                tp = "txt"
            elif url.endswith(".ps.gz"):
                tp = "ps_gz"
            elif url.endswith(".ps"):
                tp = "ps_gz"
            elif url.endswith(".pdf"):
                tp = "pdf"
            elif url.endswith(".html"):
                tp = "html"
            if tp:
                ent['www_%s_url'%tp] = url

    if errs:
        all_ok = 0
        for e in errs:
            print >>f, "%%%%", e

    print >>f, ent.format(77, 4, v=1, invStrings=invStrings)

def emitKnown(f, ent, matches):
    print >>f, "%% Candidates are:", ", ".join([e.key for g,e in matches])
    print >>f, "%%"
    print >>f, "%"+(ent.format(77,4,1,invStrings).replace("\n", "\n%"))

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print "reconcile.py expects 2 arguments"
        sys.exit(1)

    config.load(sys.argv[1])

    print "========= Scanning master =========="
    master = MasterBibTeX()
    master = BibTeX.parseFile(config.MASTER_BIB, result=master)
    master.buildIndex()

    print "========= Scanning new file ========"
    try:
        fn = sys.argv[2]
        input = BibTeX.parseFile(fn)
    except BibTeX.ParseError, e:
        print "Error parsing %s: %s"%(fn,e)
        sys.exit(1)

    f = open('tmp.bib', 'w')
    keys = input.newStrings.keys()
    keys.sort()
    for k in keys:
        v = input.newStrings[k]
        print >>f, "@string{%s = {%s}}"%(k,v)

    invStrings = input.invStrings

    for e in input.entries:
        if not (e.get('title') and e.get('author')):
            print >>f, "%%\n%%%% Not enough information to search for a match: need title and author.\n%%"
            emit(f, e)
            continue

        matches = master.includes(e, all=1)
        if not matches:
            print >>f, "%%\n%%%% This entry is probably new: No match found.\n%%"
            emit(f, e)
        else:
            print >>f, "%%"
            print >>f, "%%%% Possible match found for this entry; max goodness",\
                  matches[-1][0], "\n%%"
            emitKnown(f, e, matches)

    if not all_ok:
        print >>f, "\n\n\nErrors remain; not finished.\n"

    f.close()
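The heart of the script is MasterBibTeX.includes(), which scores a candidate entry as the product of its date, title, and author-list similarity against each indexed master entry. A minimal sketch of calling it directly ("new-file.bib" is the hypothetical input named in the docstring):

    import config, BibTeX
    from reconcile import MasterBibTeX

    config.load("anonbib.cfg")
    master = BibTeX.parseFile(config.MASTER_BIB, result=MasterBibTeX())
    master.buildIndex()

    for e in BibTeX.parseFile("new-file.bib").entries:
        goodness, match = master.includes(e)   # (None, None) if no match
        if match is not None:
            print "%s looks like %s (goodness %s)" % (e.key, match.key,
                                                      goodness)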
i2p2www/anonbib/silver.gif (new binary file, 539 B): binary file not shown.
i2p2www/anonbib/testbib/pdos.bib (new file, 1742 lines): diff suppressed because it is too large.
i2p2www/anonbib/tests.py (new file, 86 lines)
@@ -0,0 +1,86 @@
#!/usr/bin/python2
# Copyright 2004-2008, Nick Mathewson.  See LICENSE for licensing info.

"""Unit tests for anonbib."""

import BibTeX
import metaphone
#import reconcile
#import writeHTML
#import updateCache

import unittest

class MetaphoneTests(unittest.TestCase):
    def testMetaphone(self):
        pass

class BibTeXTests(unittest.TestCase):
    def testTranslation(self):
        ut = BibTeX.url_untranslate
        self.assertEquals(ut("Fred"),"Fred")
        self.assertEquals(ut("Hello, World."), "Hello_2c_20World.")

        te = BibTeX.TeXescapeURL
        ute = BibTeX.unTeXescapeURL
        self.assertEquals(te("http://example/~me/my_file"),
                          r"http://example/\{}~me/my\_file")
        self.assertEquals(ute(r"http:{}//example/\{}~me/my\_file"),
                          "http://example/~me/my_file")

        h = BibTeX.htmlize
        self.assertEquals(h("Hello, world"), "Hello, world")
        self.assertEquals(h(r"\'a\`e\'{i}(\'\i)\"o&\^u"),
                          "&aacute;&egrave;&iacute;(&iacute;)&ouml;&amp;"
                          "&ucirc;")
        self.assertEquals(h(r"\~n and \c{c}"), "&ntilde; and &ccedil;")
        self.assertEquals(h(r"\AE---a ligature"), "&AElig;&mdash;a ligature")
        self.assertEquals(h(r"{\it 33}"), " 33")
        self.assertEquals(h(r"Pages 33--99 or vice--versa?"),
                          "Pages 33&ndash;99 or vice&ndash;versa?")

        t = BibTeX.txtize
        self.assertEquals(t("Hello, world"), "Hello, world")
        self.assertEquals(t(r"\'a\`e\'{i}(\'\i)\"o&\^u"),
                          "aei(i)o&u")
        self.assertEquals(t(r"\~n and \c{c}"), "n and c")
        self.assertEquals(t(r"\AE---a ligature"), "AE---a ligature")
        self.assertEquals(t(r"{\it 33}"), " 33")
        self.assertEquals(t(r"Pages 33--99 or vice--versa?"),
                          "Pages 33--99 or vice--versa?")

    def authorsParseTo(self,authors,result):
        pa = BibTeX.parseAuthor(authors)
        self.assertEquals(["|".join(["+".join(item) for item in
                                     [a.first,a.von,a.last,a.jr]])
                           for a in pa],
                          result)

    def testAuthorParsing(self):
        pa = BibTeX.parseAuthor
        PA = BibTeX.ParsedAuthor
        apt = self.authorsParseTo

        apt("Nick A. Mathewson and Roger Dingledine",
            ["Nick+A.||Mathewson|", "Roger||Dingledine|"])
        apt("John van Neumann", ["John|van|Neumann|"])
        apt("P. Q. Z. de la Paz", ["P.+Q.+Z.|de+la|Paz|"])
        apt("Cher", ["||Cher|"])
        apt("Smith, Bob", ["Bob||Smith|"])
        apt("de Smith, Bob", ["Bob|de|Smith|"])
        apt("de Smith, Bob Z", ["Bob+Z|de|Smith|"])
        #XXXX Fix this.
        #apt("Roberts Smith Wilkins, Bob Z", ["Bob+Z||Smith+Wilkins|"])
        apt("Smith, Jr, Bob", ["Bob||Smith|Jr"])

        #XXXX Fix this.
        #apt("R Jones, Jr.", ["R||Jones|Jr."])
        apt("Smith, Bob and John Smith and Last,First",
            ["Bob||Smith|", "John||Smith|", "First||Last|"])
        apt("Bob Smith and John Smith and John Doe",
            ["Bob||Smith|", "John||Smith|", "John||Doe|"])


if __name__ == '__main__':
    unittest.main()
i2p2www/anonbib/upb.gif (new binary file, 555 B): binary file not shown.
169
i2p2www/anonbib/updateCache.py
Executable file
169
i2p2www/anonbib/updateCache.py
Executable file
@ -0,0 +1,169 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
|
||||||
|
|
||||||
|
"""Download files in bibliography into a local cache.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import signal
|
||||||
|
import time
|
||||||
|
import gzip
|
||||||
|
|
||||||
|
import BibTeX
|
||||||
|
import config
|
||||||
|
import urllib2
|
||||||
|
import getopt
|
||||||
|
import socket
|
||||||
|
import errno
|
||||||
|
import httplib
|
||||||
|
|
||||||
|
FILE_TYPES = [ "txt", "html", "pdf", "ps", "ps.gz", "abstract" ]
|
||||||
|
BIN_FILE_TYPES = [ 'pdf', 'ps.gz' ]
|
||||||
|
|
||||||
|
class UIError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def tryUnlink(fn):
|
||||||
|
try:
|
||||||
|
os.unlink(fn)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def getCacheFname(key, ftype, section):
|
||||||
|
return BibTeX.smartJoin(config.OUTPUT_DIR,config.CACHE_DIR,
|
||||||
|
section,
|
||||||
|
"%s.%s"%(key,ftype))
|
||||||
|
|
||||||
|
def downloadFile(key, ftype, section, url,timeout=None):
|
||||||
|
if timeout is None:
|
||||||
|
timeout = config.DOWNLOAD_CONNECT_TIMEOUT
|
||||||
|
fname = getCacheFname(key, ftype, section)
|
||||||
|
parent = os.path.split(fname)[0]
|
||||||
|
if not os.path.exists(parent):
|
||||||
|
os.makedirs(parent)
|
||||||
|
|
||||||
|
fnameTmp = fname+".tmp"
|
||||||
|
fnameURL = fname+".url"
|
||||||
|
tryUnlink(fnameTmp)
|
||||||
|
|
||||||
|
def sigalrmHandler(sig,_):
|
||||||
|
pass
|
||||||
|
signal.signal(signal.SIGALRM, sigalrmHandler)
|
||||||
|
signal.alarm(timeout)
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
infile = urllib2.urlopen(url)
|
||||||
|
except httplib.InvalidURL, e:
|
||||||
|
raise UIError("Invalid URL %s: %s"%(url,e))
|
||||||
|
except IOError, e:
|
||||||
|
raise UIError("Cannot connect to url %s: %s"%(url,e))
|
||||||
|
except socket.error, e:
|
||||||
|
if getattr(e,"errno",-1) == errno.EINTR:
|
||||||
|
raise UIError("Connection timed out to url %s"%url)
|
||||||
|
else:
|
||||||
|
raise UIError("Error connecting to %s: %s"%(url, e))
|
||||||
|
finally:
|
||||||
|
signal.alarm(0)
|
||||||
|
|
||||||
|
mode = 'w'
|
||||||
|
if ftype in BIN_FILE_TYPES:
|
||||||
|
mode = 'wb'
|
||||||
|
outfile = open(fnameTmp, mode)
|
||||||
|
try:
|
||||||
|
while 1:
|
||||||
|
s = infile.read(1<<16)
|
||||||
|
if not s: break
|
||||||
|
outfile.write(s)
|
||||||
|
finally:
|
||||||
|
infile.close()
|
||||||
|
outfile.close()
|
||||||
|
|
||||||
|
urlfile = open(fnameURL, 'w')
|
||||||
|
print >>urlfile, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
|
||||||
|
if "\n" in url: url = url.replace("\n", " ")
|
||||||
|
print >>urlfile, url
|
||||||
|
urlfile.close()
|
||||||
|
|
||||||
|
os.rename(fnameTmp, fname)
|
||||||
|
|
||||||
|
def getURLs(entry):
    r = {}
    for ftype in FILE_TYPES:
        ftype2 = ftype.replace(".", "_")
        url = entry.get("www_%s_url"%ftype2)
        if url:
            r[ftype] = url.strip().replace("\n", " ")
    return r
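For illustration, the key scheme getURLs() expects (hypothetical URLs; a real entry is a BibTeXEntry, but any mapping with .get() behaves the same here):

    entry = {"www_pdf_url":   "https://example.org/paper.pdf",
             "www_ps_gz_url": "https://example.org/paper.ps.gz"}
    # Dots in a file type become underscores in the key, so "ps.gz" is
    # looked up as "www_ps_gz_url", and getURLs(entry) would return
    # {"pdf": "https://example.org/paper.pdf",
    #  "ps.gz": "https://example.org/paper.ps.gz"}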
def getCachedURL(key, ftype, section):
    fname = getCacheFname(key, ftype, section)
    urlFname = fname+".url"
    if not os.path.exists(fname) or not os.path.exists(urlFname):
        return None
    f = open(urlFname, 'r')
    lines = f.readlines()
    f.close()
    if len(lines) != 2:
        print >>sys.stderr, "ERROR: unexpected number of lines in", urlFname
        # Treat a malformed record as if nothing were cached, rather than
        # crashing below when the file has fewer than two lines.
        return None
    return lines[1].strip()
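For reference, a well-formed ".url" sidecar, the two lines downloadFile() writes and the length check above expects, would look like this (hypothetical timestamp and URL):

    2010-06-16 12:00:00
    https://example.org/paper.pdf

getCachedURL() hands back the second line, which downloadAll() compares against the current www_*_url value to detect documents that have moved.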
def downloadAll(bibtex, missingOnly=0):
    """returns list of tuples of key, ftype, url, error"""
    errors = []
    for e in bibtex.entries:
        urls = getURLs(e)
        key = e.key
        section = e.get("www_cache_section", ".")
        for ftype, url in urls.items():
            if missingOnly:
                cachedURL = getCachedURL(key, ftype, section)
                if cachedURL == url:
                    print >>sys.stderr, "Skipping", url
                    continue
                elif cachedURL is not None:
                    print >>sys.stderr, "URL for %s.%s has changed"%(key,ftype)
                else:
                    print >>sys.stderr, "I have no copy of %s.%s"%(key,ftype)
            try:
                downloadFile(key, ftype, section, url)
                print "Downloaded", url
            except UIError, e:
                print >>sys.stderr, str(e)
                errors.append((key,ftype,url,str(e)))
            except (IOError, socket.error), e:
                msg = "Error downloading %s: %s"%(url,str(e))
                print >>sys.stderr, msg
                errors.append((key,ftype,url,msg))
        if urls.has_key("ps") and not urls.has_key("ps.gz"):
            # Say, this is something we'd like to have gzipped locally.
            psFname = getCacheFname(key, "ps", section)
            psGzFname = getCacheFname(key, "ps.gz", section)
            if os.path.exists(psFname) and not os.path.exists(psGzFname):
                # This is something we haven't gzipped yet.
                print "Compressing a copy of", psFname
                outf = gzip.GzipFile(psGzFname, "wb")
                inf = open(psFname, "rb")
                while 1:
                    s = inf.read(4096)
                    if not s:
                        break
                    outf.write(s)
                outf.close()
                inf.close()

    return errors
if __name__ == '__main__':
    if len(sys.argv) == 2:
        print "Loading from %s"%sys.argv[1]
    else:
        print >>sys.stderr, "Expected a single configuration file as an argument"
        sys.exit(1)
    config.load(sys.argv[1])

    if config.CACHE_UMASK is not None:
        os.umask(config.CACHE_UMASK)

    bib = BibTeX.parseFile(config.MASTER_BIB)
    downloadAll(bib, missingOnly=1)
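A minimal sketch of driving the cache refresh from Python rather than the command line, assuming a config file such as anonbib.cfg and that updateCache.py is importable from the working directory; downloadAll() returns (key, ftype, url, error) tuples for anything it could not fetch:

    import BibTeX, config, updateCache

    config.load("anonbib.cfg")  # hypothetical config path
    bib = BibTeX.parseFile(config.MASTER_BIB)
    for key, ftype, url, err in updateCache.downloadAll(bib, missingOnly=1):
        print "could not fetch %s.%s from %s: %s" % (key, ftype, url, err)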
BIN
i2p2www/anonbib/ups.gif
Normal file
Binary file not shown. Size: 536 B.
41
i2p2www/anonbib/venue-checklist.txt
Normal file
@ -0,0 +1,41 @@
This file is to keep track of which volumes of which publications have
been combed for anonymity papers and which we still have to add.

=== DONE:

ExampleConference (through 2008)

PETS 2000-2003

=== CLAIMED:

PETS 2000-2010 -- Nick (claimed 6/16)
ESORICS 1990-2010 -- Nick (claimed 6/16)
CCS -- George (claimed 6/17)
USENIX Security -- George (claimed 6/17)

=== SHOULD DO:

Infohiding
IEEE Security and Privacy ("Oakland")
NDSS
WPES
WEIS
Financial Crypto
Eurocrypt
Asiacrypt

Search: Papers that cite Chaum's paper
Search: Papers that cite the Tor paper
Search: Papers that cite the original onion routing papers
Search: Papers mentioning "anonymity" or "anonymous"
Search: Papers mentioning "mixnet" or "mix-net"

=== UNDERSERVED CONTENT; PLEASE SUGGEST SEARCHES AND VENUES

Private information retrieval; PIR
Anti-censorship; censorship
Location privacy
Anonymous credentials
Anonymizing data
Secure multiparty computation
246
i2p2www/anonbib/writeHTML.py
Executable file
@ -0,0 +1,246 @@
#!/usr/bin/python
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.

"""Generate indices by author, topic, date, and BibTeX key."""

import sys
import re
import os
import json

assert sys.version_info[:3] >= (2,2,0)
os.umask(022)

import BibTeX
import config
def getTemplate(name):
    f = open(name)
    template = f.read()
    f.close()
    template_s, template_e = template.split("%(entries)s")
    return template_s, template_e
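To illustrate the contract getTemplate() assumes (a hypothetical skeleton, not the project's actual template file): the file must contain the literal marker %(entries)s exactly once, and the halves before and after it are later %-formatted with the `fields` dict built in writeHTML() below, so they may use keys such as %(title)s and %(root)s:

    <html><head><title>%(title)s</title></head><body>
    <h1>%(title)s</h1>
    <ul>
    %(entries)s
    </ul>
    </body></html>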
def pathLength(s):
    n = 0
    while s:
        parent, leaf = os.path.split(s)
        if leaf != '' and leaf != '.':
            n += 1
        s = parent
    return n
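pathLength() counts the directory components of a relative path; writePageSet() below uses it to build the right number of "../" hops back to the output root. A few expected values, as a sketch:

    # pathLength("papers/anonymity") == 2
    # pathLength("papers/")          == 1   (the empty trailing leaf is skipped)
    # pathLength(".")                == 0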
def writeBody(f, sections, section_urls, cache_path, base_url):
    '''f: an open file
       sections: list of (sectionname, [list of BibTeXEntry])
       section_urls: map from sectionname to external url'''
    for s, entries in sections:
        u = section_urls.get(s)
        sDisp = re.sub(r'\s+', ' ', s.strip())
        # Use non-breaking spaces so section names don't wrap.
        sDisp = sDisp.replace(" ", "&nbsp;")
        if u:
            print >>f, ('<li><h3><a name="%s"></a><a href="%s">%s</a></h3>'%(
                (BibTeX.url_untranslate(s), u, sDisp)))
        else:
            print >>f, ('<li><h3><a name="%s">%s</a></h3>'%(
                BibTeX.url_untranslate(s), sDisp))
        print >>f, "<ul class='expand'>"
        for e in entries:
            print >>f, e.to_html(cache_path=cache_path, base_url=base_url)
        print >>f, "</ul></li>"
def writeHTML(f, sections, sectionType, fieldName, choices,
              tag, config, cache_url_path, section_urls={}):
    """sections: list of (sectionname, [list of BibTeXEntry])
       sectionType: str
       fieldName: str
       choices: list of (choice, url)"""

    title = config.TAG_TITLES[tag]
    short_title = config.TAG_SHORT_TITLES[tag]

    # Build the in-page list of section links.
    secStr = []
    for s, _ in sections:
        hts = re.sub(r'\s+', ' ', s.strip())
        hts = hts.replace(" ", "&nbsp;")
        secStr.append("<p class='l2'><a href='#%s'>%s</a></p>\n"%
                      ((BibTeX.url_untranslate(s),hts)))
    secStr = "".join(secStr)

    # Build the links to the other tagged bibliographies.
    tagListStr = []
    st = config.TAG_SHORT_TITLES.keys()
    st.sort()
    root = "../"*pathLength(config.TAG_DIRECTORIES[tag])
    if root == "": root = "."
    for t in st:
        name = config.TAG_SHORT_TITLES[t]
        if t == tag:
            tagListStr.append(name)
        else:
            url = BibTeX.smartJoin(root, config.TAG_DIRECTORIES[t], "date.html")
            tagListStr.append("<a href='%s'>%s</a>"%(url, name))
    tagListStr = " | ".join(tagListStr)

    # Build the "view by topic/date/author" links.
    choiceStr = []
    for choice, url in choices:
        if url:
            choiceStr.append("<a href='%s'>%s</a>"%(url, choice))
        else:
            choiceStr.append(choice)

    choiceStr = " | ".join(choiceStr)

    fields = { 'command_line' : "",
               'sectiontypes' : sectionType,
               'choices' : choiceStr,
               'field': fieldName,
               'sections' : secStr,
               'otherbibs' : tagListStr,
               'title': title,
               'short_title': short_title,
               "root" : root,
               }

    header, footer = getTemplate(config.TEMPLATE_FILE)
    print >>f, header%fields
    writeBody(f, sections, section_urls, cache_path=cache_url_path,
              base_url=root)
    print >>f, footer%fields
def jsonDumper(obj):
    if isinstance(obj, BibTeX.BibTeXEntry):
        e = obj.entries.copy()
        e['key'] = obj.key
        return e
    else:
        raise TypeError("Do not know how to serialize %s"%(obj.__class__,))
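As a sketch of the resulting bibtex.json shape (hypothetical field values): each BibTeXEntry becomes its field dictionary plus the citation key, so a serialized entry would look roughly like:

    # {"key": "example04", "title": "An Example Paper",
    #  "author": "Bob Smith", "year": "2004"}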
def writePageSet(config, bib, tag):
    if tag:
        bib_entries = [ b for b in bib.entries
                        if tag in b.get('www_tags', "").split() ]
    else:
        bib_entries = bib.entries[:]

    if not bib_entries:
        print >>sys.stderr, "No entries with tag %r; skipping"%tag
        return

    tagdir = config.TAG_DIRECTORIES[tag]
    outdir = os.path.join(config.OUTPUT_DIR, tagdir)
    cache_url_path = BibTeX.smartJoin("../"*pathLength(tagdir),
                                      config.CACHE_DIR)
    if not os.path.exists(outdir):
        os.makedirs(outdir, 0755)
    ##### Sorted views:

    ## By topic.

    # The "ZZZZ..." sentinel sorts entries with no www_section to the end.
    entries = BibTeX.sortEntriesBy(bib_entries, "www_section", "ZZZZZZZZZZZZZZ")
    entries = BibTeX.splitSortedEntriesBy(entries, "www_section")
    if entries[-1][0].startswith("<span class='bad'>"):
        entries[-1] = ("Miscellaneous", entries[-1][1])

    entries = [ (s, BibTeX.sortEntriesByDate(ents))
                for s, ents in entries
                ]

    f = open(os.path.join(outdir,"topic.html"), 'w')
    writeHTML(f, entries, "Topics", "topic",
              (("By topic", None),
               ("By date", "./date.html"),
               ("By author", "./author.html")
               ),
              tag=tag, config=config,
              cache_url_path=cache_url_path)
    f.close()
    ## By date.

    entries = BibTeX.sortEntriesByDate(bib_entries)
    entries = BibTeX.splitSortedEntriesBy(entries, 'year')
    for idx in -1, -2:
        if entries[idx][0].startswith("<span class='bad'>"):
            entries[idx] = ("Unknown", entries[idx][1])
        elif entries[idx][0].startswith("forthcoming"):
            entries[idx] = ("Forthcoming", entries[idx][1])
    sections = [ ent[0] for ent in entries ]

    first_year = int(entries[0][1][0]['year'])
    try:
        last_year = int(entries[-1][1][0].get('year'))
    except ValueError:
        last_year = int(entries[-2][1][0].get('year'))

    years = map(str, range(first_year, last_year+1))
    if entries[-1][0] == 'Unknown':
        years.append("Unknown")

    f = open(os.path.join(outdir,"date.html"), 'w')
    writeHTML(f, entries, "Years", "date",
              (("By topic", "./topic.html"),
               ("By date", None),
               ("By author", "./author.html")
               ),
              tag=tag, config=config,
              cache_url_path=cache_url_path)
    f.close()
    ## By author
    entries, url_map = BibTeX.splitEntriesByAuthor(bib_entries)

    f = open(os.path.join(outdir,"author.html"), 'w')
    writeHTML(f, entries, "Authors", "author",
              (("By topic", "./topic.html"),
               ("By date", "./date.html"),
               ("By author", None),
               ),
              tag=tag, config=config,
              cache_url_path=cache_url_path,
              section_urls=url_map)
    f.close()
    ## The big BibTeX file

    entries = bib_entries[:]
    entries = [ (ent.key, ent) for ent in entries ]
    entries.sort()
    entries = [ ent[1] for ent in entries ]

    ## Finding the root directory is done by writeHTML(), but
    ## the BibTeX file doesn't use that, so repeat the code here.
    root = "../"*pathLength(config.TAG_DIRECTORIES[tag])
    if root == "": root = "."

    header, footer = getTemplate(config.BIBTEX_TEMPLATE_FILE)
    f = open(os.path.join(outdir,"bibtex.html"), 'w')
    print >>f, header % { 'command_line' : "",
                          'title': config.TAG_TITLES[tag],
                          'root': root }
    for ent in entries:
        print >>f, (
            ("<tr><td class='bibtex'><a name='%s'>%s</a>"
             "<pre class='bibtex'>%s</pre></td></tr>")
            %(BibTeX.url_untranslate(ent.key), ent.key, ent.format(90,8,1)))
    print >>f, footer
    f.close()

    f = open(os.path.join(outdir,"bibtex.json"), 'w')
    json.dump(entries, f, default=jsonDumper)
    f.close()
if __name__ == '__main__':
    if len(sys.argv) == 2:
        print "Loading from %s"%sys.argv[1]
    else:
        print >>sys.stderr, "Expected a single configuration file as an argument"
        sys.exit(1)
    config.load(sys.argv[1])

    bib = BibTeX.parseFile(config.MASTER_BIB)

    for tag in config.TAG_DIRECTORIES.keys():
        writePageSet(config, bib, tag)