Added anonbib for use in the papers list
Source: https://gitweb.torproject.org/anonbib.git Commit: b478fc493d4be2115185d94e077bf06196495417
i2p2www/anonbib/updateCache.py | 169 lines (new executable file)
@@ -0,0 +1,169 @@
#!/usr/bin/python
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.

"""Download files in bibliography into a local cache.
"""

import os
import sys
import signal
import time
import gzip

import BibTeX
import config
import urllib2
import getopt
import socket
import errno
import httplib
FILE_TYPES = [ "txt", "html", "pdf", "ps", "ps.gz", "abstract" ]
BIN_FILE_TYPES = [ 'pdf', 'ps.gz' ]

class UIError(Exception):
    """Raised for user-visible errors such as bad URLs or failed fetches."""
    pass

def tryUnlink(fn):
    """Remove the file fn, ignoring any OSError (e.g., file not found)."""
    try:
        os.unlink(fn)
    except OSError:
        pass
def getCacheFname(key, ftype, section):
    """Return the cache path for entry key's file of type ftype within
       the given cache section."""
    return BibTeX.smartJoin(config.OUTPUT_DIR, config.CACHE_DIR,
                            section,
                            "%s.%s"%(key,ftype))
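# Worked example (values hypothetical): with OUTPUT_DIR = "website" and
# CACHE_DIR = "cache", an entry keyed "minion-design" cached as a PDF in
# section "Anonymity" resolves to website/cache/Anonymity/minion-design.pdf,
# assuming BibTeX.smartJoin combines path components like os.path.join.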
def downloadFile(key, ftype, section, url, timeout=None):
    """Download url into the cache as key.ftype, recording the fetch
       time and source URL in a parallel .url file."""
    if timeout is None:
        timeout = config.DOWNLOAD_CONNECT_TIMEOUT
    fname = getCacheFname(key, ftype, section)
    parent = os.path.split(fname)[0]
    if not os.path.exists(parent):
        os.makedirs(parent)

    fnameTmp = fname+".tmp"
    fnameURL = fname+".url"
    tryUnlink(fnameTmp)

    # Use SIGALRM to bound how long we wait for the connection, since
    # urllib2.urlopen here takes no timeout argument of its own.
    def sigalrmHandler(sig, _):
        pass
    signal.signal(signal.SIGALRM, sigalrmHandler)
    signal.alarm(timeout)
    try:
        try:
            infile = urllib2.urlopen(url)
        except httplib.InvalidURL, e:
            raise UIError("Invalid URL %s: %s"%(url,e))
        except IOError, e:
            raise UIError("Cannot connect to url %s: %s"%(url,e))
        except socket.error, e:
            if getattr(e,"errno",-1) == errno.EINTR:
                raise UIError("Connection timed out to url %s"%url)
            else:
                raise UIError("Error connecting to %s: %s"%(url, e))
    finally:
        signal.alarm(0)

    # Binary formats must be written in binary mode.
    mode = 'w'
    if ftype in BIN_FILE_TYPES:
        mode = 'wb'
    outfile = open(fnameTmp, mode)
    try:
        while 1:
            s = infile.read(1<<16)
            if not s: break
            outfile.write(s)
    finally:
        infile.close()
        outfile.close()

    # Record when and from where this file was fetched.
    urlfile = open(fnameURL, 'w')
    print >>urlfile, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if "\n" in url: url = url.replace("\n", " ")
    print >>urlfile, url
    urlfile.close()

    # Move into place only after the download completed successfully.
    os.rename(fnameTmp, fname)
def getURLs(entry):
    """Return a map from file type to download URL for each
       www_<type>_url field set on a BibTeX entry."""
    r = {}
    for ftype in FILE_TYPES:
        ftype2 = ftype.replace(".", "_")
        url = entry.get("www_%s_url"%ftype2)
        if url:
            r[ftype] = url.strip().replace("\n", " ")
    return r
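# Example of the field naming (entry contents hypothetical): an entry
# providing www_pdf_url and www_ps_gz_url (the "." in a file type becomes
# "_" in the field name) yields {"pdf": <url>, "ps.gz": <url>}.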
def getCachedURL(key, ftype, section):
    """Return the URL from which the cached copy of key.ftype was
       fetched, or None if there is no usable cached copy."""
    fname = getCacheFname(key, ftype, section)
    urlFname = fname+".url"
    if not os.path.exists(fname) or not os.path.exists(urlFname):
        return None
    f = open(urlFname, 'r')
    lines = f.readlines()
    f.close()
    if len(lines) != 2:
        print >>sys.stderr, "ERROR: unexpected number of lines in", urlFname
        return None
    return lines[1].strip()
def downloadAll(bibtex, missingOnly=0):
    """Download every URL listed in the bibliography into the cache;
       return a list of (key, ftype, url, error) tuples for the failures.
       If missingOnly is true, skip files whose cached copy was fetched
       from the same URL."""
    errors = []
    for e in bibtex.entries:
        urls = getURLs(e)
        key = e.key
        section = e.get("www_cache_section", ".")
        for ftype, url in urls.items():
            if missingOnly:
                cachedURL = getCachedURL(key, ftype, section)
                if cachedURL == url:
                    print >>sys.stderr, "Skipping", url
                    continue
                elif cachedURL is not None:
                    print >>sys.stderr, "URL for %s.%s has changed"%(key,ftype)
                else:
                    print >>sys.stderr, "I have no copy of %s.%s"%(key,ftype)
            # Bind the exception to err, not e, to avoid shadowing the
            # entry variable of the enclosing loop.
            try:
                downloadFile(key, ftype, section, url)
                print "Downloaded", url
            except UIError, err:
                print >>sys.stderr, str(err)
                errors.append((key,ftype,url,str(err)))
            except (IOError, socket.error), err:
                msg = "Error downloading %s: %s"%(url,str(err))
                print >>sys.stderr, msg
                errors.append((key,ftype,url,msg))
        if urls.has_key("ps") and not urls.has_key("ps.gz"):
            # This is a file we would like to have gzipped locally.
            psFname = getCacheFname(key, "ps", section)
            psGzFname = getCacheFname(key, "ps.gz", section)
            if os.path.exists(psFname) and not os.path.exists(psGzFname):
                # We have the .ps but haven't compressed it yet.
                print "Compressing a copy of", psFname
                outf = gzip.GzipFile(psGzFname, "wb")
                inf = open(psFname, "rb")
                while 1:
                    s = inf.read(4096)
                    if not s:
                        break
                    outf.write(s)
                outf.close()
                inf.close()

    return errors
if __name__ == '__main__':
    if len(sys.argv) == 2:
        print "Loading from %s"%sys.argv[1]
    else:
        print >>sys.stderr, "Expected a single configuration file as an argument"
        sys.exit(1)
    config.load(sys.argv[1])

    if config.CACHE_UMASK is not None:
        os.umask(config.CACHE_UMASK)

    bib = BibTeX.parseFile(config.MASTER_BIB)
    downloadAll(bib, missingOnly=1)
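The script expects a single configuration file as its argument; assuming a config named anonbib.cfg (the filename is illustrative), it would be run as:

    python updateCache.py anonbib.cfg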