Files
i2p.www/i2p2www/anonbib/updateCache.py
2023-11-22 14:48:24 -05:00

172 lines
5.3 KiB
Python
Executable File

#!/usr/bin/python
# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info.
"""Download files in bibliography into a local cache.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import signal
import time
import gzip
from . import BibTeX
from . import config
import six.moves.urllib.request, six.moves.urllib.error, six.moves.urllib.parse
import getopt
import socket
import errno
import six.moves.http_client
# File-type suffixes looked up on each bibliography entry.  getURLs() maps
# each one to a "www_<type>_url" field (with "." replaced by "_").
FILE_TYPES = [
    "txt",
    "html",
    "pdf",
    "ps",
    "ps.gz",
    "abstract",
]

# Subset of FILE_TYPES that downloadFile() writes in binary mode.
BIN_FILE_TYPES = [
    'pdf',
    'ps.gz',
]
class UIError(Exception):
    """Error whose message is intended to be shown directly to the user.

    Raised by downloadFile() with a human-readable description of why a
    URL could not be opened.
    """
def tryUnlink(fn):
    """Delete the file `fn`, silently ignoring any OSError (e.g. if the
    file does not exist).  Always returns None.
    """
    try:
        os.remove(fn)
    except OSError:
        # Best-effort removal: a failure here is deliberately not reported.
        return None
    return None
def getCacheFname(key, ftype, section):
    """Return the cache path for entry `key` and file type `ftype`.

    The path is built with BibTeX.smartJoin from config.OUTPUT_DIR,
    config.CACHE_DIR, `section`, and a leaf name of the form "<key>.<ftype>".
    """
    leaf = "%s.%s"%(key,ftype)
    return BibTeX.smartJoin(config.OUTPUT_DIR, config.CACHE_DIR, section, leaf)
class _ConnectTimeout(Exception):
    """Raised from the SIGALRM handler to abort a stalled connection attempt."""


def downloadFile(key, ftype, section, url,timeout=None):
    """Download `url` into the cache slot for (`key`, `ftype`, `section`).

    The data is first written to "<cache name>.tmp"; a companion
    "<cache name>.url" file is then written with a timestamp line and the
    URL, and finally the temporary file is renamed onto the cache name.

    `timeout` is the number of seconds allowed for opening the URL; when it
    is None, config.DOWNLOAD_CONNECT_TIMEOUT is used.  It is enforced with
    SIGALRM and therefore does not cover reading the response body.

    Raises UIError if the URL is invalid, the connection fails, or the
    connection attempt times out.  Errors while reading or writing the
    data propagate unchanged (IOError / socket.error).
    """
    if timeout is None:
        timeout = config.DOWNLOAD_CONNECT_TIMEOUT
    fname = getCacheFname(key, ftype, section)
    parent = os.path.split(fname)[0]
    if not os.path.exists(parent):
        os.makedirs(parent)

    fnameTmp = fname+".tmp"
    fnameURL = fname+".url"
    tryUnlink(fnameTmp)

    def sigalrmHandler(sig,_):
        # The handler has to raise: since Python 3.5 an interrupted system
        # call is retried automatically when the handler returns normally,
        # so a do-nothing handler would never abort the connection.
        raise _ConnectTimeout()

    previousHandler = signal.signal(signal.SIGALRM, sigalrmHandler)
    signal.alarm(timeout)
    try:
        try:
            infile = six.moves.urllib.request.urlopen(url)
        finally:
            # Cancel the alarm before anything else, then put back whatever
            # handler was installed so ours cannot fire later.
            signal.alarm(0)
            if previousHandler is not None:
                signal.signal(signal.SIGALRM, previousHandler)
    except _ConnectTimeout:
        raise UIError("Connection timed out to url %s"%url)
    except six.moves.http_client.InvalidURL as e:
        raise UIError("Invalid URL %s: %s"%(url,e))
    except (IOError, socket.error) as e:
        # socket.error is an IOError (subclass or alias), so one handler
        # covers both.
        raise UIError("Cannot connect to url %s: %s"%(url,e))

    try:
        # Always write in binary mode: read() yields bytes on Python 3, and
        # the downloaded data should be stored unmodified for every type.
        with open(fnameTmp, 'wb') as outfile:
            while 1:
                s = infile.read(1<<16)
                if not s:
                    break
                outfile.write(s)
    finally:
        infile.close()

    with open(fnameURL, 'w') as urlfile:
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), file=urlfile)
        # The .url file must stay exactly two lines long.
        if "\n" in url:
            url = url.replace("\n", " ")
        print(url, file=urlfile)

    os.rename(fnameTmp, fname)
def getURLs(entry):
    """Return a dict mapping file type -> URL for the bibliography `entry`.

    For every type in FILE_TYPES the field "www_<type>_url" is looked up
    via entry.get(), with "." in the type replaced by "_".  Types whose
    field is missing or empty are left out.  Each URL is stripped of
    surrounding whitespace and has embedded newlines replaced by spaces.
    """
    found = {}
    for ftype in FILE_TYPES:
        field = "www_%s_url"%ftype.replace(".", "_")
        value = entry.get(field)
        if not value:
            continue
        found[ftype] = value.strip().replace("\n", " ")
    return found
def getCachedURL(key, ftype, section):
    """Return the URL recorded for the cached copy of (`key`, `ftype`,
    `section`), or None if there is no usable cached copy.

    None is returned when either the cached file or its ".url" companion
    is missing, or when the ".url" file has fewer than two lines (an error
    is printed to stderr in that case).  The URL is taken from the second
    line of the ".url" file, stripped of surrounding whitespace.
    """
    fname = getCacheFname(key, ftype, section)
    urlFname = fname+".url"
    if not os.path.exists(fname) or not os.path.exists(urlFname):
        return None
    with open(urlFname, 'r') as f:
        lines = f.readlines()
    if len(lines) != 2:
        print("ERROR: unexpected number of lines in", urlFname, file=sys.stderr)
        if len(lines) < 2:
            # No URL line to read; report "nothing cached" rather than
            # failing with an IndexError below.
            return None
    return lines[1].strip()
def _gzipCopy(srcFname, dstFname):
    """Write a gzip-compressed copy of `srcFname` to `dstFname`.

    The output is built in "<dstFname>.tmp" and renamed into place only
    once it is complete, so a failed run never leaves a truncated file
    under the final name.
    """
    tmpFname = dstFname+".tmp"
    finished = False
    try:
        with open(srcFname, "rb") as inf:
            with open(tmpFname, "wb") as raw:
                # filename= keeps the name stored in the gzip header based
                # on the final file name rather than the temporary one.
                outf = gzip.GzipFile(filename=dstFname, mode="wb", fileobj=raw)
                try:
                    while 1:
                        s = inf.read(4096)
                        if not s:
                            break
                        outf.write(s)
                finally:
                    outf.close()
        os.rename(tmpFname, dstFname)
        finished = True
    finally:
        if not finished:
            tryUnlink(tmpFname)


def downloadAll(bibtex, missingOnly=0):
    """Download every file referenced by the entries of `bibtex`.

    For each entry in bibtex.entries, each URL found by getURLs() is
    fetched with downloadFile().  If `missingOnly` is true, a file is
    skipped when getCachedURL() reports the same URL as already cached.
    When an entry has a "ps" URL but no "ps.gz" URL and the cached .ps
    file exists without a .ps.gz next to it, a gzipped copy is created.

    Progress and problems are printed as the function runs.

    Returns a list of (key, ftype, url, error message) tuples, one per
    failed download.
    """
    errors = []
    for entry in bibtex.entries:
        urls = getURLs(entry)
        key = entry.key
        section = entry.get("www_cache_section", ".")
        for ftype, url in urls.items():
            if missingOnly:
                cachedURL = getCachedURL(key, ftype, section)
                if cachedURL == url:
                    print("Skipping",url, file=sys.stderr)
                    continue
                elif cachedURL is not None:
                    print("URL for %s.%s has changed"%(key,ftype), file=sys.stderr)
                else:
                    print("I have no copy of %s.%s"%(key,ftype), file=sys.stderr)
            try:
                downloadFile(key, ftype, section, url)
                print("Downloaded",url)
            # The exception gets its own name: reusing the loop variable
            # would unbind the current entry on Python 3.
            except UIError as err:
                print(str(err), file=sys.stderr)
                errors.append((key,ftype,url,str(err)))
            except (IOError, socket.error) as err:
                msg = "Error downloading %s: %s"%(url,str(err))
                print(msg, file=sys.stderr)
                errors.append((key,ftype,url,msg))
        if "ps" in urls and "ps.gz" not in urls:
            # Say, this is something we'd like to have gzipped locally.
            psFname = getCacheFname(key, "ps", section)
            psGzFname = getCacheFname(key, "ps.gz", section)
            if os.path.exists(psFname) and not os.path.exists(psGzFname):
                # This is something we haven't gzipped yet.
                print("Compressing a copy of",psFname)
                _gzipCopy(psFname, psGzFname)
    return errors
if __name__ == '__main__':
    # Script entry point: expects exactly one argument, the configuration
    # file to load.
    if len(sys.argv) != 2:
        print("Expected a single configuration file as an argument", file=sys.stderr)
        sys.exit(1)
    print("Loading from %s"%sys.argv[1])
    config.load(sys.argv[1])

    # Apply the configured umask (if any) before files are created.
    if config.CACHE_UMASK is not None:
        os.umask(config.CACHE_UMASK)

    bib = BibTeX.parseFile(config.MASTER_BIB)
    # Only fetch files that are missing or whose URL has changed.
    downloadAll(bib,missingOnly=1)