Add sitemap generation

Initially the rel="alternate" hreflang="xx" links in the sitemap caused the
size of the file to increase exponentially as additional languages were added.
The current implementation has a single sitemap.xml for each language, with a
sitemap_index.xml at the root.
This commit is contained in:
str4d
2013-01-04 12:38:49 +00:00
parent 3e8bd6eba3
commit a6a39fcb36
3 changed files with 105 additions and 0 deletions

View File

@ -15,10 +15,26 @@ CANONICAL_DOMAIN = 'www.i2p2.de'
# Items shown per listing page, going by the names -- the code that consumes
# these two values is outside this chunk (TODO confirm).
BLOG_ENTRIES_PER_PAGE = 20
MEETINGS_PER_PAGE = 20
# Language codes the site supports. The sitemap views pass this list to
# their templates as `langs` (one sitemap.xml is served per language).
SUPPORTED_LANGS = [
'en',
'es',
'zh',
'de',
'fr',
'it',
'nl',
'ru',
'sv',
'cs',
'ar',
'el',
]
# Filesystem locations, all resolved relative to the directory that contains
# this module.
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'pages')
STATIC_DIR = os.path.join(os.path.dirname(__file__), 'static')
BLOG_DIR = os.path.join(os.path.dirname(__file__), 'blog')
MEETINGS_DIR = os.path.join(os.path.dirname(__file__), 'meetings/logs')
# Sub-locations inside TEMPLATE_DIR. SITE_DIR is the tree the sitemap view
# walks to discover the site's .html pages.
SITE_DIR = os.path.join(TEMPLATE_DIR, 'site')
MIRRORS_FILE = os.path.join(TEMPLATE_DIR, 'downloads/mirrors')

86
i2p2www/sitemap.py Normal file
View File

@ -0,0 +1,86 @@
from flask import make_response, render_template, request, safe_join
import os.path
from i2p2www import SITE_DIR, SUPPORTED_LANGS
from i2p2www.blog.helpers import get_blog_slugs
from i2p2www.meetings.helpers import get_meetings_ids
##########
# Sitemaps
def render_sitemap_index():
    """Render the sitemap index document.

    The ``global/sitemap_index.xml`` template receives the request's URL
    root and the list of supported languages; the rendered result is
    returned as a response with a ``Content-Type`` of ``application/xml``.
    """
    # url_root keeps its trailing slash, so the template can sandwich the
    # language code between url_root and "/sitemap.xml".
    index_xml = render_template(
        'global/sitemap_index.xml',
        url_root=request.url_root,
        langs=SUPPORTED_LANGS,
    )

    resp = make_response(index_xml)
    resp.headers['Content-Type'] = 'application/xml'
    return resp
def render_sitemap():
    """Render the sitemap listing the URL paths of the site.

    The URL paths are collected from four sources and handed to the
    ``global/sitemap.xml`` template together with the request's URL root
    and the list of supported languages:

    * every ``.html`` file found under ``SITE_DIR`` (an ``index.html`` maps
      to its directory, any other file to its name without the extension),
    * the blog index plus one entry per blog slug,
    * the meetings index plus one entry per meeting id,
    * the download page.

    Returns:
        The rendered XML as a response whose ``Content-Type`` is
        ``application/xml``.
    """
    # url_root keeps its trailing slash, so the template can sandwich the
    # language code between url_root and each url's path.
    url_root = request.url_root

    urls = []

    # --------------
    # Main site urls
    # --------------
    for dirpath, _dirnames, filenames in os.walk(SITE_DIR):
        rel_dir = os.path.relpath(dirpath, SITE_DIR)
        for filename in filenames:
            # ignore all non-.html files
            if not filename.endswith('.html'):
                continue

            if filename == 'index.html':
                # An index page is addressed by its directory.
                path = rel_dir
            else:
                path = safe_join(rel_dir, filename[:-len('.html')])

            # URL paths always use forward slashes, whatever the OS uses.
            path = path.replace(os.sep, '/')

            # relpath() reports SITE_DIR itself as ".". Strip exactly that
            # marker (alone or as a "./" prefix) rather than any leading
            # dot, so a directory whose name starts with "." keeps its name.
            if path == '.':
                path = ''
            elif path.startswith('./'):
                path = path[2:]

            if not path.startswith('/'):
                path = '/%s' % path
            urls.append({'path': path})

    # ---------
    # Blog urls
    # ---------
    urls.append({'path': '/blog/'})
    urls.extend(
        {'path': '/blog/entry/%s' % slug} for slug in get_blog_slugs()
    )

    # -------------
    # Meetings urls
    # -------------
    urls.append({'path': '/meetings/'})
    # Named meeting_id so the builtin id() is not shadowed.
    urls.extend(
        {'path': '/meetings/%d' % meeting_id}
        for meeting_id in get_meetings_ids()
    )

    # --------------
    # Downloads urls
    # --------------
    urls.append({'path': '/download'})

    # Render and return the sitemap
    response = make_response(render_template(
        'global/sitemap.xml',
        url_root=url_root,
        langs=SUPPORTED_LANGS,
        urls=urls,
    ))
    response.headers['Content-Type'] = 'application/xml'
    return response

View File

@ -68,3 +68,6 @@ url('/<string:f>.html', 'legacy.legacy_show')
# Root-level files served at fixed paths.
url('/hosts.txt', 'views.hosts')
url('/robots.txt', 'views.robots')
url('/favicon.ico', 'views.favicon')
# Sitemaps: one index at the site root plus one sitemap.xml under each
# language prefix. The second argument names the handler as
# "module.function" (see sitemap.render_sitemap_index / render_sitemap).
# NOTE(review): url() and the "lang" converter used in <lang:lang> are
# defined outside this chunk -- confirm how they resolve these strings.
url('/sitemap_index.xml', 'sitemap.render_sitemap_index')
url('/<lang:lang>/sitemap.xml', 'sitemap.render_sitemap')