#!/usr/bin/env python

# DT WSGI Sitemap
#
# Douglas Thrift
#
# $Id$

#  Copyright 2010 Douglas Thrift
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from cStringIO import StringIO
from lxml import etree
import urlparse
import gzip


class Sitemap(object):
    """Base sitemap: renders the entries returned by ``items()`` as XML.

    Subclasses customise the output by overriding ``items``, ``loc``,
    ``lastmod``, ``changefreq`` and ``priority``.  Each of those may be
    either a plain attribute (the same value is used for every item) or a
    callable taking the item.
    """

    __namespace = 'http://www.sitemaps.org/schemas/sitemap/0.9'

    # Optional per-URL fields; ``None`` means the element is omitted.
    lastmod = None
    changefreq = None
    priority = None

    def __init__(self, url=''):
        """Store *url*; it is parsed by the ``url`` property setter."""
        self.url = url

    def __iter__(self):
        """Yield the serialized document as a single chunk."""
        yield str(self)

    def __str__(self):
        """Serialize the sitemap to a UTF-8 XML string.

        The tree from ``_urlset()`` is run through a small XSLT stylesheet
        (an identity template plus a template matching ``/urlset`` that
        emits a comment and a rebuilt ``urlset`` element), after which the
        ``xmlns`` attribute is set on the result root.
        """
        xsl = 'http://www.w3.org/1999/XSL/Transform'

        def tag(local):
            # Qualified name in the XSLT namespace.
            return etree.QName(xsl, local)

        stylesheet = etree.Element(tag('stylesheet'), version='1.0',
                                   nsmap={'xsl': xsl})

        # Identity template: copy every attribute and node unchanged.
        identity = etree.SubElement(stylesheet, tag('template'),
                                    match='@*|node()')
        copy = etree.SubElement(identity, tag('copy'))
        etree.SubElement(copy, tag('apply-templates'))

        # Root template: emit the comment, then a new urlset element that
        # carries over the namespace nodes and the transformed children.
        root = etree.SubElement(stylesheet, tag('template'), match='/urlset')
        etree.SubElement(root, tag('comment')).text = ' $Id$ '
        urlset = etree.SubElement(root, 'urlset')
        etree.SubElement(urlset, tag('copy-of'), select='namespace::*')
        etree.SubElement(urlset, tag('apply-templates'))

        document = etree.XSLT(stylesheet)(self._urlset())
        document.getroot().set('xmlns', self.__namespace)

        return etree.tostring(document, encoding='UTF-8', pretty_print=True,
                              xml_declaration=True)

    def _(self, name):
        """Return attribute *name* as a callable.

        Non-callable attributes are wrapped in a function that ignores its
        arguments and returns the attribute value.
        """
        value = getattr(self, name)

        if not callable(value):
            return lambda *args, **kwargs: value

        return value

    def _required(self, parent, name, item=None, value=None, multiple=False,
                  function=None, namespace=None):
        """Append element(s) *name* to *parent*.

        parent    -- element receiving the new child element(s)
        name      -- attribute/element name, or an ``etree.QName``
        item      -- when not None, the value is looked up via
                     ``self._(name)(item)``
        value     -- explicit value, used when *item* is None
        multiple  -- when true, *value* is iterated and one element is
                     appended per entry
        function  -- optional converter applied to the value before it is
                     stored as element text
        namespace -- when not None, *name* becomes a QName in that
                     namespace, using the part of *name* after the first
                     underscore as the local name
        """
        if item is not None:
            value = self._(name)(item)

        if namespace is not None and not isinstance(name, etree.QName):
            name = etree.QName(namespace, name.partition('_')[2])

        if multiple:
            for entry in value:
                self._required(parent, name, None, entry, function=function)
        else:
            text = function(value) if function is not None else value
            etree.SubElement(parent, name).text = text

    def _optional(self, parent, name, item, **kwargs):
        """Like ``_required`` but skip the element when the value is None.

        Returns True when an element was emitted, False otherwise.
        """
        value = self._(name)(item)

        if value is None:
            return False

        self._required(parent, name, value=value, **kwargs)

        return True

    def _url(self, url, item):
        """Fill the ``url`` element with the core fields for *item*."""
        # namespace=None is passed explicitly so that the _required
        # overrides installed by namespace() do not qualify core elements.
        self._required(url, 'loc', item, function=self._url_value,
                       namespace=None)
        self._optional(url, 'lastmod', item,
                       function=lambda value: value.isoformat(),
                       namespace=None)
        self._optional(url, 'changefreq', item, namespace=None)
        self._optional(url, 'priority', item,
                       function=lambda value: '%.1f' % value, namespace=None)

    def _urls(self, urlset):
        """Append one ``url`` element per item to *urlset*."""
        for item in self._('items')():
            self._url(etree.SubElement(urlset, 'url'), item)

    def _urlset(self, nsmap=None):
        """Build and return the populated ``urlset`` element.

        nsmap -- optional prefix-to-namespace mapping for the root element.
                 It is copied, so a caller's dict is never modified.
        """
        # A fresh dict per call: a shared ``{}`` default would keep state
        # between calls.
        nsmap = dict(nsmap or {})
        urlset = etree.Element('urlset', xmlns=self.__namespace, nsmap=nsmap)

        self._urls(urlset)

        return urlset

    def _url_value(self, value):
        """Return *value* as an absolute URL.

        URLs that already have a scheme and a netloc are returned as parsed
        and re-assembled; anything else takes scheme and netloc from
        ``self.url``.
        """
        value = urlparse.urlsplit(value)

        if value.scheme and value.netloc:
            return value.geturl()

        return urlparse.urlunsplit(self.url[:2] + value[2:])

    def items(self):
        """Return the items to list; the base class has none."""
        return []

    def loc(self, item):
        """Return the location for *item*; by default the item itself."""
        return item

    @property
    def url(self):
        """Base URL as the result of ``urlparse.urlsplit``."""
        return self.__url

    @url.setter
    def url(self, url):
        self.__url = urlparse.urlsplit(url)


def namespace(cls):
    """Return source for a ``_required`` override for the class named *cls*.

    The generated method defaults the ``namespace`` keyword to that class's
    private ``__namespace`` attribute and delegates to the parent
    ``_required``.  The text is meant to be exec'd inside the class body;
    the mangled attribute name is spelled out because exec'd code is not
    compiled in the class context.
    """
    return '''
def _required(self, *args, **kwargs):
    kwargs.setdefault('namespace', self._%s__namespace)

    super(%s, self)._required(*args, **kwargs)
''' % (cls, cls)


class VideoSitemap(Sitemap):
    """Sitemap whose URLs each carry a ``video`` child element."""

    __namespace = 'http://www.google.com/schemas/sitemap-video/1.1'

    # Optional fields; ``None`` means the element is omitted.
    video_content_loc = None
    video_player_loc = None
    video_duration = None
    video_expiration_date = None
    video_content_segment_loc = None
    video_rating = None
    video_view_count = None
    video_publication_date = None
    video_tag = None
    video_category = None
    video_family_friendly = None
    # NOTE(review): the five fields below are declared but never written
    # by _url() -- confirm whether that is intentional.
    video_restriction = None
    video_gallery_loc = None
    video_price = None
    video_requires_subscription = None
    video_uploader = None

    # The template is a constant and the class name is hard-coded, so no
    # external input reaches exec.
    exec(namespace('VideoSitemap'))

    def _url(self, url, item):
        """Add the core fields and a ``video`` element for *item*.

        Raises AssertionError when neither ``video_content_loc`` nor
        ``video_player_loc`` yields a value.
        """
        super(VideoSitemap, self)._url(url, item)

        video = etree.SubElement(url, etree.QName(self.__namespace, 'video'))

        self._required(video, 'video_thumbnail_loc', item,
                       function=self._url_value)
        self._required(video, 'video_title', item)
        self._required(video, 'video_description', item)

        video_content_loc = self._optional(video, 'video_content_loc', item,
                                           function=self._url_value)
        video_player_loc = self._optional(video, 'video_player_loc', item,
                                          function=self._url_value)

        assert video_content_loc or video_player_loc, \
            'video_content_loc or video_player_loc is required'

        self._optional(video, 'video_duration', item,
                       function=lambda value: '%u' % value)
        self._optional(video, 'video_expiration_date', item,
                       function=lambda value: value.isoformat())
        self._optional(video, 'video_content_segment_loc', item,
                       multiple=True, function=self._url_value)
        self._optional(video, 'video_rating', item,
                       function=lambda value: '%.1f' % value)
        # Fixed: the formatter used to return the literal '%u' without
        # interpolating the value.
        self._optional(video, 'video_view_count', item,
                       function=lambda value: '%u' % value)
        self._optional(video, 'video_publication_date', item,
                       function=lambda value: value.isoformat())
        self._optional(video, 'video_tag', item, multiple=True)
        self._optional(video, 'video_category', item)
        self._optional(video, 'video_family_friendly', item,
                       function=lambda value: 'yes' if value else 'no')

    def _urlset(self, nsmap=None):
        """Build the urlset with the ``video`` prefix registered."""
        nsmap = dict(nsmap or {})
        nsmap['video'] = self.__namespace

        return super(VideoSitemap, self)._urlset(nsmap)

    def video_thumbnail_loc(self, item):
        """Return the thumbnail URL for *item*; must be overridden."""
        raise NotImplementedError

    def video_title(self, item):
        """Return the video title for *item*; must be overridden."""
        raise NotImplementedError

    def video_description(self, item):
        """Return the video description for *item*; must be overridden."""
        raise NotImplementedError


class MobileSitemap(Sitemap):
    """Sitemap whose URLs each carry an empty ``mobile`` child element."""

    __namespace = 'http://www.google.com/schemas/sitemap-mobile/1.0'

    def _url(self, url, item):
        """Add the core fields and an empty ``mobile`` element."""
        super(MobileSitemap, self)._url(url, item)

        etree.SubElement(url, etree.QName(self.__namespace, 'mobile'))

    def _urlset(self, nsmap=None):
        """Build the urlset with the ``mobile`` prefix registered."""
        nsmap = dict(nsmap or {})
        nsmap['mobile'] = self.__namespace

        return super(MobileSitemap, self)._urlset(nsmap)


class GeoSitemap(Sitemap):
    """Sitemap whose URLs each carry a ``geo`` child element."""

    __namespace = 'http://www.google.com/geo/schemas/sitemap/1.0'

    exec(namespace('GeoSitemap'))

    def geo_format(self, item):
        """Return the geo format for *item*; must be overridden."""
        raise NotImplementedError

    def _url(self, url, item):
        """Add the core fields and a ``geo`` element with its format."""
        super(GeoSitemap, self)._url(url, item)

        geo = etree.SubElement(url, etree.QName(self.__namespace, 'geo'))

        self._required(geo, 'geo_format', item)

    def _urlset(self, nsmap=None):
        """Build the urlset with the ``geo`` prefix registered."""
        nsmap = dict(nsmap or {})
        nsmap['geo'] = self.__namespace

        return super(GeoSitemap, self)._urlset(nsmap)


class CodeSearchSitemap(Sitemap):
    """Sitemap whose URLs each carry a ``codesearch`` child element."""

    __namespace = 'http://www.google.com/codesearch/schemas/sitemap/1.0'

    # Optional fields; ``None`` means the element is omitted.
    codesearch_license = None
    codesearch_filename = None
    codesearch_packageurl = None
    codesearch_packagemap = None

    exec(namespace('CodeSearchSitemap'))

    def _url(self, url, item):
        """Add the core fields and a ``codesearch`` element for *item*."""
        super(CodeSearchSitemap, self)._url(url, item)

        codesearch = etree.SubElement(
            url, etree.QName(self.__namespace, 'codesearch'))

        self._required(codesearch, 'codesearch_filetype', item)
        self._optional(codesearch, 'codesearch_license', item)
        self._optional(codesearch, 'codesearch_filename', item)
        self._optional(codesearch, 'codesearch_packageurl', item,
                       function=self._url_value)
        self._optional(codesearch, 'codesearch_packagemap', item)

    def _urlset(self, nsmap=None):
        """Build the urlset with the ``codesearch`` prefix registered."""
        nsmap = dict(nsmap or {})
        nsmap['codesearch'] = self.__namespace

        return super(CodeSearchSitemap, self)._urlset(nsmap)

    def codesearch_filetype(self, item):
        """Return the file type for *item*; must be overridden."""
        raise NotImplementedError


class ImageSitemap(Sitemap):
    """Sitemap whose URLs each carry an ``image`` child element."""

    __namespace = 'http://www.google.com/schemas/sitemap-image/1.1'

    # Optional fields; ``None`` means the element is omitted.
    image_caption = None
    image_geo_location = None
    image_title = None
    image_license = None

    exec(namespace('ImageSitemap'))

    def _url(self, url, item):
        """Add the core fields and an ``image`` element for *item*."""
        super(ImageSitemap, self)._url(url, item)

        image = etree.SubElement(url, etree.QName(self.__namespace, 'image'))

        self._required(image, 'image_loc', item, function=self._url_value)
        self._optional(image, 'image_caption', item)
        self._optional(image, 'image_geo_location', item)
        self._optional(image, 'image_title', item)
        self._optional(image, 'image_license', item)

    def _urlset(self, nsmap=None):
        """Build the urlset with the ``image`` prefix registered."""
        nsmap = dict(nsmap or {})
        nsmap['image'] = self.__namespace

        return super(ImageSitemap, self)._urlset(nsmap)

    def image_image(self, item):
        # NOTE(review): nothing in this file calls image_image; kept so the
        # public surface is unchanged.
        raise NotImplementedError

    def image_loc(self, item):
        """Return the image URL for *item*; must be overridden."""
        raise NotImplementedError


class _Sitemaps(Sitemap):
    """Combine several sitemaps into a single ``urlset``."""

    def __init__(self, sitemaps):
        """Store *sitemaps*; a falsy value produces an empty urlset."""
        self.__sitemaps = sitemaps

    def _urlset(self, nsmap=None):
        """Build one urlset holding the URLs of every wrapped sitemap.

        The first sitemap creates the root element (and so is the only one
        that registers its namespace prefix); the rest append their ``url``
        elements to it.
        """
        if not self.__sitemaps:
            return super(_Sitemaps, self)._urlset(nsmap)

        urlset = None

        for index, child in enumerate(self.__sitemaps):
            if not index:
                urlset = child._urlset(nsmap)
            else:
                child._urls(urlset)

        return urlset

    @property
    def url(self):
        """Always None; the wrapped sitemaps hold the base URL."""
        return None

    @url.setter
    def url(self, url):
        if self.__sitemaps:
            for child in self.__sitemaps:
                child.url = url


def sitemap(sitemaps=None, gz=False, url=None, string=False):
    """Return a sitemap, optionally gzip-compressed and/or as a string.

    sitemaps -- a ``Sitemap`` instance, or a collection of them (or None),
                which is wrapped so everything lands in one urlset
    gz       -- when true, gzip the document
    url      -- when not None, assigned as the base URL of the sitemap(s)
    string   -- when true, return a string instead of an object

    Without *gz* the result is ``str(sitemaps)`` or the sitemap object
    itself; with *gz* it is the compressed bytes or a ``StringIO`` rewound
    to the start.
    """
    if not isinstance(sitemaps, Sitemap):
        # Fixed: this used to reference the undefined name ``_Sitemap``.
        sitemaps = _Sitemaps(sitemaps)

    if url is not None:
        sitemaps.url = url

    if not gz:
        return str(sitemaps) if string else sitemaps

    buf = StringIO()
    archive = gzip.GzipFile(filename='sitemap.xml', mode='wb', fileobj=buf)

    try:
        archive.writelines(sitemaps)
    finally:
        # Closing writes the gzip trailer; do it even if writing fails.
        archive.close()

    buf.seek(0)

    return buf.getvalue() if string else buf


def wsgi(function=None, string=False):
    """Decorator that turns a returned dict into a sitemap.

    Usable bare (``@wsgi``) or with arguments (``@wsgi(string=True)``).
    When the wrapped function returns a dict, its ``sitemaps``, ``gz`` and
    ``url`` entries are passed to ``sitemap()``; any other return value is
    passed through unchanged.
    """
    class WSGI(object):
        def __init__(self, function):
            self.function = function
            # Copy the wrapped function's identifying metadata.
            self.__name__ = function.__name__
            self.__module__ = function.__module__
            self.__doc__ = function.__doc__

        def __call__(self, *args, **kwargs):
            value = self.function(*args, **kwargs)

            if isinstance(value, dict):
                return sitemap(value.get('sitemaps'), value.get('gz'),
                               value.get('url'), string=string)

            return value

    return WSGI(function) if function is not None else WSGI


def bottle(function):
    """``wsgi`` decorator variant for the bottle framework.

    For dict results the base URL defaults to the current request URL and
    the response content type is set to ``application/xml``; gzipped
    results also get a ``Content-Encoding: x-gzip`` header.
    """
    @wsgi
    def _bottle(*args, **kwargs):
        # Imported lazily so the framework is only needed when this
        # decorator is actually used.
        import bottle

        value = function(*args, **kwargs)

        if isinstance(value, dict):
            value.setdefault('url', bottle.request.url)

            if value.get('gz'):
                bottle.response.headers['Content-Encoding'] = 'x-gzip'

            bottle.response.content_type = 'application/xml'

        return value

    return _bottle