"""
Concatenate a list of files into a single cached file and serve it through
web.py, using HTTP validation headers (Last-Modified / ETag / Expires).
"""
import web

try:
    import cPickle as pickle
except ImportError:
    # Python 3 has no cPickle module; plain pickle is used instead.
    import pickle

from time import gmtime
from datetime import datetime, timedelta
from os import mkdir
from os.path import getmtime, exists, dirname, realpath
from hashlib import md5
from copy import copy

ADMINISTRATION_FILE = 'administration.pickle'


def to_dir(path):
    """
    Transform a path to an absolute directory, that can be used as prefix
    before a filename.

    Note that dirname() drops the last path component, so a directory has to
    be passed with a trailing slash ('cached/', not 'cached').
    """
    return realpath(dirname(path)) + '/'


def assert_file_exists(path):
    """
    Raise an IOError if the given path does not exist.
    """
    if not exists(path):
        raise IOError('File "%s" does not exist.' % path)


def seconds_to_datetime(seconds):
    """
    Convert a number of seconds since the epoch to a naive UTC datetime.
    """
    # Only the first six struct_time fields (year .. second) are datetime
    # arguments; the seventh is the weekday and would be interpreted as
    # microseconds.
    return datetime(*gmtime(seconds)[:6])


def _to_bytes(text):
    """
    Return `text` as a byte string, which is what hashlib requires.
    """
    if isinstance(text, bytes):
        return text

    return text.encode('utf-8')


class Cache:
    """
    A Cache instance represents a single file in the cache directory, which is
    a concatenation of the file list.
    """

    def __init__(self, root='', files=None, cached='cached/', expires=None):
        """
        The constructor takes the following arguments (all are optional):
        1. The directory in which the files to cache are located (empty by
           default).
        2. An initial list of files to include in the cache file.
        3. The cached files directory (defaults to 'cached/').
        4. A dictionary containing arguments to the timedelta constructor,
           that indicates how long the cache object should live (defaults to
           {'days': 365}).

        Raises an IOError if one of the files does not exist.
        """
        self.root = to_dir(root)
        self.cached = to_dir(cached)
        self.files = []
        # A fresh dictionary per instance, so that changing the expiration of
        # one cache object does not affect the others.
        self.expires = {'days': 365} if expires is None else expires

        if files is not None:
            for f in files:
                self.add(f)

        web.debug(str(self))
        self.assert_files_exist()

    def assert_files_exist(self):
        """
        Raise an IOError if one of the files in the cache object is missing.
        """
        for path in self.files:
            assert_file_exists(path)

    def __str__(self):
        return '<Cache root="%s" filename="%s" files=[%s]>' \
                % (self.root, self.filename(), ','.join(self.files))

    def _invalidate_modification_dates(self):
        """
        Forget the collected modification dates, since the file list changed.
        """
        if hasattr(self, 'modified'):
            del self.modified

    def add(self, path, absolute=False):
        """
        Add a file to the cache object. Requires the full path to the file,
        relative to the root directory. If the second argument is True, the
        root directory will not be prepended to the path.

        Raises an IOError if the file does not exist.
        """
        if not absolute:
            path = self.root + path

        web.debug('Adding file "%s"...' % path)
        assert_file_exists(path)
        self.files.append(path)
        self._invalidate_modification_dates()

    def remove(self, path, absolute=False):
        """
        Remove a file from the cache object. Requires the full path to the
        file, relative to the root directory. If the second argument is True,
        the root directory will not be prepended to the path.

        Raises a ValueError if the file is not in the cache object.
        """
        if not absolute:
            path = self.root + path

        self.files.remove(path)
        self._invalidate_modification_dates()

    def assert_modification_dates_exist(self):
        """
        Get the latest modification dates of each file in the cache object.
        The dates are collected once and kept in self.modified until the file
        list changes.
        """
        if hasattr(self, 'modified'):
            return

        self.modified = {}

        for path in self.files:
            self.modified[path] = getmtime(path)

    def assert_cached_folder_exist(self):
        """
        Assert that the folder for cached files is created.
        """
        if not exists(self.cached):
            try:
                mkdir(self.cached)
            except OSError:
                # Another request may have created the folder in the meantime.
                if not exists(self.cached):
                    raise

    def save_administration(self, old_admin=None):
        """
        Generate a pickle file containing the modification dates of the cached
        file list. The old_admin parameter can contain additional files which
        are not in this cache object, but do need to be kept in the
        administration for other cache objects.
        """
        self.assert_modification_dates_exist()
        self.assert_cached_folder_exist()

        admin = {} if old_admin is None else copy(old_admin)
        admin.update(self.modified)

        # Save the merged administration, not only the files of this object.
        with open(self.cached + ADMINISTRATION_FILE, 'wb') as f:
            pickle.dump(admin, f)

    def load_administration(self):
        """
        Load the saved modification dates. Returns an empty dictionary if the
        administration file does not exist or cannot be read, in which case
        the cached file will simply be generated again.
        """
        path = self.cached + ADMINISTRATION_FILE

        if not exists(path):
            return {}

        # NOTE: unpickling executes whatever the file describes, so the cached
        # folder must not be writable by untrusted parties.
        try:
            with open(path, 'rb') as f:
                modified = pickle.load(f)
        except (pickle.UnpicklingError, EOFError, ValueError, IndexError,
                KeyError, AttributeError, ImportError):
            web.debug('Administration file "%s" is unreadable, ignoring it...'
                      % path)
            return {}

        if not isinstance(modified, dict):
            return {}

        return modified

    def last_modified(self):
        """
        Return the most recent modification date (in seconds since the epoch)
        of the files in the cache object.

        Raises a ValueError if the cache object contains no files.
        """
        self.assert_modification_dates_exist()

        # Compare the modification dates themselves, not the file paths.
        return max(self.modified.values())

    def etag(self):
        """
        Generate an Etag for the cache object, using the names of the files
        included and the latest modification date.
        """
        h = md5()
        h.update(_to_bytes(','.join(self.files) + str(self.last_modified())))

        return h.hexdigest()

    def filename(self):
        """
        Return the path of the cached file, which is named after the MD5 hash
        of the file list.
        """
        h = md5()
        h.update(_to_bytes(','.join(self.files)))

        return self.cached + h.hexdigest()

    def content(self):
        """
        Return the concatenated content of all files in the cache object.
        """
        parts = []

        for path in self.files:
            with open(path, 'r') as f:
                parts.append(f.read())

        return ''.join(parts)

    def output(self):
        """
        Return the concatenated content, (re)generating the cached file if it
        does not exist yet or if one of the files has been added or updated
        since the administration was saved. Also sets the caching headers of
        the response.

        Raises web.NotModified if web.http.modified() decides that the client
        already has the current version.
        """
        # Update cached file
        last_modified = self.last_modified()
        path = self.filename()
        admin = self.load_administration()

        if not exists(path):
            web.debug('Cached file "%s" does not exist yet, generating it...' \
                      % path)
            server_modified = True
        else:
            server_modified = False

            for f_path, f_modified in self.modified.items():
                if f_path not in admin:
                    web.debug('File "%s" has been added.' % f_path)
                    server_modified = True
                elif f_modified > admin[f_path]:
                    web.debug('File "%s" has been updated.' % f_path)
                    server_modified = True

        if server_modified:
            self.save_administration(admin)
            content = self.content()

            with open(path, 'w') as f:
                f.write(content)

        try:
            # Set headers
            web.header('Cache-Control', 'private')

            if not server_modified:
                web.http.modified(seconds_to_datetime(last_modified),
                                  self.etag())

            web.http.expires(timedelta(**self.expires))

            if not server_modified:
                # Concatenated content has not been loaded yet, read the cached
                # file
                web.debug('Cached file "%s" already exists...' % path)

                with open(path, 'r') as f:
                    content = f.read()

            return content
        except web.NotModified:
            web.debug('Cached file "%s" not modified, setting 304 header...' \
                      % path)
            # Bare raise keeps the original traceback.
            raise