# cache.py
  1. import web
  2. import cPickle as pickle
  3. from time import gmtime
  4. from datetime import datetime, timedelta
  5. from os import mkdir
  6. from os.path import getmtime, exists, dirname, realpath
  7. from hashlib import md5
  8. from copy import copy
# Name of the file, inside the cache directory, in which the modification
# times of the cached source files are stored (see
# Cache.save_administration / Cache.load_administration).
# NOTE(review): despite the '.py' extension, the file is written with pickle
# and is not Python source.
ADMINISTRATION_FILE = 'administration.py'
  10. def to_dir(path):
  11. """
  12. Transform a path to an absolute directory, that can be used as prefix
  13. before a filename.
  14. """
  15. return realpath(dirname(path)) + '/'
  16. def assert_file_exists(path):
  17. if not exists(path):
  18. raise IOError('File "%s" does not exist.' % path)
  19. def seconds_to_datetime(seconds):
  20. return datetime(*gmtime(seconds)[:7])
  21. class Cache:
  22. """
  23. A Cache instance represents a single file in the cache directory, which is
  24. a concatenation of the file list.
  25. """
  26. def __init__(self, root='', files=[], cached='cached',
  27. expires={'days': 365}):
  28. """
  29. The constructor takes the following arguments (all are optional):
  30. 1. The directory in which the cached files are located (empty by
  31. default).
  32. 2. An initial list of number of files to include in the cahce file.
  33. 3. The cached files directory (defaults to 'cached').
  34. 4. A dictionary containing arguments to the timedelta constructor, that
  35. indicates how long the cache object sould live.
  36. """
  37. self.root = to_dir(root + '/')
  38. self.cached = to_dir(cached + '/')
  39. self.files = map(lambda f: self.root + f, files)
  40. self.expires = expires
  41. self.assert_files_exist()
  42. def assert_files_exist(self):
  43. map(assert_file_exists, self.files)
  44. def __str__(self):
  45. return '<Cache filename=%s files=[%s]>' \
  46. % (self.filename(), ','.join(self.files))
  47. def add(self, path, absolute=False):
  48. """
  49. Add a file to the cache object. Requires the full path to the file,
  50. relative to the root directory. If the second argument is True, the
  51. root directory will not be prepended to the path.
  52. """
  53. if not absolute:
  54. path = self.root + path
  55. assert_file_exists(path)
  56. self.files.append(path)
  57. def remove(self, path, absolute=False):
  58. """
  59. Remove a file from the cache object. Requires the full path to the
  60. file, relative to the root directory. If the second argument is True,
  61. the root directory will not be prepended to the path.
  62. """
  63. if not absolute:
  64. path = self.root + path
  65. self.files.remove(path)
  66. def assert_modification_dates_exist(self):
  67. """
  68. Get the latest modification dates fo each file in the cache object.
  69. """
  70. if hasattr(self, 'modified'):
  71. return
  72. self.modified = {}
  73. for path in self.files:
  74. self.modified[path] = getmtime(path)
  75. def assert_cached_folder_exist(self):
  76. """
  77. Assert that the folder for cached files is created.
  78. """
  79. if not exists(self.cached):
  80. mkdir(self.cached)
  81. def save_administration(self, old_admin={}):
  82. """
  83. Generate a Python file containing the modification dates of the cached
  84. file list. The old_admin paramter can contain additional files which
  85. are not in this cache object, but do need to be kept in the
  86. administration for other cache objects.
  87. """
  88. self.assert_modification_dates_exist()
  89. self.assert_cached_folder_exist()
  90. admin = copy(old_admin)
  91. admin.update(self.modified)
  92. f = open(self.cached + ADMINISTRATION_FILE, 'w')
  93. pickle.dump(self.modified, f)
  94. f.close()
  95. def load_administration(self):
  96. path = self.cached + ADMINISTRATION_FILE
  97. if not exists(path):
  98. return {}
  99. f = open(path, 'r')
  100. modified = pickle.load(f)
  101. f.close()
  102. return modified
  103. def last_modified(self):
  104. self.assert_modification_dates_exist()
  105. return self.modified[max(self.modified)]
  106. def etag(self):
  107. """
  108. Generate an Etag for the cache object, using the names of the files
  109. included and the latest modification date.
  110. """
  111. h = md5()
  112. h.update(','.join(self.files) + str(self.last_modified()))
  113. return h.hexdigest()
  114. def filename(self):
  115. h = md5()
  116. h.update(','.join(self.files))
  117. return self.cached + h.hexdigest()
  118. def content(self):
  119. content = ''
  120. for path in self.files:
  121. f = open(path, 'r')
  122. content += f.read()
  123. f.close()
  124. return content
  125. def output(self):
  126. """"""
  127. # Update cached file
  128. last_modified = self.last_modified()
  129. path = self.filename()
  130. admin = self.load_administration()
  131. if not exists(path):
  132. web.debug('Cached file "%s" does not exist yet, generating it...')
  133. server_modified = True
  134. else:
  135. server_modified = False
  136. for f_path, f_modified in self.modified.iteritems():
  137. if f_path not in admin:
  138. web.debug('File "%s" has been added.' % f_path)
  139. server_modified = True
  140. elif f_modified > admin[f_path]:
  141. web.debug('File "%s" has been updated.' % f_path)
  142. server_modified = True
  143. if server_modified:
  144. self.save_administration(admin)
  145. content = self.content()
  146. f = open(path, 'w')
  147. f.write(content)
  148. f.close()
  149. try:
  150. web.http.modified(seconds_to_datetime(last_modified), self.etag())
  151. web.http.expires(timedelta(**self.expires))
  152. web.header('Cache-Control', 'private')
  153. if not server_modified:
  154. # Concatenated content has not been loaded yet, read the cached
  155. # file
  156. web.debug('Cached file "%s" already exists, sending content...')
  157. f = open(path, 'r')
  158. content = f.read()
  159. f.close()
  160. return content
  161. except web.NotModified as e:
  162. web.debug('Cached file "%s" not modified, setting 304 header...' \
  163. % path)
  164. raise e