cache.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import web
  2. import cPickle as pickle
  3. from time import gmtime
  4. from datetime import datetime, timedelta
  5. from os import mkdir
  6. from os.path import getmtime, exists
  7. from hashlib import md5
  8. from copy import copy
  9. ADMINISTRATION_FILE = 'administration.py'
  10. def to_dir(path):
  11. """
  12. Transform a path to a directory, that can be used as prefix before a
  13. filename.
  14. """
  15. if not path:
  16. return ''
  17. if path[-1] != '/':
  18. return path + '/'
  19. return path
  20. def assert_file_exists(path):
  21. if not exists(path):
  22. raise IOError('File "%s" does not exist.' % path)
  23. def seconds_to_datetime(seconds):
  24. return datetime(*gmtime(seconds)[:7])
  25. class Cache:
  26. """
  27. A Cache instance represents a single file in the cache directory, which is
  28. a concatenation of the file list.
  29. """
  30. def __init__(self, root='', files=[], cached='cached',
  31. expires={'days': 365}):
  32. """
  33. The constructor takes the following arguments (all are optional):
  34. 1. The directory in which the cached files are located (empty by
  35. default).
  36. 2. An initial list of number of files to include in the cahce file.
  37. 3. The cached files directory (defaults to 'cached').
  38. 4. A dictionary containing arguments to the timedelta constructor, that
  39. indicates how long the cache object sould live.
  40. """
  41. self.root = to_dir(root)
  42. self.cached = to_dir(cached)
  43. self.files = map(lambda f: self.root + f, files)
  44. self.expires = expires
  45. self.assert_files_exist()
  46. def assert_files_exist(self):
  47. map(assert_file_exists, self.files)
  48. def __str__(self):
  49. return '<Cache filename=%s files=[%s]>' \
  50. % (self.filename(), ','.join(self.files))
  51. def add(self, path, absolute=False):
  52. """
  53. Add a file to the cache object. Requires the full path to the file,
  54. relative to the root directory. If the second argument is True, the
  55. root directory will not be prepended to the path.
  56. """
  57. if not absolute:
  58. path = self.root + path
  59. assert_file_exists(path)
  60. self.files.append(path)
  61. def remove(self, path, absolute=False):
  62. """
  63. Remove a file from the cache object. Requires the full path to the
  64. file, relative to the root directory. If the second argument is True,
  65. the root directory will not be prepended to the path.
  66. """
  67. if not absolute:
  68. path = self.root + path
  69. self.files.remove(path)
  70. def assert_modification_dates_exist(self):
  71. """
  72. Get the latest modification dates fo each file in the cache object.
  73. """
  74. if hasattr(self, 'modified'):
  75. return
  76. self.modified = {}
  77. for path in self.files:
  78. self.modified[path] = getmtime(path)
  79. def assert_cached_folder_exist(self):
  80. """
  81. Assert that the folder for cached files is created.
  82. """
  83. if not exists(self.cached):
  84. mkdir(self.cached)
  85. def save_administration(self, old_admin={}):
  86. """
  87. Generate a Python file containing the modification dates of the cached
  88. file list. The old_admin paramter can contain additional files which
  89. are not in this cache object, but do need to be kept in the
  90. administration for other cache objects.
  91. """
  92. self.assert_modification_dates_exist()
  93. self.assert_cached_folder_exist()
  94. admin = copy(old_admin)
  95. admin.update(self.modified)
  96. f = open(self.cached + ADMINISTRATION_FILE, 'w')
  97. pickle.dump(self.modified, f)
  98. f.close()
  99. def load_administration(self):
  100. path = self.cached + ADMINISTRATION_FILE
  101. if not exists(path):
  102. return {}
  103. f = open(path, 'r')
  104. modified = pickle.load(f)
  105. f.close()
  106. return modified
  107. def last_modified(self):
  108. self.assert_modification_dates_exist()
  109. return self.modified[max(self.modified)]
  110. def etag(self):
  111. """
  112. Generate an Etag for the cache object, using the names of the files
  113. included and the latest modification date.
  114. """
  115. h = md5()
  116. h.update(','.join(self.files) + str(self.last_modified()))
  117. return h.hexdigest()
  118. def filename(self):
  119. h = md5()
  120. h.update(','.join(self.files))
  121. return self.cached + h.hexdigest()
  122. def content(self):
  123. content = ''
  124. for path in self.files:
  125. f = open(path, 'r')
  126. content += f.read()
  127. f.close()
  128. return content
  129. def output(self):
  130. """"""
  131. # Update cached file
  132. last_modified = self.last_modified()
  133. path = self.filename()
  134. admin = self.load_administration()
  135. if not exists(path):
  136. web.debug('Cached file "%s" does not exist yet, generating it...')
  137. server_modified = True
  138. else:
  139. server_modified = False
  140. for f_path, f_modified in self.modified.iteritems():
  141. if f_path not in admin:
  142. web.debug('File "%s" has been added.' % f_path)
  143. server_modified = True
  144. elif f_modified > admin[f_path]:
  145. web.debug('File "%s" has been updated.' % f_path)
  146. server_modified = True
  147. if server_modified:
  148. self.save_administration(admin)
  149. content = self.content()
  150. f = open(path, 'w')
  151. f.write(content)
  152. f.close()
  153. try:
  154. web.http.modified(seconds_to_datetime(last_modified), self.etag())
  155. web.http.expires(timedelta(**self.expires))
  156. web.header('Cache-Control', 'private')
  157. if not server_modified:
  158. # Concatenated content has not been loaded yet, read the cached
  159. # file
  160. web.debug('Cached file "%s" already exists, sending content...')
  161. f = open(path, 'r')
  162. content = f.read()
  163. f.close()
  164. return content
  165. except web.NotModified as e:
  166. web.debug('Cached file "%s" not modified, setting 304 header...' \
  167. % path)
  168. raise e