#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
"""
caching (Caching Module)
========================

**Author:**

* Dirk Alders

**Description:**

    This Module supports functions and classes for caching e.g. properties of other instances.

**Submodules:**

* :class:`caching.property_cache_json`
* :class:`caching.property_cache_pickle`

**Unittest:**

    See also the :download:`unittest ` documentation.
"""
__DEPENDENCIES__ = []

import json
import logging
import os
import pickle
import time

try:
    from config import APP_NAME as ROOT_LOGGER_NAME
except ImportError:
    ROOT_LOGGER_NAME = 'root'
logger = logging.getLogger(ROOT_LOGGER_NAME).getChild(__name__)

# NOTE: r'\_' avoids the invalid escape sequence '\_' (SyntaxWarning on modern
# CPython); the runtime value is unchanged.
__DESCRIPTION__ = """The Module {\\tt %s} is designed to store information in {\\tt json} or {\\tt pickle} files to support them much faster then generating them from the original source file.
For more Information read the documentation.""" % __name__.replace('_', r'\_')
"""The Module Description"""
__INTERPRETER__ = (3, )
"""The Tested Interpreter-Versions"""


class property_cache_pickle(object):
    """
    This class caches the data from a given `source_instance`. It takes the data from the cache instead of
    generating the data from the `source_instance`, if the conditions for the cache usage are given.

    .. admonition:: Required properties for the `source_instance`

        * **uid():** returns the unique id of the source's source or None, if you don't want to use the unique id.
        * **keys():** returns a list of all available keys.
        * **data_version():** returns a version number of the current data (it should be increased, if the get
          method of the source instance returns improved values or the data structure had been changed).
        * **get(key, default):** returns the property for a key. If key does not exist, default will be returned.

    :param source_instance: The source instance holding the data
    :type source_instance: instance
    :param cache_filename: File name, where the properties are stored as cache
    :type cache_filename: str
    :param load_all_on_init: True will load all data from the source instance, when the cache will be
                             initialised the first time.
    :type load_all_on_init: bool
    :param callback_on_data_storage: The callback will be executed every time when the cache file is stored.
                                     It will be executed with the instance of this class as first argument.
    :type callback_on_data_storage: method
    :param max_age: The maximum age of the cached data in seconds or None for no maximum age.
    :type max_age: int or None
    :param store_on_get: False will prevent cache storage with execution of the `.get(key, default)` method.
                         You need to store the cache somewhere else.
    :type store_on_get: bool

    .. admonition:: The cache will be used, if all following conditions are given

        * The key is in the list returned by `.keys()` method of the `source_instance`
        * The key is not in the list of keys added by the `.add_source_get_keys()` method.
        * The cache age is less than the given max_age parameter or the given max_age is None.
        * The uid of the source instance (e.g. a checksum or unique id of the source) is identical to the
          uid stored in the cache.
        * The data version of the `source_instance` is <= the data version stored in the cache.
        * The value is available in the previously stored information

    **Example:**

    .. literalinclude:: caching/_examples_/property_cache_pickle.py

    Will result on the first execution to the following output (with a long execution time):

    .. literalinclude:: caching/_examples_/property_cache_pickle_1.log

    With every following execution the time consumption may be much smaller:

    .. literalinclude:: caching/_examples_/property_cache_pickle_2.log
    """
    # Keys used inside the stored cache dictionary.
    DATA_VERSION_TAG = '_property_cache_data_version_'
    STORAGE_VERSION_TAG = '_storage_version_'
    UID_TAG = '_property_cache_uid_'
    DATA_TAG = '_data_'
    AGE_TAG = '_age_'
    #
    STORAGE_VERSION = 1

    def __init__(self, source_instance, cache_filename, load_all_on_init=False, callback_on_data_storage=None, max_age=None, store_on_get=True):
        self._source_instance = source_instance
        self._cache_filename = cache_filename
        self._load_all_on_init = load_all_on_init
        self._callback_on_data_storage = callback_on_data_storage
        self._max_age = max_age
        self._store_on_get = store_on_get
        #
        self._source_get_keys = []
        # Lazy initialised on first access; None means "cache not loaded yet".
        self._cached_props = None

    def add_source_get_keys(self, keys):
        """
        This will add one or more keys to a list of keys which will always be provided by the
        `source_instance` instead of the cache.

        :param keys: The key or keys to be added
        :type keys: list, tuple, str
        """
        if type(keys) in [list, tuple]:
            self._source_get_keys.extend(keys)
        else:
            self._source_get_keys.append(keys)

    def full_update(self, sleep_between_keys=0):
        """
        With the execution of this method, the complete source data which needs to be cached, will be read
        from the source instance and the resulting cache will be stored to the given file.

        :param sleep_between_keys: Time to sleep between each source data generation
        :type sleep_between_keys: float, int

        .. hint:: Use this method, if you initialised the class with `store_on_get=False`
        """
        self._load_source(sleep_between_keys=sleep_between_keys)
        self._save_cache()

    def get(self, key, default=None):
        """
        Method to get the cached property. If the key does not exist in the cache or `source_instance`,
        `default` will be returned.

        :param key: key for value to get.
        :param default: value to be returned, if key does not exist.
        :returns: value for a given key or default value.
        """
        if key in self._source_instance.keys() and key not in self._source_get_keys:
            if self._cached_props is None:
                self._init_cache()
            # Hoisted: the filtered key is used several times below.
            storage_key = self._key_filter(key)
            if self._max_age is None:
                cache_old = False
            else:
                # A missing age entry defaults to 0 (epoch) and therefore counts as old.
                cache_old = time.time() - self._cached_props[self.AGE_TAG].get(storage_key, 0) > self._max_age
                if cache_old:
                    logger.debug("The cached value is old, cached value will be ignored")
            if storage_key not in self._cached_props[self.DATA_TAG] or cache_old:
                logger.debug("Loading property for key='%s' from source instance", key)
                val = self._source_instance.get(key, None)
                if self._store_on_get:
                    tm = int(time.time())
                    logger.debug("Adding key=%s, value=%s with timestamp=%d to chache", key, val, tm)
                    self._cached_props[self.DATA_TAG][storage_key] = val
                    self._cached_props[self.AGE_TAG][storage_key] = tm
                    self._save_cache()
                else:
                    # Cache storage disabled: hand back the freshly generated value directly.
                    return val
            else:
                logger.debug("Providing property for '%s' from cache", key)
            return self._cached_props[self.DATA_TAG].get(storage_key, default)
        else:
            if key not in self._source_instance.keys():
                logger.debug("Key '%s' is not in cached_keys. Uncached data will be returned.", key)
            elif key in self._source_get_keys:
                logger.debug("Key '%s' is excluded by .add_source_get_keys(). Uncached data will be returned.", key)
            return self._source_instance.get(key, default)

    def _data_version(self):
        # Data version stored in the cache or None, if no cache is loaded.
        if self._cached_props is None:
            return None
        else:
            return self._cached_props.get(self.DATA_VERSION_TAG, None)

    def _storage_version(self):
        # Storage format version stored in the cache or None, if no cache is loaded.
        if self._cached_props is None:
            return None
        else:
            return self._cached_props.get(self.STORAGE_VERSION_TAG, None)

    def _init_cache(self):
        # Load the cache file and decide whether its content may be used or has to be rebuilt.
        load_cache = self._load_cache()
        uid = self._source_instance.uid() != self._uid()
        try:
            data_version = self._source_instance.data_version() > self._data_version()
        except TypeError:
            # No data version stored yet (None comparison) -> treat as outdated.
            data_version = True
        # '!=' never raises TypeError, so no exception handling is needed here
        # (a None storage version simply compares unequal and triggers a rebuild).
        storage_version = self._storage_version() != self.STORAGE_VERSION
        #
        if not load_cache or uid or data_version or storage_version:
            if load_cache:
                if self._uid() is not None and uid:
                    logger.debug("Source uid changed, ignoring previous cache data")
                if self._data_version() is not None and data_version:
                    logger.debug("Data version increased, ignoring previous cache data")
                if storage_version:
                    logger.debug("Storage version changed, ignoring previous cache data")
            self._cached_props = {self.AGE_TAG: {}, self.DATA_TAG: {}}
            if self._load_all_on_init:
                self._load_source()
            self._cached_props[self.UID_TAG] = self._source_instance.uid()
            self._cached_props[self.DATA_VERSION_TAG] = self._source_instance.data_version()
            self._cached_props[self.STORAGE_VERSION_TAG] = self.STORAGE_VERSION
            self._save_cache()

    def _load_cache(self):
        # Returns True, if a cache file exists and was loaded; False otherwise.
        if os.path.exists(self._cache_filename):
            with open(self._cache_filename, 'rb') as fh:
                self._cached_props = pickle.load(fh)
            logger.debug('Loading properties from cache (%s)', self._cache_filename)
            return True
        else:
            logger.debug('Cache file does not exists (yet).')
            return False

    def _key_filter(self, key):
        # Identity here; subclasses may transform keys for their storage format.
        return key

    def _load_source(self, sleep_between_keys=0):
        # Read all cacheable keys from the source instance into the cache dictionary.
        if self._cached_props is None:
            self._init_cache()
        logger.debug('Loading all data from source - %s', repr(self._source_instance.keys()))
        for key in self._source_instance.keys():
            if key not in self._source_get_keys:
                self._cached_props[self.DATA_TAG][self._key_filter(key)] = self._source_instance.get(key)
                self._cached_props[self.AGE_TAG][self._key_filter(key)] = int(time.time())
                time.sleep(sleep_between_keys)

    def _save_cache(self):
        # Persist the cache and notify the optional storage callback.
        with open(self._cache_filename, 'wb') as fh:
            pickle.dump(self._cached_props, fh)
            logger.debug('cache-file stored (%s)', self._cache_filename)
        if self._callback_on_data_storage is not None:
            self._callback_on_data_storage(self)

    def _uid(self):
        # Source uid stored in the cache or None, if no cache is loaded.
        if self._cached_props is None:
            return None
        else:
            return self._cached_props.get(self.UID_TAG, None)


class property_cache_json(property_cache_pickle):
    """
    See also parent :py:class:`property_cache_pickle` for detailed information.

    .. important::

        * This class uses json. You should **only** use keys of type string!
        * Unicode types are transferred to strings

        See limitations of json.

    **Example:**

    .. literalinclude:: caching/_examples_/property_cache_json.py

    Will result on the first execution to the following output (with a long execution time):

    .. literalinclude:: caching/_examples_/property_cache_json_1.log

    With every following execution the time consumption may be much smaller:

    .. literalinclude:: caching/_examples_/property_cache_json_2.log
    """

    def _load_cache(self):
        # Returns True, if a cache file exists and was loaded; False otherwise.
        if os.path.exists(self._cache_filename):
            with open(self._cache_filename, 'r') as fh:
                self._cached_props = json.load(fh)
            logger.debug('Loading properties from cache (%s)', self._cache_filename)
            return True
        else:
            logger.debug('Cache file does not exists (yet).')
            return False

    def _save_cache(self):
        # Persist the cache as human-readable json and notify the optional storage callback.
        with open(self._cache_filename, 'w') as fh:
            json.dump(self._cached_props, fh, sort_keys=True, indent=4)
            logger.debug('cache-file stored (%s)', self._cache_filename)
        if self._callback_on_data_storage is not None:
            self._callback_on_data_storage(self)