#!/usr/bin/env python

"""
Cabinet.py - locking, compressing, slicing, dicing shelves

Cabinets are very similar to shelves, except that you instantiate them with a
filename and a mode to open with, instead of feeding them the filehandle of a
dbm, as you do with shelves.

The first character of the mode is how you wish to open it, as with a dbm
file. However, you can also append other characters to make the Cabinet do
other things. Currently supported:

'l' - Lock the cabinet. This will use lockf() on a dummy file to prevent
      cabinet corruption.
's' - log transactions to a logfile.
'z' - cabinet compression. The cabinet will be compressed with zlib, to
      the level set in the compression attribute (3 by default). zlib must
      be available to use compression.

Cabinets also have more dictionary-like attributes, such as a get() method.

A dump() method has been added, to make it easy to pretty-print a particular
key, or the whole Cabinet if none is specified.

More features may be added in the future.

Currently, AnyCab (based on anydbm) and BtreeCab (based on bsddb) are
available. If you use BtreeCab, see the warning in its doc string.

Example:

>>> import Cabinet
>>> f = Cabinet.BtreeCab('/tmp/foo', 'clz')   # a locked, compressing cabinet
>>> f['a'] = 'foo'
>>> f.close()
>>> g = Cabinet.BtreeCab('/tmp/foo', 'rlz')   # read the cabinet
>>> g['a']
'foo'
>>> g.get('b', None)
None
>>> g.first()
('a', 'foo')

For more information, see the shelve, anydbm and bsddb modules.
"""

license = """
Copyright (c) 1998 Mark Nottingham

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

__version__ = '1.12'


### WHY A DUMMY LOCKFILE?
#
# There are several ways to go about locking a file, and they're all painful.
# Berkeley DB allows internal locking, but support (at least in bsddb) is
# very limited, and platform-dependent. Of flock and lockf, lockf seems to be
# GENERALLY preferred, and it at least has a prayer on NFS (although not a
# very good one).
#
# So, why a dummy file instead of the dbm itself? The most obvious reason is
# that different dbm implementations use different file naming schemes. Beyond
# that, I've been told that there are issues in locking dbms generally,
# because of the way the files are accessed. Unfortunately, I can't find a
# reference for this suspicion, and if anyone can confidently dispel it, I'll
# be more than glad to change the way this works. In the meantime, however,
# locking an entire dummy file is the safest, most general way out.
# Performance shouldn't suffer as long as it's closed after being used. If this
# isn't good enough, you should probably be using a database anyway.

from shelve import Shelf
from time import time

try:
    # Python 2 only; kept so the module namespace does not lose the name.
    from string import join
except ImportError:
    pass

try:
    from zlib import compress, decompress
except ImportError:
    # Keep the names bound so _parse_mode can report the missing module
    # with a CabinetError instead of tripping over a NameError.
    compress = decompress = None

try:
    from cPickle import Pickler, Unpickler
except ImportError:
    from pickle import Pickler, Unpickler

try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        # Python 3: pickles are bytes, so the buffer has to be a BytesIO.
        from io import BytesIO as StringIO


# Names exported by "from Cabinet import *".
__all__ = ['Cab', 'AnyCab', 'BtreeCab', 'CabinetError']


class CabinetError(Exception):
    """
    Raised for cabinet-level failures: an invalid open mode, compression
    requested without zlib, or an error from the underlying dbm open.
    """


class Cab(Shelf):
    """
    Base class for real cabinets; not useful on its own.

    Subclasses open a dbm handle, store it in self._dbh and then call
    Cab.__init__ (the _open_db helper does all of that). Values are pickled
    and, when the 'z' option is set, zlib-compressed before being stored.
    """

    def __init__(self):
        # zlib level used by _set_proc when compression is enabled
        self.compression = 3
        Shelf.__init__(self, self._dbh)

    def close(self):
        """
        Sync and close the dbm, then release the lock file if one is held.

        The lock is released even when sync() or the dbm close raises.
        """
        try:
            self.sync()
            self._dbh.close()
        finally:
            # getattr: close() can be reached from __del__ on an instance
            # whose construction failed before _parse_mode ran.
            if getattr(self, '_locked', 0):
                self._unlock()

    def __del__(self):
        # Best-effort cleanup: the cabinet may already be closed or only
        # partially constructed, and a finalizer must not raise.
        try:
            self.close()
        except Exception:
            pass

    def __getitem__(self, key):
        return self._get_proc(self.dict[key])

    def __setitem__(self, key, value):
        stored = self._set_proc(value)
        self.dict[key] = stored
        if self._logged:
            self._log('SET', key, stored)

    def __delitem__(self, key):
        del self.dict[key]
        if self._logged:
            self._log('DEL', key)

    def _get_proc(self, value):
        """
        Turn a stored string back into an object (decompress, then unpickle).

        NOTE: this unpickles whatever is in the dbm file; only open cabinets
        from sources you trust.
        """
        if self._compressed:
            f = StringIO(decompress(value))
        else:
            f = StringIO(value)
        return Unpickler(f).load()

    def _set_proc(self, value):
        """
        Turn an object into the string that is stored (pickle, then compress
        to self.compression when the 'z' option is set).
        """
        f = StringIO()
        p = Pickler(f)
        p.dump(value)
        if self._compressed:
            return compress(f.getvalue(), self.compression)
        else:
            return f.getvalue()

    def get(self, key, default=None):
        """
        Return the value stored under [key], or [default] if it is missing.
        """
        # Only the lookup is guarded, so a KeyError raised while unpickling
        # is not mistaken for a missing key.
        try:
            raw = self.dict[key]
        except KeyError:
            return default
        return self._get_proc(raw)

    def dump(self, target=None):
        """
        Pretty-print the value stored under [target], or every key and value
        in the cabinet when [target] is None.
        """
        import pprint
        pp = pprint.PrettyPrinter(indent=4)
        # 'is None' rather than truthiness: '' is a legitimate key.
        if target is None:
            for key in self.keys():
                print(key)
                pp.pprint(self[key])
        else:
            pp.pprint(self[target])

    def _parse_mode(self):
        """
        Split self.mode into the dbm open mode (first character) and the
        option characters 'l', 'z' and 's' that follow it.

        Sets _open_mode, _locked, _compressed, _logged and, for 's',
        _logfile. Unknown option characters are ignored. Raises CabinetError
        for an empty or unrecognised open mode, or for 'z' without zlib.
        """
        self._locked = 0
        self._compressed = 0
        self._logged = 0
        if not self.mode or self.mode[0] not in 'rwcn':
            raise CabinetError('invalid open mode: %r' % (self.mode,))
        self._open_mode = self.mode[0]
        want_lock = 0
        for ch in self.mode[1:]:
            if ch == 'l':
                want_lock = 1
            elif ch == 'z':
                if compress is None:
                    raise CabinetError('zlib not available')
                self._compressed = 1
            elif ch == 's':
                self._logged = 1
                self._logfile = self.file + '.log'
        # Take the lock last, so that a failure above cannot leave it held.
        if want_lock:
            self._lock(self._open_mode)

    def _open_db(self, file, mode, opener, errors):
        """
        Shared constructor body for the concrete cabinets.

        [opener] is called as opener(file, open_mode) to get the dbm handle.
        [errors] is the exception class (or tuple of classes) the dbm module
        raises; those are re-raised as CabinetError. Any lock taken while
        parsing the mode is released if the open fails.
        """
        self.file = file
        self.mode = mode
        self._parse_mode()
        try:
            self._dbh = opener(self.file, self._open_mode)
        except Exception as errinfo:
            if self._locked:
                self._unlock()
            if isinstance(errinfo, errors):
                raise CabinetError(str(errinfo))
            raise
        Cab.__init__(self)

    def _lock(self, lock_mode):
        """
        Lock the dummy file <file>.lock, waiting until the lock is granted.

        'r' takes a shared lock on a handle opened for reading; every other
        open mode takes an exclusive lock on a handle opened for writing.
        """
        # fcntl.lockf replaces the long-deprecated posixfile module.
        import fcntl
        lockfile = self.file + '.lock'
        if lock_mode == 'r':
            open_mode, operation = 'r', fcntl.LOCK_SH
        else:
            open_mode, operation = 'w', fcntl.LOCK_EX
        try:
            handle = open(lockfile, open_mode)
        except IOError:
            # Typically a reader arriving before the lock file exists:
            # create it empty, then open it the way we wanted to.
            open(lockfile, 'w').close()
            handle = open(lockfile, open_mode)
        try:
            fcntl.lockf(handle.fileno(), operation)
        except Exception:
            handle.close()
            raise
        self._lfh = handle
        self._locked = 1

    def _unlock(self):
        """
        Release the lock taken by _lock and close the lock file handle.
        """
        import fcntl
        try:
            fcntl.lockf(self._lfh.fileno(), fcntl.LOCK_UN)
        finally:
            self._lfh.close()
            # Cleared so that a later close() does not unlock twice.
            self._locked = 0

    def _log(self, *args):
        """
        Append one tab-separated line (timestamp, then [args]) to the
        transaction log.
        """
        # str() leaves Python 2 strings untouched and lets Python 3 bytes
        # values be written to the text log.
        fields = [str(arg) for arg in args]
        line = str(time()) + '\t' + '\t'.join(fields) + '\n'
        log = open(self._logfile, 'a')
        try:
            log.write(line)
        finally:
            log.close()


class AnyCab(Cab):
    """
    anydbm-based cabinet.
    """

    def __init__(self, file, mode='c'):
        """
        [file] is the name of the cabinet, as a full path.
        [mode] is the open mode desired. The first character should be:
            'r' for read access
            'w' for write access
            'c' for write access, creating a new file if necessary
            'n' for forced creation and then write access
        One can then add characters to specify options, including:
            'l' to use file locking (lockf on dummy file)
            's' to log transactions
            'z' to compress the database

        Raises CabinetError if the mode is invalid or the dbm cannot be
        opened.
        """
        try:
            import anydbm
        except ImportError:
            # Python 3 renamed anydbm to dbm.
            import dbm as anydbm
        self._open_db(file, mode, anydbm.open, anydbm.error)


class BtreeCab(Cab):
    """
    bsddb b-tree based cabinet.

    This class relies on bsddb, which in turn relies on Berkeley DB 1.85.
    Because of dire warnings regarding Btrees with this particular
    implementation, it would be advisable to use Berkeley DB 2.x in 1.85
    compatibility mode.
    """

    def __init__(self, file, mode='c'):
        """
        [file] is the name of the cabinet, as a full path.
        [mode] is the open mode desired. The first character should be:
            'r' for read access
            'w' for write access
            'c' for write access, creating a new file if necessary
            'n' for forced creation and then write access
        One can then add characters to specify options, including:
            'l' to use file locking (lockf on dummy file)
            's' to log transactions
            'z' to compress the database

        Raises CabinetError if the mode is invalid or the database cannot
        be opened.
        """
        import bsddb
        self._open_db(file, mode, bsddb.btopen, bsddb.error)

    def _decode_item(self, item):
        """Decode the value half of a (key, stored value) pair."""
        (key, value) = item
        return (key, self._get_proc(value))

    def set_location(self, key):
        return self._decode_item(self.dict.set_location(key))

    def next(self):
        return self._decode_item(self.dict.next())

    def previous(self):
        return self._decode_item(self.dict.previous())

    def first(self):
        return self._decode_item(self.dict.first())

    def last(self):
        return self._decode_item(self.dict.last())


def test(file='/tmp/Cabinet.test'):
    """
    generic test suite.

    Writes a locked, compressed AnyCab at [file], reopens it read-only and
    prints what it reads back.
    """
    import time
    f = AnyCab(file, 'czl')
    f['a'] = 'foo'
    f['b'] = {'1': [1,2,3,4,5], '2': 'abcde'}
    f['c'] = "Python rules. Don't forget it. Or else." * 10000
    f.sync()
    print('opening read...')
    g = AnyCab(file, 'rzl')
    # NOTE(review): presumably this pause holds the lock long enough for a
    # second copy of the script to be seen waiting on it - confirm.
    time.sleep(3)
    f.close()
    print(g['a'])
    print(g.get('d', 'ok.'))
    g.dump('b')
    g.close()


if __name__ == '__main__':
    test()