+++ /dev/null
-"""A dumb and slow but simple dbm clone.\r
-\r
-For database spam, spam.dir contains the index (a text file),\r
-spam.bak *may* contain a backup of the index (also a text file),\r
-while spam.dat contains the data (a binary file).\r
-\r
-XXX TO DO:\r
-\r
-- seems to contain a bug when updating...\r
-\r
-- reclaim free space (currently, space once occupied by deleted or expanded\r
-items is never reused)\r
-\r
-- support concurrent access (currently, if two processes take turns making\r
-updates, they can mess up the index)\r
-\r
-- support efficient access to large databases (currently, the whole index\r
-is read when the database is opened, and some updates rewrite the whole index)\r
-\r
-- support opening for read-only (flag = 'm')\r
-\r
-"""\r
-\r
-import os as _os\r
-import __builtin__\r
-import UserDict\r
-\r
# Private alias for the builtin open().  This module defines its own public
# open() function, which shadows the builtin name at module level.
_open = __builtin__.open

# Values in the data file start at byte offsets that are multiples of this.
_BLOCKSIZE = 512

error = IOError  # For anydbm
-\r
class _Database(UserDict.DictMixin):
    """Dictionary-like string store kept in three files on disk.

    For a base name ``spam`` the files are ``spam.dir`` (the index, a text
    file), ``spam.dat`` (the values, a binary file) and ``spam.bak`` (the
    previous index, written by _commit()).  Keys and values must both be
    plain ``str`` objects.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    def __init__(self, filebasename, mode):
        """Open (creating if necessary) the database named filebasename.

        mode is the permission value handed to os.chmod() for every file
        this object creates or rewrites.
        """
        # The index is an in-memory dict, mirroring the directory file.
        # It is assigned before anything that can fail so that __del__()
        # (which ends up in _commit()) always finds the attribute, even
        # when construction is aborted half way through.
        self._index = None  # maps keys to (pos, siz) pairs
        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
            try:
                self._chmod(self._datfile)
            finally:
                # Close the new file even when chmod fails.
                f.close()
        else:
            f.close()
        self._update()

    # Read directory file into the in-memory index dict.
    def _update(self):
        """Rebuild self._index from the directory file.

        A directory file that cannot be opened yields an empty index.
        """
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            return
        try:
            for line in f:
                # NOTE(security): eval() executes whatever expression the
                # line holds, so a tampered directory file can run
                # arbitrary code.  Only open databases from trusted
                # locations.
                key, pos_and_siz_pair = eval(line.rstrip())
                self._index[key] = pos_and_siz_pair
        finally:
            # Close the file even if a malformed line makes eval() raise.
            f.close()

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Write the in-memory index out as a fresh directory file.

        Does nothing once the database has been closed (index is None).
        """
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        # Both steps are best-effort: there may be no old backup to
        # delete and no directory file to turn into the new backup.
        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        f = self._open(self._dirfile, 'w')
        try:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.iteritems():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    sync = _commit

    def __getitem__(self, key):
        """Return the value stored under key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        """Append val at the next aligned offset; return (pos, len(val))."""
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)    # whence=2: position at end of file
            pos = int(f.tell())
            # Round the end-of-file offset up to a multiple of _BLOCKSIZE.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0' * (npos - pos))
            f.write(val)
        finally:
            f.close()
        return (npos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        """Overwrite the data file at offset pos with val."""
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        """Record a new key in the index and append it to the directory."""
        self._index[key] = pos_and_siz_pair
        f = _open(self._dirfile, 'a')
        try:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key.

        Raises TypeError unless key and val are both exactly of type str.
        """
        # Exact type match on purpose (no subclasses): keys are written to
        # the directory file with %r and read back with eval().
        if not (type(key) is str and type(val) is str):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index and rewrite the directory file."""
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys in the index."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if key is in the index."""
        return key in self._index

    def __contains__(self, key):
        """Support ``key in db``."""
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys in the index."""
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys in the index."""
        return len(self._index)

    def close(self):
        """Commit the index to disk and drop all file references.

        Afterwards _index is None, which turns later _commit() calls
        (including the one made through __del__) into no-ops.
        """
        self._commit()
        self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply self._mode to file where the platform offers os.chmod."""
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)
-\r
-\r
-def open(file, flag=None, mode=0666):\r
- """Open the database file, filename, and return corresponding object.\r
-\r
- The flag argument, used to control how the database is opened in the\r
- other DBM implementations, is ignored in the dumbdbm module; the\r
- database is always opened for update, and will be created if it does\r
- not exist.\r
-\r
- The optional mode argument is the UNIX mode of the file, used only when\r
- the database has to be created. It defaults to octal code 0666 (and\r
- will be modified by the prevailing umask).\r
-\r
- """\r
- # flag argument is currently ignored\r
-\r
- # Modify mode depending on the umask\r
- try:\r
- um = _os.umask(0)\r
- _os.umask(um)\r
- except AttributeError:\r
- pass\r
- else:\r
- # Turn off any bits that are set in the umask\r
- mode = mode & (~um)\r
-\r
- return _Database(file, mode)\r