]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | """A dumb and slow but simple dbm clone.\r |
2 | \r | |
3 | For database spam, spam.dir contains the index (a text file),\r | |
4 | spam.bak *may* contain a backup of the index (also a text file),\r | |
5 | while spam.dat contains the data (a binary file).\r | |
6 | \r | |
7 | XXX TO DO:\r | |
8 | \r | |
9 | - seems to contain a bug when updating...\r | |
10 | \r | |
11 | - reclaim free space (currently, space once occupied by deleted or expanded\r | |
12 | items is never reused)\r | |
13 | \r | |
14 | - support concurrent access (currently, if two processes take turns making\r | |
15 | updates, they can mess up the index)\r | |
16 | \r | |
17 | - support efficient access to large databases (currently, the whole index\r | |
18 | is read when the database is opened, and some updates rewrite the whole index)\r | |
19 | \r | |
20 | - support opening for read-only (flag = 'm')\r | |
21 | \r | |
22 | """\r | |
23 | \r | |
24 | import os as _os\r | |
25 | import __builtin__\r | |
26 | import UserDict\r | |
27 | \r | |
# Keep a handle on the builtin open(): this module defines its own
# module-level open() further down, which shadows the builtin name.
_open = __builtin__.open

# Every value stored in the data file starts at an offset that is a
# multiple of this many bytes.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
33 | \r | |
class _Database(UserDict.DictMixin):
    """Dictionary-like string store kept in a text index plus a data file.

    For a base name B the index is stored in B<extsep>dir, the previous
    index may be kept in B<extsep>bak, and the values are stored in
    B<extsep>dat.  The whole index is held in memory as a dict mapping
    each key to a (pos, siz) pair locating its value in the data file.
    Keys and values must both be plain str objects.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    def __init__(self, filebasename, mode):
        """Open (creating the data file if needed) the database.

        filebasename is the path without extension; mode is the
        permission mode passed to chmod for files this object writes.
        """
        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = _open(self._datfile, 'w')
            try:
                self._chmod(self._datfile)
            finally:
                # Close the new file even if chmod fails.
                f.close()
        else:
            f.close()
        self._update()

    # Read directory file into the in-memory index dict.
    def _update(self):
        """Rebuild self._index from the directory file, if one exists."""
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            # No readable directory file: start with an empty index.
            return
        try:
            for line in f:
                line = line.rstrip()
                # NOTE(review): eval() executes whatever the directory
                # file contains, so the file must come from a trusted
                # source.  Left as-is to keep the on-disk format and the
                # accepted inputs unchanged.
                key, pos_and_siz_pair = eval(line)
                self._index[key] = pos_and_siz_pair
        finally:
            f.close()

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index."""
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        f = self._open(self._dirfile, 'w')
        try:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.iteritems():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            # Don't leak the handle if chmod or a write fails.
            f.close()

    sync = _commit

    def __getitem__(self, key):
        """Return the value stored for key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)                # 2 == seek relative to end of file
            pos = int(f.tell())
            # Round the current end of file up to the next block boundary.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0' * (npos - pos))
            f.write(val)
        finally:
            f.close()
        return (npos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        f = _open(self._dirfile, 'a')
        try:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key.

        Raises TypeError unless key and val are both exactly of type str.
        """
        # Exact type check (not isinstance) on purpose: index records are
        # written with %r and read back with eval(), so a str subclass
        # with its own repr could produce an unreadable directory file.
        if not (type(key) is str and type(val) is str):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index and rewrite the directory file."""
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of the keys in the index."""
        return self._index.keys()

    def has_key(self, key):
        """Return whether key is in the index."""
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys in the index."""
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        """Commit the index and drop the index and file names.

        Safe to call more than once: after the first call _index is
        None, which makes _commit() a no-op.
        """
        self._commit()
        self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply self._mode to file where the platform offers os.chmod."""
        # Uses self._os (not the global) because _commit() calls this and
        # may run from __del__() during interpreter shutdown.
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)
224 | \r | |
def open(file, flag=None, mode=0o666):
    """Open the database with base name file and return a _Database object.

    file is the path of the database without an extension; the directory,
    data and backup file names are derived from it by _Database.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, is ignored in the dumbdbm module; the
    database is always opened for update, and will be created if it does
    not exist.

    The optional mode argument is the UNIX permission mode for the
    database files.  It defaults to octal code 0666, and any bits set in
    the prevailing umask are cleared from it before it is passed on.
    _Database applies the result with chmod to the data file when it
    creates it and to the directory file whenever it writes it.

    """
    # flag argument is currently ignored

    # Modify mode depending on the umask
    try:
        # The umask can only be read by setting it, so set a throwaway
        # value and immediately restore the real one.
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        # No os.umask on this platform: use mode unchanged.
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)

    return _Database(file, mode)