| 
									
										
										
										
											1995-08-10 19:29:28 +00:00
										 |  |  | """A dumb and slow but simple dbm clone.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | For database spam, spam.dir contains the index (a text file), | 
					
						
							|  |  |  | spam.bak *may* contain a backup of the index (also a text file), | 
					
						
							|  |  |  | while spam.dat contains the data (a binary file). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | XXX TO DO: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | - seems to contain a bug when updating... | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | - reclaim free space (currently, space once occupied by deleted or expanded | 
					
						
							|  |  |  | items is never reused) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | - support concurrent access (currently, if two processes take turns making | 
					
						
							|  |  |  | updates, they can mess up the index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | - support efficient access to large databases (currently, the whole index | 
					
						
							|  |  |  | is read when the database is opened, and some updates rewrite the whole index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | - support opening for read-only (flag = 'm') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-07-19 10:06:39 +00:00
										 |  |  | import os as _os | 
					
						
							| 
									
										
										
										
											1995-08-10 19:29:28 +00:00
										 |  |  | import __builtin__ | 
					
						
							| 
									
										
										
										
											2003-06-28 07:08:39 +00:00
										 |  |  | import UserDict | 
					
						
							| 
									
										
										
										
											1995-08-10 19:29:28 +00:00
										 |  |  | 
 | 
					
						
# Private alias for the builtin open(); the module-level open() defined
# later in this file shadows the builtin name.
_open = __builtin__.open

# Values in the data file start at byte offsets that are multiples of
# this number.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
					
						
							| 
									
										
										
										
											1996-05-28 22:58:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-06-28 07:08:39 +00:00
										 |  |  | class _Database(UserDict.DictMixin): | 
					
						
							| 
									
										
										
										
											1995-08-10 19:29:28 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-13 02:22:03 +00:00
										 |  |  |     # The on-disk directory and data files can remain in mutually | 
					
						
							|  |  |  |     # inconsistent states for an arbitrarily long time (see comments | 
					
						
							|  |  |  |     # at the end of __setitem__).  This is only repaired when _commit() | 
					
						
							|  |  |  |     # gets called.  One place _commit() gets called is from __del__(), | 
					
						
							|  |  |  |     # and if that occurs at program shutdown time, module globals may | 
					
						
							|  |  |  |     # already have gotten rebound to None.  Since it's crucial that | 
					
						
							| 
									
										
										
										
											2003-07-13 02:37:05 +00:00
										 |  |  |     # _commit() finish successfully, we can't ignore shutdown races | 
					
						
							| 
									
										
										
										
											2003-07-13 02:22:03 +00:00
										 |  |  |     # here, and _commit() must not reference any globals. | 
					
						
							|  |  |  |     _os = _os       # for _commit() | 
					
						
							|  |  |  |     _open = _open   # for _commit() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |     def __init__(self, filebasename, mode): | 
					
						
							| 
									
										
										
										
											2001-12-07 21:54:46 +00:00
										 |  |  |         self._mode = mode | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # The directory file is a text file.  Each line looks like | 
					
						
							|  |  |  |         #    "%r, (%d, %d)\n" % (key, pos, siz) | 
					
						
							|  |  |  |         # where key is the string key, pos is the offset into the dat | 
					
						
							|  |  |  |         # file of the associated value's first byte, and siz is the number | 
					
						
							|  |  |  |         # of bytes in the associated value. | 
					
						
							|  |  |  |         self._dirfile = filebasename + _os.extsep + 'dir' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # The data file is a binary file pointed into by the directory | 
					
						
							|  |  |  |         # file, and holds the values associated with keys.  Each value | 
					
						
							|  |  |  |         # begins at a _BLOCKSIZE-aligned byte offset, and is a raw | 
					
						
							|  |  |  |         # binary 8-bit string value. | 
					
						
							|  |  |  |         self._datfile = filebasename + _os.extsep + 'dat' | 
					
						
							|  |  |  |         self._bakfile = filebasename + _os.extsep + 'bak' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # The index is an in-memory dict, mirroring the directory file. | 
					
						
							|  |  |  |         self._index = None  # maps keys to (pos, siz) pairs | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         # Mod by Jack: create data file if needed | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             f = _open(self._datfile, 'r') | 
					
						
							|  |  |  |         except IOError: | 
					
						
							| 
									
										
										
										
											2006-12-22 15:04:45 +00:00
										 |  |  |             f = _open(self._datfile, 'w') | 
					
						
							|  |  |  |             self._chmod(self._datfile) | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         f.close() | 
					
						
							|  |  |  |         self._update() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |     # Read directory file into the in-memory index dict. | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |     def _update(self): | 
					
						
							|  |  |  |         self._index = {} | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             f = _open(self._dirfile) | 
					
						
							|  |  |  |         except IOError: | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |             for line in f: | 
					
						
							| 
									
										
										
										
											2005-06-07 19:36:10 +00:00
										 |  |  |                 line = line.rstrip() | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |                 key, pos_and_siz_pair = eval(line) | 
					
						
							|  |  |  |                 self._index[key] = pos_and_siz_pair | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |             f.close() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |     # Write the index dict to the directory file.  The original directory | 
					
						
							|  |  |  |     # file (if any) is renamed with a .bak extension first.  If a .bak | 
					
						
							|  |  |  |     # file currently exists, it's deleted. | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |     def _commit(self): | 
					
						
							| 
									
										
										
										
											2003-07-13 02:22:03 +00:00
										 |  |  |         # CAUTION:  It's vital that _commit() succeed, and _commit() can | 
					
						
							|  |  |  |         # be called from __del__().  Therefore we must never reference a | 
					
						
							|  |  |  |         # global in this routine. | 
					
						
							| 
									
										
										
										
											2003-07-13 17:21:10 +00:00
										 |  |  |         if self._index is None: | 
					
						
							|  |  |  |             return  # nothing to do | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2003-07-13 02:22:03 +00:00
										 |  |  |             self._os.unlink(self._bakfile) | 
					
						
							|  |  |  |         except self._os.error: | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |             pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2003-07-13 02:22:03 +00:00
										 |  |  |             self._os.rename(self._dirfile, self._bakfile) | 
					
						
							|  |  |  |         except self._os.error: | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |             pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-12-22 15:04:45 +00:00
										 |  |  |         f = self._open(self._dirfile, 'w') | 
					
						
							|  |  |  |         self._chmod(self._dirfile) | 
					
						
							| 
									
										
										
										
											2003-07-12 20:23:09 +00:00
										 |  |  |         for key, pos_and_siz_pair in self._index.iteritems(): | 
					
						
							|  |  |  |             f.write("%r, %r\n" % (key, pos_and_siz_pair)) | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         f.close() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-14 12:15:15 +00:00
										 |  |  |     sync = _commit | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |     def __getitem__(self, key): | 
					
						
							|  |  |  |         pos, siz = self._index[key]     # may raise KeyError | 
					
						
							|  |  |  |         f = _open(self._datfile, 'rb') | 
					
						
							|  |  |  |         f.seek(pos) | 
					
						
							|  |  |  |         dat = f.read(siz) | 
					
						
							|  |  |  |         f.close() | 
					
						
							|  |  |  |         return dat | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |     # Append val to the data file, starting at a _BLOCKSIZE-aligned | 
					
						
							|  |  |  |     # offset.  The data file is first padded with NUL bytes (if needed) | 
					
						
							|  |  |  |     # to get to an aligned offset.  Return pair | 
					
						
							|  |  |  |     #     (starting offset of val, len(val)) | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |     def _addval(self, val): | 
					
						
							|  |  |  |         f = _open(self._datfile, 'rb+') | 
					
						
							|  |  |  |         f.seek(0, 2) | 
					
						
							|  |  |  |         pos = int(f.tell()) | 
					
						
							| 
									
										
										
										
											2001-09-04 19:14:14 +00:00
										 |  |  |         npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         f.write('\0'*(npos-pos)) | 
					
						
							|  |  |  |         pos = npos | 
					
						
							|  |  |  |         f.write(val) | 
					
						
							|  |  |  |         f.close() | 
					
						
							|  |  |  |         return (pos, len(val)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |     # Write val to the data file, starting at offset pos.  The caller | 
					
						
							|  |  |  |     # is responsible for ensuring that there's enough room starting at | 
					
						
							|  |  |  |     # pos to hold val, without overwriting some other value.  Return | 
					
						
							|  |  |  |     # pair (pos, len(val)). | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |     def _setval(self, pos, val): | 
					
						
							|  |  |  |         f = _open(self._datfile, 'rb+') | 
					
						
							|  |  |  |         f.seek(pos) | 
					
						
							|  |  |  |         f.write(val) | 
					
						
							|  |  |  |         f.close() | 
					
						
							|  |  |  |         return (pos, len(val)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |     # key is a new key whose associated value starts in the data file | 
					
						
							| 
									
										
										
										
											2003-07-13 02:05:47 +00:00
										 |  |  |     # at offset pos and with length siz.  Add an index record to | 
					
						
							|  |  |  |     # the in-memory index dict, and append one to the directory file. | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |     def _addkey(self, key, pos_and_siz_pair): | 
					
						
							|  |  |  |         self._index[key] = pos_and_siz_pair | 
					
						
							| 
									
										
										
										
											2006-12-22 15:04:45 +00:00
										 |  |  |         f = _open(self._dirfile, 'a') | 
					
						
							|  |  |  |         self._chmod(self._dirfile) | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |         f.write("%r, %r\n" % (key, pos_and_siz_pair)) | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         f.close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __setitem__(self, key, val): | 
					
						
							|  |  |  |         if not type(key) == type('') == type(val): | 
					
						
							|  |  |  |             raise TypeError, "keys and values must be strings" | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |         if key not in self._index: | 
					
						
							|  |  |  |             self._addkey(key, self._addval(val)) | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |             # See whether the new value is small enough to fit in the | 
					
						
							|  |  |  |             # (padded) space currently occupied by the old value. | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |             pos, siz = self._index[key] | 
					
						
							| 
									
										
										
										
											2003-07-11 04:09:55 +00:00
										 |  |  |             oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE | 
					
						
							|  |  |  |             newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |             if newblocks <= oldblocks: | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |                 self._index[key] = self._setval(pos, val) | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |                 # The new value doesn't fit in the (padded) space used | 
					
						
							|  |  |  |                 # by the old value.  The blocks used by the old value are | 
					
						
							|  |  |  |                 # forever lost. | 
					
						
							|  |  |  |                 self._index[key] = self._addval(val) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # Note that _index may be out of synch with the directory | 
					
						
							|  |  |  |             # file now:  _setval() and _addval() don't update the directory | 
					
						
							| 
									
										
										
										
											2003-07-13 02:05:47 +00:00
										 |  |  |             # file.  This also means that the on-disk directory and data | 
					
						
							|  |  |  |             # files are in a mutually inconsistent state, and they'll | 
					
						
							|  |  |  |             # remain that way until _commit() is called.  Note that this | 
					
						
							|  |  |  |             # is a disaster (for the database) if the program crashes | 
					
						
							|  |  |  |             # (so that _commit() never gets called). | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def __delitem__(self, key): | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |         # The blocks used by the associated value are lost. | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         del self._index[key] | 
					
						
							| 
									
										
										
										
											2003-07-12 20:11:25 +00:00
										 |  |  |         # XXX It's unclear why we do a _commit() here (the code always | 
					
						
							|  |  |  |         # XXX has, so I'm not changing it).  _setitem__ doesn't try to | 
					
						
							|  |  |  |         # XXX keep the directory file in synch.  Why should we?  Or | 
					
						
							|  |  |  |         # XXX why shouldn't __setitem__? | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |         self._commit() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def keys(self): | 
					
						
							|  |  |  |         return self._index.keys() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def has_key(self, key): | 
					
						
							| 
									
										
										
										
											2002-06-01 14:25:41 +00:00
										 |  |  |         return key in self._index | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-05-03 04:55:47 +00:00
										 |  |  |     def __contains__(self, key): | 
					
						
							| 
									
										
										
										
											2002-06-01 14:25:41 +00:00
										 |  |  |         return key in self._index | 
					
						
							| 
									
										
										
										
											2001-05-03 04:55:47 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def iterkeys(self): | 
					
						
							|  |  |  |         return self._index.iterkeys() | 
					
						
							|  |  |  |     __iter__ = iterkeys | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 |  |  |     def __len__(self): | 
					
						
							|  |  |  |         return len(self._index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def close(self): | 
					
						
							| 
									
										
										
										
											2001-12-21 05:13:37 +00:00
										 |  |  |         self._commit() | 
					
						
							| 
									
										
										
										
											2003-07-13 17:21:10 +00:00
										 |  |  |         self._index = self._datfile = self._dirfile = self._bakfile = None | 
					
						
							| 
									
										
										
										
											1995-08-10 19:29:28 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-07-13 17:21:10 +00:00
										 |  |  |     __del__ = close | 
					
						
							| 
									
										
										
										
											2002-02-16 07:34:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-12-22 15:04:45 +00:00
										 |  |  |     def _chmod (self, file): | 
					
						
							|  |  |  |         if hasattr(self._os, 'chmod'): | 
					
						
							|  |  |  |             self._os.chmod(file, self._mode) | 
					
						
							| 
									
										
										
										
											2001-12-21 05:13:37 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1995-08-10 19:29:28 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-12-07 21:54:46 +00:00
										 |  |  | def open(file, flag=None, mode=0666): | 
					
						
							| 
									
										
										
										
											2002-05-29 16:18:42 +00:00
										 |  |  |     """Open the database file, filename, and return corresponding object.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The flag argument, used to control how the database is opened in the | 
					
						
							|  |  |  |     other DBM implementations, is ignored in the dumbdbm module; the | 
					
						
							|  |  |  |     database is always opened for update, and will be created if it does | 
					
						
							|  |  |  |     not exist. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The optional mode argument is the UNIX mode of the file, used only when | 
					
						
							|  |  |  |     the database has to be created.  It defaults to octal code 0666 (and | 
					
						
							|  |  |  |     will be modified by the prevailing umask). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2003-03-01 22:58:00 +00:00
										 |  |  |     # flag argument is currently ignored | 
					
						
							| 
									
										
										
										
											2006-12-22 15:16:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Modify mode depending on the umask | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         um = _os.umask(0) | 
					
						
							|  |  |  |         _os.umask(um) | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         # Turn off any bits that are set in the umask | 
					
						
							|  |  |  |         mode = mode & (~um) | 
					
						
							|  |  |  |          | 
					
						
							| 
									
										
										
										
											2001-12-07 21:54:46 +00:00
										 |  |  |     return _Database(file, mode) |