|
Description:
Reader iterates over records in Dbase or Xbase files.
Writer creates dbf files from Python sequences.
Source: Text Source
import struct, datetime, decimal, itertools
def dbfreader(f):
"""Returns an iterator over records in a Xbase DBF file.
The first row returned contains the field names.
The second row contains field specs: (type, size, decimal places).
Subsequent rows contain the data records.
If a record is marked as deleted, it is skipped.
File should be opened for binary reads.
"""
numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
numfields = (lenheader - 33) // 32
fields = []
for fieldno in xrange(numfields):
name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
name = name.replace('\0', '')
fields.append((name, typ, size, deci))
yield [field[0] for field in fields]
yield [tuple(field[1:]) for field in fields]
terminator = f.read(1)
assert terminator == '\r'
fields.insert(0, ('DeletionFlag', 'C', 1, 0))
fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
fmtsiz = struct.calcsize(fmt)
for i in xrange(numrec):
record = struct.unpack(fmt, f.read(fmtsiz))
if record[0] != ' ':
continue
result = []
for (name, typ, size, deci), value in itertools.izip(fields, record):
if name == 'DeletionFlag':
continue
if typ == "N":
value = value.replace('\0', '').lstrip()
if value == '':
value = 0
elif deci:
value = decimal.Decimal(value)
else:
value = int(value)
elif typ == 'D':
y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
value = datetime.date(y, m, d)
elif typ == 'L':
value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
result.append(value)
yield result
def dbfwriter(f, fieldnames, fieldspecs, records):
""" Return a string suitable for writing directly to a binary dbf file.
File f should be open for writing in a binary mode.
Fieldnames should be no longer than ten characters and not include \x00.
Fieldspecs are in the form (type, size, deci) where
type is one of:
C for ascii character data
M for ascii character memo data (real memo fields not supported)
D for datetime objects
N for ints or decimal objects
L for logical values 'T', 'F', or '?'
size is the field width
deci is the number of decimal places in the provided decimal object
Records can be an iterable over the records (sequences of field values).
"""
ver = 3
now = datetime.datetime.now()
yr, mon, day = now.year-1900, now.month, now.day
numrec = len(records)
numfields = len(fieldspecs)
lenheader = numfields * 32 + 33
lenrecord = sum(field[1] for field in fieldspecs) + 1
hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
f.write(hdr)
for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
name = name.ljust(11, '\x00')
fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
f.write(fld)
f.write('\r')
for record in records:
f.write(' ')
for (typ, size, deci), value in itertools.izip(fieldspecs, record):
if typ == "N":
value = str(value).rjust(size, ' ')
elif typ == 'D':
value = value.strftime('%Y%m%d')
elif typ == 'L':
value = str(value)[0].upper()
else:
value = str(value)[:size].ljust(size, ' ')
assert len(value) == size
f.write(value)
f.write('\x1A')
if __name__ == '__main__':
import sys, csv
from cStringIO import StringIO
from operator import itemgetter
filename = '/pydev/databases/orders.dbf'
if len(sys.argv) == 2:
filename = sys.argv[1]
f = open(filename, 'rb')
db = list(dbfreader(f))
f.close()
for record in db:
print record
fieldnames, fieldspecs, records = db[0], db[1], db[2:]
del records[4]
records.sort(key=itemgetter(4))
del fieldnames[0]
del fieldspecs[0]
records = [rec[1:] for rec in records]
f = StringIO()
dbfwriter(f, fieldnames, fieldspecs, records)
print '-' * 20
f.seek(0)
for line in dbfreader(f):
print line
f.close()
print '.' * 20
f = StringIO()
csv.writer(f).writerow(fieldnames)
csv.writer(f).writerows(records)
print f.getvalue()
f.close()
"""
['ORDER_ID', 'CUSTMR_ID', 'EMPLOY_ID', 'ORDER_DATE', 'ORDER_AMT']
[('C', 10, 0), ('C', 11, 0), ('C', 11, 0), ('D', 8, 0), ('N', 12, 2)]
['10005 ', 'WALNG ', '555 ', datetime.date(1995, 5, 22), Decimal("173.40")]
['10004 ', 'BMARK ', '777 ', datetime.date(1995, 5, 18), Decimal("3194.20")]
['10029 ', 'SAWYH ', '777 ', datetime.date(1995, 6, 29), Decimal("97.30")]
['10013 ', 'RITEB ', '777 ', datetime.date(1995, 6, 2), Decimal("560.40")]
['10024 ', 'RATTC ', '444 ', datetime.date(1995, 6, 21), Decimal("2223.50")]
['10018 ', 'RATTC ', '444 ', datetime.date(1995, 6, 12), Decimal("1076.05")]
['10025 ', 'RATTC ', '444 ', datetime.date(1995, 6, 23), Decimal("185.80")]
['10038 ', 'OLDWO ', '111 ', datetime.date(1995, 7, 14), Decimal("863.96")]
['10002 ', 'MTIME ', '333 ', datetime.date(1995, 5, 16), Decimal("731.80")]
['10007 ', 'MORNS ', '444 ', datetime.date(1995, 5, 24), Decimal("1405.00")]
['10026 ', 'MORNS ', '555 ', datetime.date(1995, 6, 26), Decimal("17.40")]
['10030 ', 'LILLO ', '111 ', datetime.date(1995, 7, 3), Decimal("909.91")]
['10022 ', 'LAPLA ', '111 ', datetime.date(1995, 6, 19), Decimal("671.50")]
['10035 ', 'HIGHG ', '111 ', datetime.date(1995, 7, 11), Decimal("1984.83")]
['10033 ', 'FOODG ', '333 ', datetime.date(1995, 7, 6), Decimal("3401.32")]
--------------------
['CUSTMR_ID', 'EMPLOY_ID', 'ORDER_DATE', 'ORDER_AMT']
[('C', 11, 0), ('C', 11, 0), ('D', 8, 0), ('N', 12, 2)]
['MORNS ', '555 ', datetime.date(1995, 6, 26), Decimal("17.40")]
['SAWYH ', '777 ', datetime.date(1995, 6, 29), Decimal("97.30")]
['WALNG ', '555 ', datetime.date(1995, 5, 22), Decimal("173.40")]
['RATTC ', '444 ', datetime.date(1995, 6, 23), Decimal("185.80")]
['RITEB ', '777 ', datetime.date(1995, 6, 2), Decimal("560.40")]
['LAPLA ', '111 ', datetime.date(1995, 6, 19), Decimal("671.50")]
['MTIME ', '333 ', datetime.date(1995, 5, 16), Decimal("731.80")]
['OLDWO ', '111 ', datetime.date(1995, 7, 14), Decimal("863.96")]
['LILLO ', '111 ', datetime.date(1995, 7, 3), Decimal("909.91")]
['RATTC ', '444 ', datetime.date(1995, 6, 12), Decimal("1076.05")]
['MORNS ', '444 ', datetime.date(1995, 5, 24), Decimal("1405.00")]
['HIGHG ', '111 ', datetime.date(1995, 7, 11), Decimal("1984.83")]
['BMARK ', '777 ', datetime.date(1995, 5, 18), Decimal("3194.20")]
['FOODG ', '333 ', datetime.date(1995, 7, 6), Decimal("3401.32")]
....................
CUSTMR_ID,EMPLOY_ID,ORDER_DATE,ORDER_AMT
MORNS ,555 ,1995-06-26,17.40
SAWYH ,777 ,1995-06-29,97.30
WALNG ,555 ,1995-05-22,173.40
RATTC ,444 ,1995-06-23,185.80
RITEB ,777 ,1995-06-02,560.40
LAPLA ,111 ,1995-06-19,671.50
MTIME ,333 ,1995-05-16,731.80
OLDWO ,111 ,1995-07-14,863.96
LILLO ,111 ,1995-07-03,909.91
RATTC ,444 ,1995-06-12,1076.05
MORNS ,444 ,1995-05-24,1405.00
HIGHG ,111 ,1995-07-11,1984.83
BMARK ,777 ,1995-05-18,3194.20
FOODG ,333 ,1995-07-06,3401.32
"""
Discussion:
The dbf file format is old, precise, commonplace, and widely supported by everything from calendar software in PDAs, to contact managers, to Excel and Access. It can be a good way to get Python to interoperate with pre-existing, non-Python apps.
|