Read a file line by line starting at the end
# copyright 2004 Michael D. Stenner <mstenner@ece.arizona.edu>
# license: LGPL
class xreverse:
    """Iterate over the lines of a seekable file object, last line first.

    The file is read backwards in chunks of ``buf_size`` characters, so only
    the current chunk (plus one partial line) is held in memory.  Every line
    handed out ends with a newline, except the final line of the file when
    the file itself does not end with one.

    The file object must support ``seek``, ``tell`` and ``read``, and
    ``read`` must return ``str`` data.
    """

    def __init__(self, file_object, buf_size=1024*8):
        """Position at the end of ``file_object`` and parse the last chunk.

        file_object -- open, seekable file object to read backwards
        buf_size    -- number of characters fetched per backwards read

        Raises ValueError if ``buf_size`` is not positive (a zero or
        negative step would never make progress towards the file start).
        """
        if buf_size < 1:
            raise ValueError('buf_size must be a positive integer')

        self.fo = fo = file_object
        fo.seek(0, 2)         # go to the end of the file
        self.pos = fo.tell()  # offset where the not-yet-read region ends
        self.buffer = ''      # partial line at the front of the data read
        self.lbuf = []        # parsed lines, file order (popped from end)
        self.done = 0         # the first line of the file was handed out
        self.jump = -1 * buf_size

        # Keep reading backwards until the data contains a newline, so the
        # trailing line(s) can be split off.
        while 1:
            new = self._read_previous_chunk()
            self.buffer = new + self.buffer
            if '\n' in new:
                break
            if self.pos == 0:
                # The whole file holds no newline at all: it is either
                # empty or one unterminated line.  Queue that line as-is
                # and mark the iteration as finished after it.
                if self.buffer:
                    self.lbuf = [self.buffer]
                self.buffer = ''
                self.done = 1
                return

        nl = self.buffer.split('\n')
        # nl[0] may be incomplete (its start is not read yet); everything
        # between the first and the last piece is a full line.
        nlb = [i + '\n' for i in nl[1:-1]]
        # A non-empty last piece means the file does not end with a
        # newline; hand that line out without adding one.
        if nl[-1]:
            nlb.append(nl[-1])
        self.buffer = nl[0]
        self.lbuf = nlb

    def _read_previous_chunk(self):
        """Read and return the chunk that ends at ``self.pos``.

        Moves ``self.pos`` back by up to ``buf_size`` (never below 0) and
        leaves the file positioned at the new ``self.pos``.
        """
        fo = self.fo
        # Absolute seek clamped at the file start; this avoids depending on
        # an exception from seeking before offset 0.
        new_position = max(self.pos + self.jump, 0)
        fo.seek(new_position)
        new = fo.read(self.pos - new_position)
        fo.seek(new_position)
        self.pos = new_position
        return new

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def next(self):
        """Return the previous line of the file.

        Raises StopIteration once the first line has been returned.
        """
        if self.lbuf:
            return self.lbuf.pop()

        while 1:
            # get the next chunk of data
            new = self._read_previous_chunk()

            # The old partial line is completed by the tail of the new
            # chunk; the first piece becomes the new partial line.
            nl = (new + self.buffer).split('\n')
            self.buffer = nl.pop(0)
            self.lbuf = [i + '\n' for i in nl]

            if self.lbuf:
                return self.lbuf.pop()
            if self.pos == 0:
                if self.done:
                    raise StopIteration
                # Start of file reached: what is buffered is the complete
                # first line (its newline was consumed by the split).
                self.done = 1
                return self.buffer + '\n'

    # Python 3 spelling of the iterator protocol method.
    __next__ = next
def dump(rtype, fn):
    """Write every line that ``rtype`` yields for the file ``fn`` to stdout.

    rtype -- callable that takes an open file object and returns an
             iterable of strings
    fn    -- path of the file to open
    """
    import sys
    fo = open(fn)
    try:
        for line in rtype(fo):
            sys.stdout.write(line)
    finally:
        # Release the handle even if reading or writing fails.
        fo.close()
def rereverse(rtype, fn):
    """Read ``fn`` through ``rtype``, reverse the result and write it out.

    With a reader that yields lines last-to-first, the output on stdout is
    the file in its original order, which makes this a round-trip check.

    rtype -- callable that takes an open file object and returns an
             iterable of strings
    fn    -- path of the file to open
    """
    # Imported locally so the function also works when this module is
    # imported rather than run as a script.
    import sys
    fo = open(fn)
    try:
        # Use the reader that was passed in rather than a hard-coded one.
        rev = list(rtype(fo))
    finally:
        fo.close()
    rev.reverse()
    sys.stdout.writelines(rev)
def test_compare(rtype, fn):
    """Print timings for iterating ``fn`` via ``rtype`` versus ``readlines``.

    Takes ``os.times()`` snapshots before and after each pass and writes
    five lines to stdout, one per field of the ``os.times()`` result.  Each
    line holds the time spent by the ``rtype`` pass followed by the time
    spent by the ``readlines`` pass.

    rtype -- callable that takes an open file object and returns an
             iterable
    fn    -- path of the file to read
    """
    import os
    import sys

    t1 = os.times()
    fo = open(fn)
    try:
        for line in rtype(fo):
            pass
    finally:
        fo.close()

    t2 = os.times()
    fo = open(fn)
    try:
        # Baseline: load every line into memory at once.
        for line in fo.readlines():
            pass
    finally:
        fo.close()

    t3 = os.times()
    for i in range(5):
        sys.stdout.write('%s %s\n' % (t2[i] - t1[i], t3[i] - t2[i]))
if __name__ == '__main__':
    import sys
    # The path of the file to process is the only required argument.
    if len(sys.argv) < 2:
        sys.exit('usage: %s FILE' % sys.argv[0])
    fn = sys.argv[1]
    # Alternative entry points, enabled by hand when experimenting:
    #dump(xreverse, fn)
    #test_compare(xreverse, fn)
    rereverse(xreverse, fn)
|
Originally used for checking the end of mbox files.
I use it to read a log file backwards, in order to implement undo.
Reverse file. Why not just do this, assuming f is an open file: `lines = f.readlines(); lines.reverse()`?
Or, if you want the last few lines of a file, you can do something like `lines = f.readlines()[-5:]; lines.reverse()`.
This will give you the last 5 lines in a file and then reverse them.
I'm not questioning the usefulness of your code. I'm just wondering what it buys me over the few lines above. Assuming that it uses less memory, how large a file do you need to read before it becomes worthwhile?
Memory. I use it for reading big log files, I think 10Mb is big enough to warrant using it. Think "Do I want my program to increase in memory usage the size of this file?"
Also, if your program uses lots of log files (say you want to search for error messages from the last day in 100 log files of 10 MB each), I imagine it would be slower to load each file into memory (creating list objects) and then free the memory again than to do a few disk reads.
Basically it's a 'tail' for python. I haven't done any speed or memory efficiency tests, it just seems like a cleaner way to do things.
Eg. You could use 'less' and 'G' (go to end) to read the end of a log file, or you could use 'tail'. If what you're looking for is probably in the last few lines, better to start looking there :)
BTW: I was given this code by Michael D. Stenner (the guy with the copyright), who wrote it to read mbox files or something; he posted it in IRC on #python on the FreeNode network. He gave me permission to post it here. I hope everyone enjoys it :)
www.sherborneinternational.com
Aha. That's the part that would have been nice to have in the discussion. I had assumed there was a good reason for all that code vs. the much more succinct method available in normal python syntax.
Thanks for the explanation.