Welcome, guest | Sign In | My Account | Store | Cart

This recipe provides a class which reads a text file in reverse. It reads a block of data from the end of the file, splits it into a list of lines, and pops an item off that list every time the readline() method is called. When the block is exhausted, another block is read, and so forth. This takes care of corner cases such as a line that is longer than the buffer or a file that is smaller than the buffer.

Python, 80 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
import sys
import os
import string

"""read a file returning the lines in reverse order for each call of readline()
This actually just reads blocks (4096 bytes by default) of data from the end of
the file and returns the last line held in an internal buffer.  I believe all the
corner cases are handled, but one can never be sure..."""

class BackwardsReader:
  """Read a file line by line, starting from the last line.

  The file is opened in binary mode and consumed from the end in blocks of
  ``blksize`` bytes.  Each block is split on newline bytes into an internal
  list, and readline() hands out the last entry of that list on every call.

  Lines are returned in the type produced by a binary-mode read: ``bytes``
  on Python 3, which is the same type as ``str`` on Python 2.

  The reader holds an open file handle; release it with close() or by using
  the reader as a context manager.
  """

  def __init__(self, file, blksize=4096):
    """Open *file* and buffer its last block.

    file    -- path of the file to read
    blksize -- number of bytes fetched per read; must be at least 1

    Raises ValueError when blksize is smaller than 1, and whatever os.stat()
    or open() raise when the file cannot be accessed.
    """
    if blksize < 1:
      # A zero or negative block size never reads any data, which would make
      # every file look empty instead of reporting the mistake.
      raise ValueError("blksize must be a positive integer")
    # get the file size
    self.size = os.stat(file)[6]
    # how big of a block to read from the file...
    self.blksize = blksize
    # how many blocks we've read
    self.blkcount = 1
    self.f = open(file, 'rb')
    # if the file is bigger than the blocksize, read only its last block,
    # otherwise read the whole thing from the start...
    if self.size > self.blksize:
      self.f.seek(self.size - self.blksize)
    self.data = self.f.read(self.blksize).split(b'\n')
    # strip the last item if it's empty...  a byproduct of the last line having
    # a newline at the end of it (this also leaves the list empty for an
    # empty file)
    if not self.data[-1]:
      self.data.pop()

  def readline(self):
    """Return the next line, moving from the end of the file to its start.

    The returned line always ends with a newline byte, including the final
    line of a file that does not end with one.  An empty value is returned
    once every line has been handed out.
    """
    # A single buffered entry may be only the tail of a line whose beginning
    # lies in an earlier block, so keep prepending blocks until a newline
    # shows up or the start of the file has been reached.
    while len(self.data) == 1 and (self.blkcount * self.blksize) < self.size:
      self.blkcount = self.blkcount + 1
      partial = self.data[0]
      # Offset of the first byte that has already been consumed.  It is
      # positive here because the loop condition held for the old blkcount.
      end = self.size - self.blksize * (self.blkcount - 1)
      # The block closest to the start of the file may be shorter than
      # blksize; clamp rather than seeking before the beginning.
      start = max(end - self.blksize, 0)
      self.f.seek(start)
      self.data = (self.f.read(end - start) + partial).split(b'\n')

    if not self.data:
      return b""

    return self.data.pop() + b'\n'

  def close(self):
    """Close the underlying file handle."""
    self.f.close()

  def __enter__(self):
    """Return the reader itself so it can be used in a ``with`` statement."""
    return self

  def __exit__(self, exc_type, exc_value, traceback):
    """Close the file when the ``with`` block ends; exceptions propagate."""
    self.close()


if __name__ == "__main__":
  # Self-test: read this script forwards, reverse the lines, and check that
  # BackwardsReader yields exactly the same sequence for many buffer sizes.

  # Use the running script itself as test data instead of a fixed file name,
  # so the test works whatever the file is called or wherever it is run from.
  source = __file__

  # Read the reference copy in binary mode, matching how BackwardsReader
  # opens the file, so both sides see identical bytes.
  with open(source, 'rb') as f:
    lines = f.readlines()

  # BackwardsReader.readline() always appends a newline, so give the final
  # reference line one too if the file does not end with a newline.
  if lines and not lines[-1].endswith(b'\n'):
    lines[-1] = lines[-1] + b'\n'

  lines.reverse()

  for i in range(1, 5000):  # test different buffer sizes...
    foo = BackwardsReader(source, i)
    try:
      linesbr = []
      line = foo.readline()
      while line:
        linesbr.append(line)
        line = foo.readline()
    finally:
      # Release the handle right away; the loop would otherwise leave
      # thousands of files open until they are garbage collected.
      foo.f.close()

    # sys.stdout.write() produces the same output on Python 2 and Python 3.
    if linesbr != lines:
      sys.stdout.write("\nNOT MATCHED  %5d\n" % i)
    else:
      # The carriage return keeps the progress counter on a single line.
      sys.stdout.write("MATCHED %5d\r" % i)
      sys.stdout.flush()

There are probably more concise implementations...