|
Description:
Files on damaged media (e.g., old CD-ROMs) are not necessarily lost. Often one drive can read sectors that are unreadable in another drive. The "trick" is to merge the successful reads from several devices.
Source: Text Source
import sys, os, errno
import collections
import cPickle as Pickle
import gzip
# errno value this recipe treats as "the medium could not be read":
# EACCES on Windows, EIO on every other platform.
if sys.platform == 'win32':
    _READ_FAILURE_ERRNO = errno.EACCES
else:
    _READ_FAILURE_ERRNO = errno.EIO


def read_failed(exc):
    """Return True if *exc* signals an unreadable part of the source file.

    exc -- an exception carrying an ``errno`` attribute (e.g. IOError).

    Only the platform-specific read-failure errno counts; any other error
    (missing file, full disk, ...) returns False so callers can re-raise it.
    """
    return exc.errno == _READ_FAILURE_ERRNO
class Chunk(object):
    """A description of a data chunk to be read.

    A chunk is the byte range [offset, offset + size) of the source file.
    """

    # Retry granularity in bytes (2048 is the sector size of CD/DVD media).
    UNIT = 2048
    # Size in bytes of a first-pass ("fast") read: 32 units at once.
    BIG_UNIT = 32 * UNIT

    def __init__(self, offset, size):
        """Describe the *size* bytes starting at byte *offset*."""
        self.offset = offset
        self.size = size

    def next_attempt(self):
        """Return sequence of chunks to retry after this chunk failed.

        A chunk no larger than UNIT is retried as-is.  A larger chunk is
        split into UNIT-sized pieces; the last piece is shortened so the
        pieces never extend past the end of the original chunk.
        """
        if self.size <= self.UNIT:
            yield self
            return
        end = self.offset + self.size
        start = self.offset
        while start < end:
            yield self.__class__(start, min(self.UNIT, end - start))
            start += self.UNIT

    def __getstate__(self):
        # Pickle only the two integers to keep the state file small.
        return self.offset, self.size

    def __setstate__(self, tpl):
        self.offset, self.size = tpl

    def description(self):
        """Return textual description of chunk.

        The format is "<offset in whole MiB>MiB:<first unit>[-<last unit>]",
        where units are UNIT-sized sectors counted from the file start.
        """
        # Floor division keeps the unit numbers integral on any Python.
        first_unit = self.offset // self.UNIT
        # Index of the unit holding the chunk's last byte; never before the
        # first unit, even for a partial or empty chunk.
        last_unit = max(first_unit,
                        (self.offset + self.size - 1) // self.UNIT)
        mebibytes = self.offset // 1048576
        if first_unit == last_unit:
            return "%dMiB:%d" % (mebibytes, first_unit)
        return "%dMiB:%d-%d" % (mebibytes, first_unit, last_unit)
class SuspectFile(object):
    """A file to be copied from unreliable media, resumable across runs.

    Pending work is kept in three queues of chunks:

    phase1 -- chunks not yet attempted (large, fast reads);
    phase2 -- pieces of chunks whose fast read failed, awaiting a re-read;
    phase3 -- pieces whose re-read failed as well during this run.

    The queues are saved to ``state_filename`` so a later run (possibly
    with the medium in another drive) retries only what is still missing.
    """

    # Directory receiving the copy unless overridden per instance.
    destination = "."

    def __init__(self, filename, destination=None):
        """Prepare to copy *filename*, resuming from a saved state if any.

        filename    -- path of the (possibly damaged) source file.
        destination -- directory for the copy; defaults to the class
                       attribute ``destination``.
        """
        self.filename = filename
        if destination is not None:
            self.destination = destination
        # The state file is a relative name, i.e. it lives in the current
        # working directory, not necessarily in ``destination``.
        self.state_filename = os.path.basename(filename) + ".state"
        self.phase3 = collections.deque()
        try:
            self.read_last_attempt_state()
        except IOError:
            # No usable state file: start from scratch.
            self.phase1 = self.chunks_to_read()
            self.phase2 = collections.deque()

    def chunks_to_read(self):
        """Return a deque of BIG_UNIT-sized chunks covering the whole file.

        The last chunk is shorter when the file size is not a multiple of
        Chunk.BIG_UNIT; an empty file yields an empty deque.
        """
        filesize = os.path.getsize(self.filename)
        result = collections.deque()
        offset = 0
        while offset < filesize:
            result.append(
                Chunk(offset, min(Chunk.BIG_UNIT, filesize - offset)))
            offset += Chunk.BIG_UNIT
        return result

    def record_state(self):
        """Save the pending chunks, or delete the state file if none remain.

        phase2 and phase3 are merged into a single queue on disk, so every
        piece that is still unread is re-read on the next run.
        """
        if self.phase1 or self.phase2 or self.phase3:
            retries = collections.deque(self.phase2)
            retries.extend(self.phase3)
            fpr = gzip.open(self.state_filename, "wb")
            try:
                Pickle.dump(self.phase1, fpr, -1)
                Pickle.dump(retries, fpr, -1)
            finally:
                # Close even if pickling fails so the handle is not leaked.
                fpr.close()
        else:
            # Best effort: the state file may never have been written.
            try:
                os.remove(self.state_filename)
            except OSError:
                pass

    @staticmethod
    def copy_chunk(fpi, fpo, chunk):
        """Copy the bytes described by *chunk* from *fpi* to *fpo*.

        The data is written at the same offset it was read from.  Returns
        the data read; nothing is written when the read returns no data.
        """
        fpi.seek(chunk.offset)
        data = fpi.read(chunk.size)
        if data:
            fpo.seek(chunk.offset)
            fpo.write(data)
        return data

    def read_last_attempt_state(self):
        """Load phase1 and phase2 from the state file.

        Raises IOError when the state file cannot be opened.

        NOTE(security): unpickling executes whatever the file describes;
        only load state files written by record_state(), never files from
        an untrusted source.
        """
        fpr = gzip.open(self.state_filename, "rb")
        try:
            phase1 = Pickle.load(fpr)
            phase2 = Pickle.load(fpr)
        finally:
            fpr.close()
        # Assign only once both queues loaded, so a truncated state file
        # cannot leave the instance half initialised.
        self.phase1 = phase1
        self.phase2 = phase2

    def report_attempt(self, chunk):
        "This is to be overloaded with a way to report progress"
        pass

    def report_success(self, chunk):
        "Hook called with each chunk that was copied successfully"
        pass

    def report_failure(self, chunk):
        "Hook called with each chunk whose read failed"
        pass

    def phase_copy(self, fpi, fpo, phase_in, phase_out):
        """Copy chunks from fpi to fpo storing failures in phase_out.

        Chunks are consumed from the left of *phase_in*.  When a read
        fails (as judged by read_failed), the chunk's next_attempt()
        pieces are appended to *phase_out*; any other IOError propagates.
        A chunk interrupted before it was settled (e.g. by Control-C) is
        pushed back onto *phase_in* so it is not lost from the saved state.
        """
        while phase_in:
            chunk = phase_in.popleft()
            try:
                self.report_attempt(chunk)
                try:
                    self.copy_chunk(fpi, fpo, chunk)
                except IOError as exc:
                    if not read_failed(exc):
                        raise
                    phase_out.extend(chunk.next_attempt())
                    # Mark the chunk as settled *before* reporting, but
                    # hand the real chunk (not None) to the hook.
                    failed, chunk = chunk, None
                    self.report_failure(failed)
                else:
                    done, chunk = chunk, None
                    self.report_success(done)
            finally:
                if chunk is not None:
                    phase_in.appendleft(chunk)

    def copy(self):
        """Copy the file to the destination directory.

        Runs the fast pass (phase1 -> phase2) and then the re-read pass
        (phase2 -> phase3).  An existing partial copy is updated in place.
        The state is recorded even when the copy is interrupted, and both
        files are always closed.
        """
        fpo_filename = os.path.join(
            self.destination, os.path.basename(self.filename))
        fpi = open(self.filename, "rb")
        try:
            try:
                # Keep data recovered by earlier runs.
                fpo = open(fpo_filename, "r+b")
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise
                fpo = open(fpo_filename, "wb")
            try:
                try:
                    self.phase_copy(fpi, fpo, self.phase1, self.phase2)
                    self.phase_copy(fpi, fpo, self.phase2, self.phase3)
                finally:
                    # Flush recovered data to disk before saving the state.
                    fpo.close()
            finally:
                self.record_state()
        finally:
            fpi.close()
if __name__ == "__main__":

    class SuspectFileCmd(SuspectFile):
        """SuspectFile that reports its progress on standard error."""

        def report_attempt(self, chunk):
            # Show which chunk is being read right now.
            sys.stderr.write(chunk.description())

        def report_success(self, chunk):
            # Return to the line start so the next description overwrites
            # this one.
            sys.stderr.write("\r")

        def report_failure(self, chunk):
            # Terminate the line so the failed chunk stays visible.
            sys.stderr.write(" failed\n")

        def record_state(self):
            # Save the state first, then summarise what is still pending.
            super(SuspectFileCmd, self).record_state()
            pending_bytes = 0
            for pending in self.phase1:
                pending_bytes += pending.size
            pending_sectors = len(self.phase2) + len(self.phase3)
            sys.stderr.write(
                "** remaining %d bytes in fast reads\n" % pending_bytes)
            sys.stderr.write(
                "and %d sectors in re-reads\n" % pending_sectors)

    # Every command-line argument is a damaged file to recover.
    for filename in sys.argv[1:]:
        faulty_file = SuspectFileCmd(filename)
        sys.stderr.write("copying %s\n" % filename)
        faulty_file.copy()
        sys.stderr.write("\n")
Discussion:
This recipe can be used for recovering files from damaged media (CD-ROMs and DVDs are specifically addressed, because of the UNIT=2048, which is the byte-size of a sector on these media).
This recipe attempts to copy batches of sectors from the faulty file to the current directory; if reading a batch (32 sectors by default) fails, its sectors will be re-read later one by one. If these sectors still can't be read, the program will exit, storing a file (named as the original filename with ".state" appended) that lists the unread sectors. Put the disk in another device, and run again. Only the unread sectors will be retried (as long as both the partial copy and its .state file exist in the current directory), and you have a chance of reading more data.
At any point you can stop the program with Control-C. Wait a little until the current read finishes or fails; the program will then exit after first storing the .state file.
This code is actually a quick hack which I just prettied up a little bit to share it here.
|