#encoding:latin-1 # # This module allows scanning of Tarfile Content with constant # memory consumption. # # Python 2.5 tarfile module does internal caching, such that # the memory consumption while iterating grows linearly. # # ----------------------------------------------------------------- # # (C) July 2008 Uwe Schmitt # # This module is inspired by the standard libs tarfile by Lars # Gustäbel # module and the work of Tamito KAJIYAMA # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation # files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, # copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following # conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE.
import bz2
import gzip

# Size in bytes of one tar block; headers and file data are padded to it.
BLOCKSIZE = 512


def _to_text(raw):
    """Return *raw* as a native ``str``.

    On Python 2 the header bytes already are ``str`` and are returned
    untouched.  On Python 3 ``bytes`` are decoded as UTF-8 with
    ``surrogateescape`` so undecodable bytes survive the round trip.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", "surrogateescape")


def trim_null(v):
    """Return the part of header field *v* that precedes the first NUL.

    A field without any NUL (e.g. a name that fills all 100 bytes) is
    returned whole; the former ``v[:v.find('\\000')]`` cut off its last
    character because ``find`` returned -1.
    """
    return _to_text(v).split("\0", 1)[0]


def oct2int(v):
    """Parse the octal number stored in header field *v*.

    Padding NULs and blanks are ignored and an empty field yields 0.
    The value is parsed with ``int(..., 8)`` instead of ``eval`` so that
    bytes coming from an untrusted archive are never executed.

    Raises:
        ValueError: if the field contains anything but octal digits.
    """
    digits = _to_text(v).strip("\0 ")
    if not digits:
        return 0
    return int(digits, 8)


def int2int(v):
    """Parse the decimal number in field *v*; return None when it is empty."""
    digits = _to_text(v).strip("\0 ")
    if digits:
        return int(digits)
    return None


def _oct_or_none(v):
    """Parse an optional octal header field; return None when it is empty."""
    digits = _to_text(v).strip("\0 ")
    if digits:
        return int(digits, 8)
    return None


class TarInfo(object):
    """Decoded 512 byte tar header block describing one archive member.

    Attributes set by the constructor:
        tar_name, base      -- the constructor arguments, stored as given
        name, linkname      -- member name and link target
        mode                -- raw 8 character mode field (not converted)
        uid, gid, size, mtime, chksum -- numeric fields, parsed as octal
        type                -- one character type flag
        size_in_block       -- size rounded up to a multiple of BLOCKSIZE
        ustar               -- "ustar" when the magic field is present
        uname, gname, devmajor, devminor -- ustar extension fields; all
                               None when the magic is not "ustar"
    """

    def __init__(self, hblock, tar_name, base):
        """Decode *hblock*.

        Args:
            hblock: the raw header block (BLOCKSIZE bytes).
            tar_name: name of the archive the block was read from.
            base: value stored as ``self.base``; ``TarFile.open`` passes
                the file position right after the header, i.e. the start
                of the member's data.

        Raises:
            ValueError: if a numeric field holds non-octal characters.
        """
        self.tar_name = tar_name
        self.base = base

        self.name = trim_null(hblock[0:100])
        self.mode = _to_text(hblock[100:108])
        self.uid = oct2int(hblock[108:116])
        self.gid = oct2int(hblock[116:124])
        self.size = oct2int(hblock[124:136])
        # mtime occupies 12 bytes (136..147); the old slice stopped at 147.
        self.mtime = oct2int(hblock[136:148])
        # The checksum is stored in octal like every other numeric field.
        self.chksum = _oct_or_none(hblock[148:156])
        # Slice instead of index so Python 3 yields a 1-char string, not int.
        self.type = _to_text(hblock[156:157])
        # linkname occupies 100 bytes (157..256); the old slice lost one.
        self.linkname = trim_null(hblock[157:257])

        # Member data is padded with NULs up to a whole number of blocks.
        full_blocks, remainder = divmod(self.size, BLOCKSIZE)
        if remainder:
            full_blocks += 1
        self.size_in_block = full_blocks * BLOCKSIZE

        # POSIX archives store "ustar\0", GNU archives "ustar  \0"; strip
        # the GNU blank so both compare equal to "ustar".
        self.ustar = trim_null(hblock[257:257 + 6]).rstrip(" ")
        if self.ustar == "ustar":
            self.uname = trim_null(hblock[265:265 + 32])
            self.gname = trim_null(hblock[297:297 + 32])
            self.devmajor = _oct_or_none(hblock[329:329 + 8])
            self.devminor = _oct_or_none(hblock[337:337 + 8])
        else:
            self.uname = None
            self.gname = None
            self.devmajor = None
            self.devminor = None


class TarFile(object):
    """Namespace for :meth:`open`, a header iterator over a tar archive."""

    @classmethod
    def open(cls, name):
        """Yield a :class:`TarInfo` for every member of archive *name*.

        The archive is read with gzip for names ending in ``.tgz`` or
        ``.gz``, with bz2 for names ending in ``.bz2`` and as a plain
        binary file otherwise.  Only header blocks are read; member data
        is skipped with ``seek``.

        Iteration stops at the first block whose first byte is NUL (the
        end-of-archive marker) or when the file ends.  The underlying
        file is closed when the generator finishes or is closed.
        """
        if name.endswith((".tgz", ".gz")):
            fp = gzip.open(name)
        elif name.endswith(".bz2"):
            fp = bz2.BZ2File(name)
        else:
            # Tar data is binary; text mode corrupts it on some platforms.
            fp = open(name, "rb")

        # try/finally rather than "with": it also works for file objects
        # of older Python versions that are not context managers.
        try:
            offset = 0
            while True:
                hblock = fp.read(BLOCKSIZE)
                # An empty or short read means the file ended without the
                # usual zero-filled end-of-archive blocks.
                if len(hblock) < BLOCKSIZE:
                    break
                # ord() accepts a 1-char str (Py2) and 1-byte bytes (Py3).
                if ord(hblock[:1]) == 0:
                    break
                header = TarInfo(hblock, name, fp.tell())
                yield header
                # Jump over this header and its padded data to the next one.
                offset += BLOCKSIZE + header.size_in_block
                fp.seek(offset)
        finally:
            fp.close()


def _main():
    """Command line entry point: list name, size and mtime of each member."""
    import sys
    import time

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    if len(sys.argv) == 2:
        for header in TarFile.open(sys.argv[1]):
            print("")
            print(" name: %s" % header.name)
            print(" size: %s bytes" % header.size)
            print("mtime: %s" % time.ctime(header.mtime))
            print("")
    else:
        print("")
        print("need name of tarfile as command line parameter")
        print("")
        print("extensions .tar, .tar.gz, .tgz and .bz2 are allowed")
        print("")


if __name__ == "__main__":
    _main()