'''
Goal: a function that, given a zip archive containing nested zip archives,
returns a dict mapping each inner file name to a list of that file's contents:

    {"file1.txt": list(<contents of file1>),
     "file2.txt": list(<contents of file2>),
     "file3.txt": list(<contents of file3>),
     "file4.txt": list(<contents of file4>)}

for the input file.zip laid out as:

    file.zip:
        outer/
        outer/inner1.zip:
            file1.txt
            file2.txt
        outer/inner2.zip:
            file3.txt
            file4.txt
'''
import tempfile
from io import BytesIO
from os import path
from shutil import rmtree
from StringIO import StringIO
from zipfile import ZipFile, is_zipfile
def _read_bytes(source):
    """Return the raw bytes of *source* (a filesystem path or a binary file object)."""
    if hasattr(source, 'read'):
        return source.read()
    with open(source, 'rb') as handle:
        return handle.read()


def unzip_all(input_zip, name, file_contents_pair=None):
    """Recursively collect file contents from a possibly nested zip archive.

    Args:
        input_zip: Filesystem path or binary file-like object. File objects
            are read from their current position.
        name: Key under which the contents are stored when ``input_zip``
            turns out NOT to be a zip archive (i.e. it is a plain file).
        file_contents_pair: Optional dict to add results to. A fresh dict is
            created when omitted, so results never leak between calls.

    Returns:
        The dict mapping each non-archive member name (as stored in its
        innermost containing archive) to the list of its lines, as byte
        strings with line endings preserved. If the same member name occurs
        in several archives, the one visited last wins.

    Raises:
        IOError/OSError: if ``input_zip`` is a path that cannot be read.
        zipfile.BadZipfile: if data that looks like a zip archive is corrupt.
    """
    if file_contents_pair is None:
        file_contents_pair = {}

    data = _read_bytes(input_zip)

    # Plain file: record its lines under the caller-supplied name.
    if not is_zipfile(BytesIO(data)):
        file_contents_pair[name] = BytesIO(data).readlines()
        return file_contents_pair

    # Everything is processed in memory, so nothing is extracted to disk and
    # no temporary directory has to be cleaned up.
    # NOTE(review): nesting depth and decompressed size are not limited; add
    # limits before feeding this untrusted archives (zip bombs).
    with ZipFile(BytesIO(data)) as archive:
        for member in archive.namelist():
            # Directory entries end with '/' and carry no content.
            if member.endswith('/'):
                continue
            unzip_all(BytesIO(archive.read(member)), member, file_contents_pair)
    return file_contents_pair


def unzip_from(input_zip):
    """Return ``{member name: list of lines}`` for every file inside *input_zip*.

    Nested zip archives are expanded recursively; see ``unzip_all`` for the
    exact shape of the result.

    Args:
        input_zip: Path to a zip archive, or a seekable binary file object
            whose zip data starts at offset 0.

    Returns:
        The collected dict, or an empty dict when ``input_zip`` is missing or
        is not a zip archive.
    """
    if not is_zipfile(input_zip):
        return {}
    # is_zipfile() moves the position of file objects; rewind before reading.
    if hasattr(input_zip, 'seek'):
        input_zip.seek(0)
    # The name is only used for non-zip input, which was ruled out above.
    return unzip_all(input_zip, None)
if __name__ == '__main__':
    # Script entry point: print the name -> contents mapping for file.zip
    # in the current working directory.
    print(unzip_from('file.zip'))