'''
Goal: a function which, given a zip archive that contains nested zip
archives, returns a dict of the form:

    {"file1.txt": list(<contents of file1>),
     "file2.txt": list(<contents of file2>),
     "file3.txt": list(<contents of file3>),
     "file4.txt": list(<contents of file4>)}

For the input:

    file.zip:
        outer/
        outer/inner1.zip:
            file1.txt
            file2.txt
        outer/inner2.zip:
            file3.txt
            file4.txt
'''
from zipfile import ZipFile, is_zipfile
from StringIO import StringIO
def extract_zip(input_zip):
    '''Read every file stored in a zip archive into memory.

    Nothing is written to disk (unlike ``ZipFile.extractall``, which also
    returns ``None``).

    Args:
        input_zip: A filesystem path, or a seekable binary file-like object
            holding the archive -- anything ``ZipFile`` accepts.

    Returns:
        A dict mapping each member name to that member's raw bytes.
        Directory entries (names ending in ``/``) are skipped.

    Raises:
        zipfile.BadZipfile: If ``input_zip`` is not a valid zip archive.
    '''
    # The context manager closes the archive; a file object supplied by the
    # caller is left open, because ZipFile only closes files it opened itself.
    with ZipFile(input_zip) as archive:
        return {
            member: archive.read(member)
            for member in archive.namelist()
            if not member.endswith('/')
        }


def recursive_zip_contents(input_zip):
    '''Flatten a zip archive, descending into any zip archives it contains.

    Args:
        input_zip: A filesystem path or seekable binary file-like object, as
            accepted by ``extract_zip``.

    Returns:
        A dict mapping member name to raw bytes for every non-zip file found
        at any nesting depth. Nested archives are replaced by their contents.
        If two archives hold a member with the same name, the one visited
        last wins.
    '''
    # Function-scope import keeps the file's top-level import block untouched.
    from io import BytesIO

    flattened = {}
    for member, payload in extract_zip(input_zip).items():
        buffer = BytesIO(payload)
        # is_zipfile inspects the data itself, so any zip-based format
        # (not only names ending in ".zip") is treated as a nested archive.
        if is_zipfile(buffer):
            buffer.seek(0)
            # NOTE: nesting depth is not limited; do not feed untrusted
            # archives without adding a limit.
            flattened.update(recursive_zip_contents(buffer))
        else:
            flattened[member] = payload
    return flattened


def get_filename_list_pairs(input_zip, encoding='utf-8'):
    '''Map every file in a (possibly nested) zip archive to its list of lines.

    Args:
        input_zip: A filesystem path or seekable binary file-like object, as
            accepted by ``extract_zip``.
        encoding: Text encoding used to decode each file's bytes.

    Returns:
        A dict mapping member name to a list of the file's lines, with line
        terminators removed. An empty file maps to an empty list.

    Raises:
        UnicodeDecodeError: If a file's bytes are not valid in ``encoding``.
    '''
    return {
        member: payload.decode(encoding).splitlines()
        for member, payload in recursive_zip_contents(input_zip).items()
    }
if __name__ == "__main__":
    # Script entry point: print the result for the archive 'file.zip' in the
    # current working directory. The parenthesised single-argument print
    # behaves the same on Python 2 and is valid syntax on Python 3.
    print(get_filename_list_pairs('file.zip'))
JycnICAgIAogICAgSSB3YW50IGEgZnVuY3Rpb24gd2hpY2ggcmV0dXJuczoKICAgIGRpY3QoImZpbGUxLnR4dCI6IGxpc3QoPGNvbnRlbnRzIG9mIGZpbGUxPiksCiAgICAgICAgICJmaWxlMi50eHQiOiBsaXN0KDxjb250ZW50cyBvZiBmaWxlMj4pLAogICAgICAgICAiZmlsZTMudHh0IjogbGlzdCg8Y29udGVudHMgb2YgZmlsZTM+KSwKICAgICAgICAgImZpbGU0LnR4dCI6IGxpc3QoPGNvbnRlbnRzIG9mIGZpbGU0PikpCiAgICAKICAgIEZvciBpbnB1dDogCiAgICAgICAgZmlsZS56aXA6CiAgICAgICAgICAgIG91dGVyXAogICAgICAgICAgICBvdXRlclxpbm5lcjEuemlwOgogICAgICAgICAgICAgICAgICAgIGZpbGUxLnR4dAogICAgICAgICAgICAgICAgICAgIGZpbGUyLnR4dAogICAgICAgICAgICBvdXRlclxpbm5lcjIuemlwOgogICAgICAgICAgICAgICAgICAgIGZpbGUzLnR4dAogICAgICAgICAgICAgICAgICAgIGZpbGU0LnR4dAonJycKCmZyb20gemlwZmlsZSBpbXBvcnQgWmlwRmlsZSwgaXNfemlwZmlsZQpmcm9tIFN0cmluZ0lPIGltcG9ydCBTdHJpbmdJTwoKZGVmIGV4dHJhY3RfemlwKGlucHV0X3ppcCk6CiAgICByZXR1cm4gU3RyaW5nSU8oWmlwRmlsZShTdHJpbmdJTyhpbnB1dF96aXApKS5leHRyYWN0YWxsKCkpCgpkZWYgcmVjdXJzaXZlX3ppcF9jb250ZW50cyhpbnB1dF96aXApOgogICAgcmV0dXJuIFtleHRyYWN0X3ppcCh0aGlzX3ppcCkgZm9yIHRoaXNfemlwIGluIGV4dHJhY3RfemlwKGlucHV0X3ppcCkgaWYgaXNfemlwZmlsZSh0aGlzX3ppcCldCgpkZWYgZ2V0X2ZpbGVuYW1lX2xpc3RfcGFpcnMoaW5wdXRfemlwKToKICAgIHJldHVybiB7KGtleSwgKHdvcmQgZm9yIHdvcmQgaW4gb3BlbihrZXksICdyJykpLnJlYWQoKSkgZm9yIGtleSBpbiByZWN1cnNpdmVfemlwX2NvbnRlbnRzKGlucHV0X3ppcCl9IAoKaWYgX19uYW1lX18gPT0gIl9fbWFpbl9fIjoKICAgIHByaW50IGdldF9maWxlbmFtZV9saXN0X3BhaXJzKCdmaWxlLnppcCcp