'''
Flatten a zip archive that contains other zip archives.

Goal: a function which returns

    {"file1.txt": [<lines of file1>],
     "file2.txt": [<lines of file2>],
     "file3.txt": [<lines of file3>],
     "file4.txt": [<lines of file4>]}

for an input laid out as

    file.zip:
        outer/
        outer/inner1.zip:
            file1.txt
            file2.txt
        outer/inner2.zip:
            file3.txt
            file4.txt

Everything is done in memory; nothing is extracted to disk.
'''

from io import BytesIO
from zipfile import ZipFile, is_zipfile


def extract_zip(input_zip):
    """Read every file stored directly in a zip archive.

    Args:
        input_zip: a path to a zip file, or a seekable binary file-like
            object positioned over zip data (anything ``ZipFile`` accepts).

    Returns:
        dict mapping each member name to its raw content as a byte string.
        Directory entries (names ending in ``/``) are skipped.

    Raises:
        zipfile.BadZipfile: if ``input_zip`` is not a valid zip archive.
    """
    with ZipFile(input_zip) as archive:
        return {
            name: archive.read(name)
            for name in archive.namelist()
            if not name.endswith('/')  # directory placeholder, no content
        }


def recursive_zip_contents(input_zip):
    """Collect all non-zip files from an archive and its nested archives.

    Members that are themselves zip archives are opened from memory and
    descended into, to any depth; only their contents appear in the result.

    Args:
        input_zip: a path or seekable binary file-like object (see
            ``extract_zip``).

    Returns:
        dict mapping member name (as stored in the archive that directly
        contains it) to raw bytes. If two archives contain the same member
        name, the one visited later wins.
    """
    contents = {}
    for name, data in extract_zip(input_zip).items():
        buffered = BytesIO(data)
        if is_zipfile(buffered):
            # Nested archive: replace it with whatever it contains.
            contents.update(recursive_zip_contents(buffered))
        else:
            contents[name] = data
    return contents


def get_filename_list_pairs(input_zip):
    """Map every file in a (possibly nested) zip to its list of lines.

    Args:
        input_zip: a path or seekable binary file-like object (see
            ``extract_zip``).

    Returns:
        dict of ``{filename: [line, ...]}``. Lines are byte strings exactly
        as stored, with line terminators removed; decode them if text is
        needed. An empty file maps to an empty list.
    """
    return {
        name: data.splitlines()
        for name, data in recursive_zip_contents(input_zip).items()
    }


if __name__ == "__main__":
    print(get_filename_list_pairs('file.zip'))
JycnICAgIAogICAgSSB3YW50IGEgZnVuY3Rpb24gd2hpY2ggcmV0dXJuczoKICAgIGRpY3QoImZpbGUxLnR4dCI6IGxpc3QoPGNvbnRlbnRzIG9mIGZpbGUxPiksCiAgICAgICAgICJmaWxlMi50eHQiOiBsaXN0KDxjb250ZW50cyBvZiBmaWxlMj4pLAogICAgICAgICAiZmlsZTMudHh0IjogbGlzdCg8Y29udGVudHMgb2YgZmlsZTM+KSwKICAgICAgICAgImZpbGU0LnR4dCI6IGxpc3QoPGNvbnRlbnRzIG9mIGZpbGU0PikpCiAgICAKICAgIEZvciBpbnB1dDogCiAgICAgICAgZmlsZS56aXA6CiAgICAgICAgICAgIG91dGVyXAogICAgICAgICAgICBvdXRlclxpbm5lcjEuemlwOgogICAgICAgICAgICAgICAgICAgIGZpbGUxLnR4dAogICAgICAgICAgICAgICAgICAgIGZpbGUyLnR4dAogICAgICAgICAgICBvdXRlclxpbm5lcjIuemlwOgogICAgICAgICAgICAgICAgICAgIGZpbGUzLnR4dAogICAgICAgICAgICAgICAgICAgIGZpbGU0LnR4dAonJycKCmZyb20gemlwZmlsZSBpbXBvcnQgWmlwRmlsZSwgaXNfemlwZmlsZQpmcm9tIFN0cmluZ0lPIGltcG9ydCBTdHJpbmdJTwoKZGVmIGV4dHJhY3RfemlwKGlucHV0X3ppcCk6CiAgICByZXR1cm4gU3RyaW5nSU8oWmlwRmlsZShTdHJpbmdJTyhpbnB1dF96aXApKS5leHRyYWN0YWxsKCkpCgpkZWYgcmVjdXJzaXZlX3ppcF9jb250ZW50cyhpbnB1dF96aXApOgogICAgcmV0dXJuIFtleHRyYWN0X3ppcCh0aGlzX3ppcCkgZm9yIHRoaXNfemlwIGluIGV4dHJhY3RfemlwKGlucHV0X3ppcCkgaWYgaXNfemlwZmlsZSh0aGlzX3ppcCldCgpkZWYgZ2V0X2ZpbGVuYW1lX2xpc3RfcGFpcnMoaW5wdXRfemlwKToKICAgIHJldHVybiB7KGtleSwgKHdvcmQgZm9yIHdvcmQgaW4gb3BlbihrZXksICdyJykpLnJlYWQoKSkgZm9yIGtleSBpbiByZWN1cnNpdmVfemlwX2NvbnRlbnRzKGlucHV0X3ppcCl9IAoKaWYgX19uYW1lX18gPT0gIl9fbWFpbl9fIjoKICAgIHByaW50IGdldF9maWxlbmFtZV9saXN0X3BhaXJzKCdmaWxlLnppcCcp
Traceback (most recent call last):
File "unzip_all.py", line 32, in <module>
print get_filename_list_pairs('file.zip')
File "unzip_all.py", line 29, in get_filename_list_pairs
return {(key, (word for word in open(key, 'r')).read()) for key in recursive_zip_contents(input_zip)}
File "unzip_all.py", line 26, in recursive_zip_contents
return [extract_zip(this_zip) for this_zip in extract_zip(input_zip) if is_zipfile(this_zip)]
File "unzip_all.py", line 23, in extract_zip
return StringIO(ZipFile(StringIO(input_zip)).extractall())
File "C:\Python27\lib\zipfile.py", line 714, in __init__
self._GetContents()
File "C:\Python27\lib\zipfile.py", line 748, in _GetContents
self._RealGetContents()
File "C:\Python27\lib\zipfile.py", line 763, in _RealGetContents
raise BadZipfile, "File is not a zip file"
zipfile.BadZipfile: File is not a zip file
File "unzip_all.py", line 32, in <module>
print get_filename_list_pairs('file.zip')
File "unzip_all.py", line 29, in get_filename_list_pairs
return {(key, (word for word in open(key, 'r')).read()) for key in recursive_zip_contents(input_zip)}
File "unzip_all.py", line 26, in recursive_zip_contents
return [extract_zip(this_zip) for this_zip in extract_zip(input_zip) if is_zipfile(this_zip)]
File "unzip_all.py", line 23, in extract_zip
return StringIO(ZipFile(StringIO(input_zip)).extractall())
File "C:\Python27\lib\zipfile.py", line 714, in __init__
self._GetContents()
File "C:\Python27\lib\zipfile.py", line 748, in _GetContents
self._RealGetContents()
File "C:\Python27\lib\zipfile.py", line 763, in _RealGetContents
raise BadZipfile, "File is not a zip file"
zipfile.BadZipfile: File is not a zip file


