Zip and Unzip in Python
A quick demo showing how to zip/unzip files in a Jupyter notebook using Python.
from pathlib import Path
from fastai.vision import get_image_files
import numpy as np
from zipfile import ZipFile
# Root working directory; every other location below is derived from it.
path = Path('/home/dc/coronahack/source/nih-chest-xrays')
# Folder holding the source files that will be sampled and zipped.
src_dir = path.joinpath('data')
# CSV listing the sampled file paths (created, or overwritten if present).
csv_dst = path.joinpath('nih-chest-xrays_sample-2000.csv')
# Zip archive of the sampled files (created, or overwritten if present).
zip_dst = path.joinpath('nih-chest-xrays_sample-2000.zip')
# Folder the archive is extracted into (created if missing).
unzip_dst = path.joinpath('sample-2000')
# Gather the image files under src_dir (recursing into subfolders), then sort
# them in place so the listing has a deterministic order.
files = list(get_image_files(src_dir, recurse=True))
files.sort()
# Notebook preview: how many files were found, plus the first five.
len(files), files[:5]
# Number of files to draw for the sample.
n = 2000
# A new seed is drawn on every run; switch to the fixed seed below when a
# repeatable sample is wanted.
seed = np.random.randint(low=0, high=2**32 - 1)
# seed = 0
np.random.seed(seed)
# Draw n distinct entries from `files` (no entry is chosen twice).
sample_paths = np.random.choice(files, size=n, replace=False)
# Notebook preview of the selected paths.
sample_paths
# Notebook preview: whether the CSV already exists (it gets overwritten) and its path.
csv_dst.exists(), csv_dst
# Write the sampled paths as text, one per line. The builtin `str` is used
# because the `np.str` alias was deprecated in NumPy 1.20 and removed in 1.24;
# `str` behaves identically on older NumPy versions.
np.savetxt(csv_dst, sample_paths.astype(str), fmt='%s', delimiter=',')
# Notebook preview: whether the archive already exists (mode 'w' replaces it) and its path.
zip_dst.exists(), zip_dst
# Add every sampled file to a new archive.
# NOTE: no arcname is given, so each member is stored under its full source
# path (minus the leading separator), not under a path relative to src_dir.
with ZipFile(zip_dst, mode='w') as archive:
    for sample_file in sample_paths:
        archive.write(sample_file)
# Make sure the extraction folder (and any missing parents) exists; an
# already-existing folder is not an error.
unzip_dst.mkdir(parents=True, exist_ok=True)
# Notebook preview: confirm the folder is there and show its path.
unzip_dst.exists(), unzip_dst
# Unpack the whole archive into the extraction folder.
with ZipFile(zip_dst, mode='r') as archive:
    # archive.printdir()  # print zip contents
    archive.extractall(path=unzip_dst)
# Notebook preview: confirm the CSV exists and show its path.
csv_dst.exists(), csv_dst
# Read the saved paths back as strings. The builtin `str` is used because the
# `np.str` alias was deprecated in NumPy 1.20 and removed in 1.24; `str`
# behaves identically on older NumPy versions.
np.loadtxt(csv_dst, dtype=str, delimiter=',')