Stream an entire folder using Django without reading everything into memory
The premise is similar to this question, but the accepted answer requires every file to be in memory to work. The answer does provide an example of iterator usage, but that doesn't work with open(file, 'rb').read(chunk_size).
More importantly, that solution doesn't actually download a folder; it only groups multiple files together.
My solution is here, but essentially you need to handle folders differently from files. The final code is a modified version of chipx86's and allista's examples.
Specifically, yield_tar is changed to:
@classmethod
def yield_tar(cls, file_data_iterable, removed_index):
    """Stream a tar archive of the given paths as a generator of chunks.

    Regular files are copied into the archive one ``tarfile.BLOCKSIZE``
    chunk at a time, so a file is never read into memory in full.
    Directories are written as header-only entries. Entries that are
    neither a regular file nor a directory (after following symlinks),
    e.g. broken symlinks, FIFOs or sockets, are skipped.

    Args:
        file_data_iterable: Iterable of ``pathlib.Path``-like objects; each
            must provide ``is_file()``, ``is_dir()``, ``stat()`` and
            ``absolute()``.
        removed_index: Number of leading characters cut from each absolute
            path to obtain the member name stored in the archive.

    Yields:
        The value returned by ``stream.pop()`` after each write --
        presumably the bytes buffered in the ``FileStream`` since the
        previous pop (``FileStream`` is defined outside this block; verify).
    """
    stream = FileStream()
    # 'w|' selects tarfile's non-seekable stream mode; tar.fileobj is then
    # tarfile's internal stream wrapper around `stream`.
    tar = tarfile.TarFile.open(mode='w|', fileobj=stream, bufsize=tarfile.BLOCKSIZE)
    for entry in file_data_iterable:
        member_name = str(entry.absolute())[removed_index:]
        if entry.is_file():
            # stat() follows symlinks, exactly like is_file() and open().
            # lstat() would report the size of the link itself, so the
            # header would disagree with the payload and corrupt the archive.
            entry_stat = entry.stat()
            tar_info = tarfile.TarInfo(member_name)
            tar_info.size = int(entry_stat.st_size)
            tar_info.mtime = entry_stat.st_mtime
            # Write the header by hand instead of tar.addfile(tar_info):
            # since Python 3.13 addfile() raises ValueError for a non-empty
            # regular file without a fileobj, and passing a fileobj would
            # copy the whole file in one go instead of chunk by chunk.
            header = tar_info.tobuf(tar.format, tar.encoding, tar.errors)
            tar.fileobj.write(header)
            tar.offset += len(header)
            yield stream.pop()
            # NOTE: assumes the file does not change size while it is being
            # streamed; the header above already announced tar_info.size.
            with open(entry.absolute(), 'rb') as file_data:
                for data in iter(lambda: file_data.read(tarfile.BLOCKSIZE), b''):
                    tar.fileobj.write(data)
                    yield stream.pop()
            # Tar payloads are padded with NULs to a whole number of blocks.
            blocks, remainder = divmod(tar_info.size, tarfile.BLOCKSIZE)
            if remainder > 0:
                tar.fileobj.write(tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
                yield stream.pop()
                blocks += 1
            # Keep tarfile's own position bookkeeping in step with the bytes
            # written behind its back; close() uses it for the final padding.
            tar.offset += blocks * tarfile.BLOCKSIZE
        elif entry.is_dir():
            # Header-only entry (size 0); its bytes are picked up by the
            # next stream.pop().
            tar_info = tarfile.TarInfo(member_name)
            tar_info.type = tarfile.DIRTYPE
            tar_info.mode = 0o744
            tar.addfile(tar_info)
    # close() writes the end-of-archive blocks; emit whatever is left.
    tar.close()
    yield stream.pop()
And it is used by:
def download_directory(path):
    """Build a streaming response that serves a folder as a tar archive.

    Everything found recursively below ``srcs/path`` is handed to
    ``FileStream.yield_tar``; member names start at the folder's own name,
    so the archive unpacks into a single top-level folder.

    Args:
        path: Folder to download, joined onto ``srcs`` (a name defined
            outside this block).

    Returns:
        A ``FileResponse`` with content type ``application/x-tar`` and an
        attachment filename of ``<folder name>.tar``.
    """
    # NOTE(review): `path` is joined onto `srcs` without any check that the
    # result stays inside `srcs` ('..' segments or an absolute path escape
    # it). Validate it first if `path` can come from a request.
    downloadpath = pathlib.Path(os.path.join(srcs, path))
    files = sorted(downloadpath.rglob('*'))
    # Cut everything in front of the folder's own name from member paths.
    removed_index = len(str(downloadpath.absolute())) - len(downloadpath.name)
    response = FileResponse(
        FileStream.yield_tar(files, removed_index),
        content_type="application/x-tar"
    )
    # Content-Length is deliberately not set. The tar stream is larger than
    # the sum of the file sizes (per-entry headers, block padding and
    # end-of-archive blocks), so advertising that sum makes clients or
    # servers cut the archive short.
    response["Content-Disposition"] = f'attachment; filename="{pathlib.Path(path).name}.tar"'
    return response
This way the client gets an uncompressed tar file with the same name as the downloaded folder, and gets the file size of the entire folder and all subfolders.