Stream an entire folder using Django without reading everything into memory

The premise is similar to this question, but the accepted answer requires every file to be held in memory in order to work. That answer does provide an example of iterator usage, but it doesn't work with open(file, 'rb').read(chunk_size).

More importantly, that solution doesn't actually download a folder. It only groups multiple files together.

My solution is here, but essentially you need to handle folders differently from files. The final code is a modified version of chipx86's and allista's examples.

Specifically, yield_tar is changed to

    @classmethod
    def yield_tar(cls, file_data_iterable, removed_index):
        """Stream a tar archive of the given paths as a generator of chunks.

        Regular files are copied one tar block at a time so that no file is
        ever held in memory as a whole; directories are recorded as
        directory members. Any other kind of entry is skipped.

        Args:
            file_data_iterable: Iterable of ``pathlib.Path``-like objects
                (``is_file()``, ``is_dir()``, ``stat()`` and ``absolute()``
                are used).
            removed_index: Number of leading characters stripped from each
                absolute path to form the member name inside the archive.

        Yields:
            The result of ``stream.pop()`` after each write -- presumably the
            bytes accumulated since the previous pop (``FileStream`` is
            defined outside this snippet; confirm against its definition).

        Raises:
            OSError: If a file yields fewer bytes than the size recorded in
                its header (e.g. it was truncated while being streamed).
        """
        stream = FileStream()
        tar = tarfile.TarFile.open(mode='w|', fileobj=stream, bufsize=tarfile.BLOCKSIZE)
        for file in file_data_iterable:
            member_name = str(file.absolute())[removed_index:]
            if file.is_file():
                # stat() follows symlinks, just like is_file() and open() do.
                # lstat() would report the size of the link itself, and a
                # header size that disagrees with the payload corrupts the
                # archive.
                file_stat = file.stat()

                tar_info = tarfile.TarInfo(member_name)
                tar_info.size = int(file_stat.st_size)
                tar_info.mtime = file_stat.st_mtime

                # Write the header directly instead of calling
                # tar.addfile(tar_info): since Python 3.13 addfile() raises
                # ValueError for a non-empty regular file without a fileobj,
                # and the payload has to be streamed manually below.
                header = tar_info.tobuf(tar.format, tar.encoding, tar.errors)
                tar.fileobj.write(header)
                tar.offset += len(header)
                yield stream.pop()

                # Emit exactly tar_info.size bytes so the payload always
                # matches the header, even if the file changes while it is
                # being read.
                remaining = tar_info.size
                with open(file.absolute(), 'rb') as file_data:
                    while remaining > 0:
                        data = file_data.read(min(tarfile.BLOCKSIZE, remaining))
                        if not data:
                            raise OSError(f'unexpected end of data in {file}')
                        tar.fileobj.write(data)
                        remaining -= len(data)
                        yield stream.pop()

                # Tar members are padded with NULs to a whole number of blocks.
                blocks, remainder = divmod(tar_info.size, tarfile.BLOCKSIZE)
                if remainder > 0:
                    tar.fileobj.write(tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
                    yield stream.pop()
                    blocks += 1
                tar.offset += blocks * tarfile.BLOCKSIZE
            elif file.is_dir():
                tar_info = tarfile.TarInfo(member_name)
                tar_info.type = tarfile.DIRTYPE
                tar_info.mode = 0o744
                # Header-only member; its bytes are picked up by the next pop().
                tar.addfile(tar_info)
            # Anything else (broken symlink, socket, FIFO, ...) is skipped
            # rather than being recorded as a bogus directory entry.

        # close() writes the end-of-archive blocks; flush them to the client.
        tar.close()
        yield stream.pop()

It is then used like this:

def download_directory(path):
    """Return a streaming response that sends the folder at *path* as a tar file.

    Args:
        path: Folder to download, joined onto ``srcs`` (a name defined
            outside this snippet -- presumably the root directory being
            served; confirm at its definition).

    Returns:
        A ``FileResponse`` whose body is produced lazily by
        ``FileStream.yield_tar`` and whose suggested filename is the
        folder's name with a ``.tar`` suffix.
    """
    # NOTE(review): path is joined onto srcs without a containment check. If
    # it can come from a request, verify that absolute paths and ".." cannot
    # escape srcs.
    downloadpath = pathlib.Path(os.path.join(srcs, path))
    files = sorted(downloadpath.rglob('*'))

    # Strip everything up to (but not including) the folder's own name, so
    # member names inside the archive start with the folder name.
    prefix_length = len(str(downloadpath.absolute())) - len(downloadpath.name)

    response = FileResponse(
        FileStream.yield_tar(files, prefix_length),
        content_type="application/x-tar"
    )
    # Deliberately no Content-Length: the tar stream is larger than the sum
    # of the file sizes (per-member headers, block padding, end-of-archive
    # blocks), and a too-small value makes clients truncate the archive.
    response["Content-Disposition"] = f'attachment; filename="{pathlib.Path(path).name}.tar"'
    return response

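This way the client receives an uncompressed tar file with the same name as the downloaded folder. The intent is also to let the client see the total size of the folder and all of its subfolders; keep in mind, however, that the tar stream itself is larger than the sum of the file sizes (every entry adds a header and block padding), so the summed file sizes are not a valid Content-Length for the response.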

Back to Top