Django: improve performance when dumping data

I have a main model called MainModel and n models (ModelA, ModelB, ...) that are related to MainModel with a ForeignKey.
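
For context, the relationship looks roughly like this (a simplified sketch, not my exact schema; field names are illustrative):

from django.db import models

class MainModel(models.Model):
    club_id = models.IntegerField()
    created_on = models.DateTimeField(auto_now_add=True)

class ModelA(models.Model):
    mainmodel = models.ForeignKey(MainModel, on_delete=models.CASCADE)
    created_on = models.DateTimeField(auto_now_add=True)
    updated_on = models.DateTimeField(auto_now=True)
    # ... model-specific fields

# ModelB through ModelU follow the same pattern.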

I want to export n CSV files, each one built from 10k MainModel records. This is the code:

import csv
import datetime
import io
import time

import boto3
from django.conf import settings
from django.core.management.base import BaseCommand

# MainModel and get_model_from_name are defined elsewhere in my project
# (imports omitted here).

MODELS = [
    'ModelA',
    'ModelB',
    'ModelC',
    'ModelD',
    'ModelE',
    'ModelF',
    'ModelG',
    'ModelH',
    'ModelI',
    'ModelJ',
    'ModelK',
    'ModelL',
    'ModelM',
    'ModelN',
    'ModelO',
    'ModelP',
    'ModelQ',
    'ModelR',
    'ModelS',
    'ModelT',
    'ModelU',
]

CHUNK_SIZE = 10000

class Command(BaseCommand):
    help = 'Dump data of all models related to a given club_id into multiple CSV files on S3'

    def add_arguments(self, parser):
        parser.add_argument('club_id', type=int, help='The club_id for filtering the data')
        parser.add_argument('operation_id', type=str, help='The rsync operation ID to structure the folder on S3')
        parser.add_argument('--output', type=str, default='output.csv', help='Base output file name (default: output.csv)')

    def handle(self, *args, **kwargs):
        club_id = kwargs['club_id']
        operation_id = kwargs['operation_id']
        output_file = kwargs['output']

        # Resolve the model classes from the names in MODELS
        models = [get_model_from_name(model_name) for model_name in MODELS]

        s3 = boto3.client('s3')
        bucket_name = 'your-s3-bucket-name'
        server = settings.MY_ENVIRONMENT if settings.MY_ENVIRONMENT else "default_env"
        folder_name = f"{server}-{operation_id}/data"

        mainmodel = MainModel.objects.filter(club_id=club_id)
        total_sessions = mainmodel.count()

        # Union of the field names across all models, plus a synthetic 'model' column
        all_fields = sorted(set(field.name for model in models for field in model._meta.fields) | {'model'})
        for start in range(0, total_sessions, CHUNK_SIZE):
            time_start = time.time()
            self.stdout.write(f"Processing records from {start} to {start + CHUNK_SIZE}")
            csv_buffer = io.StringIO()
            writer = csv.writer(csv_buffer)

            # Write header row with all field names
            writer.writerow(all_fields)

            chunk_mainmodels = mainmodel[start:start + CHUNK_SIZE]

            for mm in chunk_mainmodels:
                for model in models:
                    # One query per (MainModel, related model) pair
                    for record in model.objects.filter(mainmodel=mm):
                        row = []
                        for field in all_fields:
                            try:
                                value = getattr(record, field)
                                if field == 'athletesession':
                                    value = mm.pk
                                elif field in ['created_on', 'updated_on'] and isinstance(value, (datetime.datetime, datetime.date)):
                                    value = value.isoformat()
                                elif field == "model":
                                    value = model._meta.model_name
                                row.append(value)
                            except AttributeError:
                                row.append(None)
                        writer.writerow(row)

            chunk_file_name = f"{output_file.replace('.csv', '')}_part_{start // CHUNK_SIZE + 1}.csv"
            s3_key = f"{folder_name}/{chunk_file_name}"

            s3.put_object(Bucket=bucket_name, Key=s3_key, Body=csv_buffer.getvalue())

            progress_percentage = ((start + len(chunk_mainmodels)) / total_sessions) * 100
            self.stdout.write(self.style.SUCCESS(f'Chunk {start // CHUNK_SIZE + 1} completed. Progress: {progress_percentage:.2f}% - Time elapsed: {time.time() - time_start:.2f}s'))

        self.stdout.write(self.style.SUCCESS('All data successfully dumped in chunks to S3'))

It takes ~7 minutes to process 10k records and upload them to S3. Is there a way to optimize the process to reduce this time?
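
For what it's worth, I suspect the inner loops are where the time goes: for every MainModel in a chunk the code runs one query per related model, i.e. roughly 21 × 10,000 queries per chunk, plus the per-field Python work. One variation I have been considering (an untested sketch that slots in place of the "for mm in chunk_mainmodels:" loop, reuses the names defined above, assumes the ForeignKey is named mainmodel as in the filter above, and leaves out my athletesession special case) is to issue a single filter per related model per chunk:

            # Collect the primary keys of this chunk of MainModels
            chunk_ids = [mm.pk for mm in chunk_mainmodels]

            for model in models:
                model_name = model._meta.model_name
                # One query per related model per chunk, instead of one per MainModel
                for record in model.objects.filter(mainmodel_id__in=chunk_ids).iterator():
                    row = []
                    for field in all_fields:
                        if field == 'model':
                            row.append(model_name)
                            continue
                        value = getattr(record, field, None)
                        if field in ('created_on', 'updated_on') and isinstance(value, (datetime.datetime, datetime.date)):
                            value = value.isoformat()
                        row.append(value)
                    writer.writerow(row)

This would cut the query count from ~21 × 10,000 to ~21 per chunk, but I have not measured it yet and I am not sure whether the queries or the per-row Python work dominate, so I would appreciate pointers on where to focus.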
