Gemini 2.0 experimental image generation in django

I am working on a webapp with django backend. It is related to image generation with character consistency, we finally have a model that works well for that, gemini 2.0 flash experimental. I am generating a character with openAI and then passing it as a reference to gemini via API with a prompt to generate an image. The code works perfectly in jupyter notebook, however in django, it throws an error. I am not sure how to fix it. Here is the code that works in jupyter notebook:

def generate_image(request):
"""
Generate images using OpenAI API or Google Gemini API based on parameters
"""
text_prompt = request.data.get('text_prompt')
use_gemini = request.data.get('use_gemini', False)
character_image_url = request.data.get('character_image_url')

if not text_prompt:
    return Response({"error": "Text prompt is required"}, status=status.HTTP_400_BAD_REQUEST)

try:
    print(f"Generating image for prompt: {text_prompt}")
    print(f"Using Gemini: {use_gemini}, Character image URL: {character_image_url is not None}")

    # If using Gemini with a character reference
    if use_gemini and character_image_url:
        try:
            print(f"Generating image with Gemini: {text_prompt[:100]}...")

            # Download the character image
            response = requests.get(character_image_url)
            if response.status_code != 200:
                return Response({"error": "Failed to download character image"}, status=status.HTTP_400_BAD_REQUEST)

            # Load the character image into a PIL Image
            reference_image = Image.open(BytesIO(response.content))

            # Save the reference image temporarily using standard Python tempfile
            import tempfile

            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
            temp_image_path = temp_file.name
            temp_file.close()

            reference_image.save(temp_image_path, format='PNG')
            print(f"Reference image saved to {temp_image_path}")

            # Reload the image to ensure correct format
            pil_image = Image.open(temp_image_path)

            # Create content with both text and image
            contents = [
                f"Generate an illustration based on this description: '{text_prompt}'. Include this character in the scene.",
                pil_image
            ]

            # Make the API call
            response = gemini_client.models.generate_content(
                model="gemini-2.0-flash-exp",
                contents=contents,
                config=types.GenerateContentConfig(
                    response_modalities=['Text', 'Image']
                )
            )

            # Process the response
            generated_image = None
            response_text = ""
            timestamp = None

            for part in response.candidates[0].content.parts:
                if part.text is not None:
                    response_text += part.text
                elif part.inline_data is not None:
                    # Decode the base64 image data
                    image_data = base64.b64decode(part.inline_data.data)

                    # Open the image
                    generated_image = Image.open(BytesIO(image_data))

                    # Save the image to a BytesIO object first
                    img_io = BytesIO()
                    generated_image.save(img_io, format='PNG')
                    img_io.seek(0)

                    # Create a unique filename
                    timestamp = int(time.time())
                    filename = f'gemini_image_{timestamp}.png'

                    # Use Django's default storage
                    from django.core.files.storage import default_storage
                    from django.core.files.base import ContentFile

                    # Save using default storage
                    file_path = default_storage.save(filename, ContentFile(img_io.getvalue()))

                    print(f"Gemini image saved to {file_path}")

                    # Store the URL (relative to MEDIA_URL)
                    media_url = f"{settings.MEDIA_URL}{file_path}".replace('\\', '/')

            if generated_image:
                # Return the URL of the saved image
                return Response({"image_urls": [media_url]})
            else:
                raise Exception("No image was generated")

        except Exception as e:
            print(f"Gemini image generation failed: {str(e)}")
            # Fall back to OpenAI if Gemini fails
            print("Falling back to OpenAI for image generation...")

    # Use OpenAI if Gemini is not specified or failed
    # Call image generation API with new client format
    response = client.images.generate(
        prompt=f"Children's book illustration of {text_prompt}. Colorful, friendly, kid-appropriate.",
        n=3,
        size="512x512"
    )

    # Access the image URLs from the response
    image_urls = [img.url for img in response.data]
    print(f"Successfully generated {len(image_urls)} images with OpenAI")
    return Response({"image_urls": image_urls})
except Exception as e:
    error_message = str(e)
    print(f"Image generation failed: {error_message}")
    return Response({"error": error_message}, status=status.HTTP_400_BAD_REQUEST)

And here is the code in django that throws error:

def generate_image_gemini(prompt, reference_image=None):
"""
Generate images using Google's Gemini API.

Parameters:
- prompt (str): Text description of the desired image
- reference_image (PIL.Image, optional): Reference image for character consistency

Returns:
- Generated image as PIL Image object and response text
"""
try:
    print(f"Generating image with Gemini: {prompt[:100]}...")

    if reference_image:
        # Save the reference image temporarily
        temp_image_path = 'temp_reference_image.png'
        reference_image.save(temp_image_path)
        print(f"Reference image saved to {temp_image_path}")

        # Reload the image to ensure correct format
        pil_image = Image.open(temp_image_path)

        # Create content with both text and image
        contents = [
            f"Generate an illustration based on this description: '{prompt}'. Include this character in the scene.",
            pil_image
        ]
    else:
        # Text-only prompt
        contents = [f"Generate an illustration based on this description: '{prompt}'"]

    # Make the API call
    response = gemini_client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=contents,
        config=types.GenerateContentConfig(
            response_modalities=['Text', 'Image']
        )
    )

    # Process the response
    generated_image = None
    response_text = ""

    for part in response.candidates[0].content.parts:
        if part.text is not None:
            response_text += part.text
        elif part.inline_data is not None:
            # Decode the base64 image data
            image_data = base64.b64decode(part.inline_data.data)

            # Open the image
            generated_image = Image.open(BytesIO(image_data))

            # Save the image
            image_path = f'gemini_image_{int(time.time())}.png'
            generated_image.save(image_path)
            print(f"Gemini image saved to {image_path}")

    return generated_image, response_text

except Exception as e:
    print(f"Error generating image with Gemini: {e}")
    return None, f"Error: {str(e)}"

And following is the error:

Gemini image generation failed: cannot identify image file <_io.BytesIO object at 0x000001DDD95A2D90>

I have tried many solutions provided by ChatGPT and Claude but I am going in circles. The reference file is being saved and loaded without any problem but Gemini is not returning image or whatever it is returning is not being saved. I would greatly appreciate any ideas/suggestions.

genai library uses raw data bytes as the response data format for images, so you can omit base64 decoding.

Instead of

image_data = base64.b64decode(part.inline_data.data)
# Open the image
generated_image = Image.open(io.BytesIO(image_data))

do

# Open the image
generated_image = Image.open(io.BytesIO(part.inline_data.data))
Вернуться на верх