Gemini 2.0 experimental image generation in django
I am working on a webapp with django backend. It is related to image generation with character consistency, we finally have a model that works well for that, gemini 2.0 flash experimental. I am generating a character with openAI and then passing it as a reference to gemini via API with a prompt to generate an image. The code works perfectly in jupyter notebook, however in django, it throws an error. I am not sure how to fix it. Here is the code that works in jupyter notebook:
def generate_image(request):
"""
Generate images using OpenAI API or Google Gemini API based on parameters
"""
text_prompt = request.data.get('text_prompt')
use_gemini = request.data.get('use_gemini', False)
character_image_url = request.data.get('character_image_url')
if not text_prompt:
return Response({"error": "Text prompt is required"}, status=status.HTTP_400_BAD_REQUEST)
try:
print(f"Generating image for prompt: {text_prompt}")
print(f"Using Gemini: {use_gemini}, Character image URL: {character_image_url is not None}")
# If using Gemini with a character reference
if use_gemini and character_image_url:
try:
print(f"Generating image with Gemini: {text_prompt[:100]}...")
# Download the character image
response = requests.get(character_image_url)
if response.status_code != 200:
return Response({"error": "Failed to download character image"}, status=status.HTTP_400_BAD_REQUEST)
# Load the character image into a PIL Image
reference_image = Image.open(BytesIO(response.content))
# Save the reference image temporarily using standard Python tempfile
import tempfile
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
temp_image_path = temp_file.name
temp_file.close()
reference_image.save(temp_image_path, format='PNG')
print(f"Reference image saved to {temp_image_path}")
# Reload the image to ensure correct format
pil_image = Image.open(temp_image_path)
# Create content with both text and image
contents = [
f"Generate an illustration based on this description: '{text_prompt}'. Include this character in the scene.",
pil_image
]
# Make the API call
response = gemini_client.models.generate_content(
model="gemini-2.0-flash-exp",
contents=contents,
config=types.GenerateContentConfig(
response_modalities=['Text', 'Image']
)
)
# Process the response
generated_image = None
response_text = ""
timestamp = None
for part in response.candidates[0].content.parts:
if part.text is not None:
response_text += part.text
elif part.inline_data is not None:
# Decode the base64 image data
image_data = base64.b64decode(part.inline_data.data)
# Open the image
generated_image = Image.open(BytesIO(image_data))
# Save the image to a BytesIO object first
img_io = BytesIO()
generated_image.save(img_io, format='PNG')
img_io.seek(0)
# Create a unique filename
timestamp = int(time.time())
filename = f'gemini_image_{timestamp}.png'
# Use Django's default storage
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
# Save using default storage
file_path = default_storage.save(filename, ContentFile(img_io.getvalue()))
print(f"Gemini image saved to {file_path}")
# Store the URL (relative to MEDIA_URL)
media_url = f"{settings.MEDIA_URL}{file_path}".replace('\\', '/')
if generated_image:
# Return the URL of the saved image
return Response({"image_urls": [media_url]})
else:
raise Exception("No image was generated")
except Exception as e:
print(f"Gemini image generation failed: {str(e)}")
# Fall back to OpenAI if Gemini fails
print("Falling back to OpenAI for image generation...")
# Use OpenAI if Gemini is not specified or failed
# Call image generation API with new client format
response = client.images.generate(
prompt=f"Children's book illustration of {text_prompt}. Colorful, friendly, kid-appropriate.",
n=3,
size="512x512"
)
# Access the image URLs from the response
image_urls = [img.url for img in response.data]
print(f"Successfully generated {len(image_urls)} images with OpenAI")
return Response({"image_urls": image_urls})
except Exception as e:
error_message = str(e)
print(f"Image generation failed: {error_message}")
return Response({"error": error_message}, status=status.HTTP_400_BAD_REQUEST)
And here is the code in django that throws error:
def generate_image_gemini(prompt, reference_image=None):
"""
Generate images using Google's Gemini API.
Parameters:
- prompt (str): Text description of the desired image
- reference_image (PIL.Image, optional): Reference image for character consistency
Returns:
- Generated image as PIL Image object and response text
"""
try:
print(f"Generating image with Gemini: {prompt[:100]}...")
if reference_image:
# Save the reference image temporarily
temp_image_path = 'temp_reference_image.png'
reference_image.save(temp_image_path)
print(f"Reference image saved to {temp_image_path}")
# Reload the image to ensure correct format
pil_image = Image.open(temp_image_path)
# Create content with both text and image
contents = [
f"Generate an illustration based on this description: '{prompt}'. Include this character in the scene.",
pil_image
]
else:
# Text-only prompt
contents = [f"Generate an illustration based on this description: '{prompt}'"]
# Make the API call
response = gemini_client.models.generate_content(
model="gemini-2.0-flash-exp",
contents=contents,
config=types.GenerateContentConfig(
response_modalities=['Text', 'Image']
)
)
# Process the response
generated_image = None
response_text = ""
for part in response.candidates[0].content.parts:
if part.text is not None:
response_text += part.text
elif part.inline_data is not None:
# Decode the base64 image data
image_data = base64.b64decode(part.inline_data.data)
# Open the image
generated_image = Image.open(BytesIO(image_data))
# Save the image
image_path = f'gemini_image_{int(time.time())}.png'
generated_image.save(image_path)
print(f"Gemini image saved to {image_path}")
return generated_image, response_text
except Exception as e:
print(f"Error generating image with Gemini: {e}")
return None, f"Error: {str(e)}"
And following is the error:
Gemini image generation failed: cannot identify image file <_io.BytesIO object at 0x000001DDD95A2D90>
I have tried many solutions provided by ChatGPT and Claude but I am going in circles. The reference file is being saved and loaded without any problem but Gemini is not returning image or whatever it is returning is not being saved. I would greatly appreciate any ideas/suggestions.
genai
library uses raw data bytes as the response data format for images, so you can omit base64 decoding.
Instead of
image_data = base64.b64decode(part.inline_data.data)
# Open the image
generated_image = Image.open(io.BytesIO(image_data))
do
# Open the image
generated_image = Image.open(io.BytesIO(part.inline_data.data))