11"""
2- Image Content Agent - Generates marketing images via DALL-E 3.
2+ Image Content Agent - Generates marketing images via DALL-E 3 or gpt-image-1.
33
4- Provides the generate_dalle_image function used by the orchestrator
5- to create marketing images using DALL-E 3.
4+ Provides the generate_image function used by the orchestrator
5+ to create marketing images using either DALL-E 3 or gpt-image-1.
66"""
77
88import logging
@@ -71,8 +71,45 @@ async def generate_dalle_image(
7171 prompt: str,
7272 product_description: str = "",
7373 scene_description: str = "",
74- size : str = "1024x1024" ,
75- quality : str = "hd"
74+ size: str = None,
75+ quality: str = None
76+ ) -> dict:
77+ """
78+ Generate a marketing image using DALL - E 3 or gpt - image - 1.
79+
80+ The model used is determined by AZURE_OPENAI_IMAGE_MODEL setting .
81+
82+ Args :
83+ prompt : The main image generation prompt
84+ product_description : Auto - generated description of product image (for context )
85+ scene_description : Scene / setting description from creative brief
86+ size : Image size (model - specific , uses settings default if not provided )
87+ - dall - e - 3 : 1024 x1024 , 1024 x1792 , 1792 x1024
88+ - gpt - image - 1 : 1024 x1024 , 1536 x1024 , 1024 x1536 , auto
89+ quality : Image quality (model - specific , uses settings default if not provided )
90+ - dall - e - 3 : standard , hd
91+ - gpt - image - 1 : low , medium , high , auto
92+
93+ Returns :
94+ Dictionary containing generated image data and metadata
95+ """
96+ # Determine which model to use
97+ image_model = app_settings.azure_openai.effective_image_model
98+ logger.info(f"Using image generation model: {image_model}")
99+
100+ # Use appropriate generator based on model
101+ if image_model == "gpt-image-1":
102+ return await _generate_gpt_image(prompt, product_description, scene_description, size, quality)
103+ else:
104+ return await _generate_dalle_image(prompt, product_description, scene_description, size, quality)
105+
106+
107+ async def _generate_dalle_image(
108+ prompt: str,
109+ product_description: str = "",
110+ scene_description: str = "",
111+ size: str = None,
112+ quality: str = None
76113) -> dict:
77114 """
78115 Generate a marketing image using DALL - E 3.
@@ -89,6 +126,10 @@ async def generate_dalle_image(
89126 """
90127 brand = app_settings.brand_guidelines
91128
129+ # Use defaults from settings if not provided
130+ size = size or app_settings.azure_openai.image_size
131+ quality = quality or app_settings.azure_openai.image_quality
132+
92133 # DALL-E 3 has a 4000 character limit for prompts
93134 # Truncate product descriptions to essential visual info
94135 truncated_product_desc = _truncate_for_dalle(product_description, max_chars=1500)
@@ -171,12 +212,150 @@ async def generate_dalle_image(
171212 "image_base64": image_data.b64_json,
172213 "prompt_used": full_prompt,
173214 "revised_prompt": getattr(image_data, 'revised_prompt', None),
215+ "model": "dall-e-3",
174216 }
175217
176218 except Exception as e:
177219 logger.exception(f"Error generating DALL-E image: {e}")
178220 return {
179221 "success": False,
180222 "error": str(e),
181- "prompt_used" : full_prompt
223+ "prompt_used": full_prompt,
224+ "model": "dall-e-3",
182225 }
226+
227+
async def _generate_gpt_image(
    prompt: str,
    product_description: str = "",
    scene_description: str = "",
    size: str = None,
    quality: str = None
) -> dict:
    """
    Generate a marketing image using gpt-image-1.

    Builds a single prompt from the brand guidelines, the product context,
    the scene description and the main requirement, then calls the Azure
    OpenAI images API with the ``gpt-image-1`` model.

    DALL-E 3 style ``size`` / ``quality`` values are translated to their
    closest gpt-image-1 equivalents, so settings written for DALL-E 3 keep
    working when the model is switched.

    Args:
        prompt: The main image generation prompt (only the first 2000
            characters are used).
        product_description: Auto-generated description of the product image,
            used as context (truncated via ``_truncate_for_dalle`` with
            ``max_chars=3000``).
        scene_description: Scene/setting description from the creative brief
            (only the first 1000 characters are used). When empty, the main
            prompt is used in its place.
        size: Image size (1024x1024, 1536x1024, 1024x1536, auto). Falls back
            to ``app_settings.azure_openai.image_size`` when not provided.
        quality: Image quality (low, medium, high, auto). Falls back to
            ``app_settings.azure_openai.image_quality`` when not provided.

    Returns:
        On success: ``{"success": True, "image_base64", "prompt_used",
        "revised_prompt", "model"}``.
        On failure: ``{"success": False, "error", "prompt_used", "model"}``.
        Errors raised while obtaining a token or calling the API are logged
        and reported through the returned dictionary rather than raised.
    """
    brand = app_settings.brand_guidelines

    # Fall back to the configured defaults when the caller gave no value.
    size = size or app_settings.azure_openai.image_size
    quality = quality or app_settings.azure_openai.image_quality

    # Values may still use DALL-E 3 vocabulary; translate them to the
    # gpt-image-1 equivalents and pass anything else through untouched.
    quality_mapping = {
        "standard": "medium",
        "hd": "high",
    }
    quality = quality_mapping.get(quality, quality)

    size_mapping = {
        "1024x1792": "1024x1536",  # Closest equivalent
        "1792x1024": "1536x1024",  # Closest equivalent
    }
    size = size_mapping.get(size, size)

    # gpt-image-1 can handle larger prompts, so we can include more context.
    truncated_product_desc = _truncate_for_dalle(product_description, max_chars=3000)

    # Slicing is already a no-op for short strings, so no length check needed.
    main_prompt = prompt[:2000]
    scene_desc = scene_description[:1000] if scene_description else scene_description

    # Build the full prompt with product context and brand guidelines.
    # The lines are flush-left on purpose so no indentation leaks into the
    # text sent to the model.
    full_prompt = f"""
Create a professional marketing image for retail advertising.

{brand.get_image_generation_prompt()}

PRODUCT CONTEXT:
{truncated_product_desc if truncated_product_desc else 'No specific product - create a lifestyle/brand image'}

SCENE DESCRIPTION:
{scene_desc if scene_desc else main_prompt}

MAIN REQUIREMENT:
{main_prompt}

IMPORTANT GUIDELINES:
- Create a polished, professional marketing image
- Suitable for retail advertising and marketing campaigns
- High visual impact with clean composition
- Incorporate brand colors where appropriate: {brand.primary_color}, {brand.secondary_color}
- Modern, aspirational aesthetic
- Bright, optimistic lighting
"""

    try:
        # Prefer the user-assigned managed identity when a client id is configured.
        client_id = app_settings.base_settings.azure_client_id
        if client_id:
            credential = ManagedIdentityCredential(client_id=client_id)
        else:
            credential = DefaultAzureCredential()

        # NOTE(review): get_token is awaited, so these are presumably the
        # azure.identity.aio credential classes, which support `async with`
        # -- confirm against the file's imports. Closing the credential
        # releases its underlying HTTP transport once the token is obtained.
        async with credential:
            token = await credential.get_token("https://cognitiveservices.azure.com/.default")

        # Use gpt-image-1 specific endpoint if configured, otherwise DALL-E
        # endpoint, otherwise main endpoint.
        image_endpoint = (
            app_settings.azure_openai.gpt_image_endpoint
            or app_settings.azure_openai.dalle_endpoint
            or app_settings.azure_openai.endpoint
        )
        logger.info("Using gpt-image-1 endpoint: %s", image_endpoint)

        client = AsyncAzureOpenAI(
            azure_endpoint=image_endpoint,
            azure_ad_token=token.token,
            api_version=app_settings.azure_openai.preview_api_version,
        )

        # Entering the client as a context manager closes its HTTP session
        # when the request finishes, whether it succeeds or raises.
        async with client:
            # `response_format` is intentionally omitted: it only applies to
            # the DALL-E models. gpt-image-1 always returns base64 data and
            # the parameter is documented as unsupported for it.
            response = await client.images.generate(
                model="gpt-image-1",
                prompt=full_prompt,
                size=size,
                quality=quality,
                n=1,
            )

        image_data = response.data[0]

        return {
            "success": True,
            "image_base64": image_data.b64_json,
            "prompt_used": full_prompt,
            # Not every response carries a revised prompt; default to None.
            "revised_prompt": getattr(image_data, 'revised_prompt', None),
            "model": "gpt-image-1",
        }

    except Exception as e:
        # Top-level boundary for this generator: log with traceback and hand
        # the failure back to the caller as data instead of raising.
        logger.exception("Error generating gpt-image-1 image: %s", e)
        return {
            "success": False,
            "error": str(e),
            "prompt_used": full_prompt,
            "model": "gpt-image-1",
        }
358+
359+
360+ # Model-agnostic public alias; generate_dalle_image stays available as the legacy name for backwards compatibility
361+ generate_image = generate_dalle_image
0 commit comments