Skip to content

Commit e481ea8

Browse files
committed
Add gpt-image-1 support as alternative to DALL-E 3
- Added AZURE_OPENAI_IMAGE_MODEL setting (dall-e-3 or gpt-image-1)
- Added AZURE_OPENAI_GPT_IMAGE_ENDPOINT for a dedicated gpt-image-1 endpoint
- Image model can be configured via environment variable
- Automatic quality/size mapping between models:
  - DALL-E 3: standard/hd quality; 1024x1024, 1024x1792, 1792x1024 sizes
  - gpt-image-1: low/medium/high/auto quality; 1024x1024, 1536x1024, 1024x1536, auto sizes
- gpt-image-1 supports larger prompts (more context)
- Added generate_image alias for backwards compatibility
- Existing generate_dalle_image function still works (delegates to the selected model)
1 parent b11eb41 commit e481ea8

9 files changed

Lines changed: 895 additions & 10 deletions

File tree

content-gen/src/backend/agents/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
This package provides utility functions used by the orchestrator.
55
"""
66

7-
from backend.agents.image_content_agent import generate_dalle_image
7+
from backend.agents.image_content_agent import generate_dalle_image, generate_image
88

99
__all__ = [
1010
"generate_dalle_image",
11+
"generate_image",
1112
]

content-gen/src/backend/agents/image_content_agent.py

Lines changed: 185 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""
2-
Image Content Agent - Generates marketing images via DALL-E 3.
2+
"""Image Content Agent - Generates marketing images via DALL-E 3 or gpt-image-1.
33

4-
Provides the generate_dalle_image function used by the orchestrator
5-
to create marketing images using DALL-E 3.
4+
Provides the generate_image function used by the orchestrator
5+
to create marketing images using either DALL-E 3 or gpt-image-1.
66
"""
77
88
import logging
async def generate_dalle_image(
    prompt: str,
    product_description: str = "",
    scene_description: str = "",
    size: "str | None" = None,
    quality: "str | None" = None
) -> dict:
    """
    Generate a marketing image using DALL-E 3 or gpt-image-1.

    The model used is determined by the AZURE_OPENAI_IMAGE_MODEL setting
    (read via app_settings.azure_openai.effective_image_model).

    Args:
        prompt: The main image generation prompt
        product_description: Auto-generated description of product image (for context)
        scene_description: Scene/setting description from creative brief
        size: Image size (model-specific, uses settings default if not provided)
            - dall-e-3: 1024x1024, 1024x1792, 1792x1024
            - gpt-image-1: 1024x1024, 1536x1024, 1024x1536, auto
        quality: Image quality (model-specific, uses settings default if not provided)
            - dall-e-3: standard, hd
            - gpt-image-1: low, medium, high, auto

    Returns:
        Dictionary containing generated image data and metadata
        (success flag, base64 image, prompts used, model name).
    """
    # Determine which model to use from settings
    image_model = app_settings.azure_openai.effective_image_model
    logger.info(f"Using image generation model: {image_model}")

    # Delegate to the model-specific generator; defaults for size/quality
    # are resolved inside each generator.
    if image_model == "gpt-image-1":
        return await _generate_gpt_image(prompt, product_description, scene_description, size, quality)
    return await _generate_dalle_image(prompt, product_description, scene_description, size, quality)
107+
async def _generate_dalle_image(
108+
prompt: str,
109+
product_description: str = "",
110+
scene_description: str = "",
111+
size: str = None,
112+
quality: str = None
76113
) -> dict:
77114
"""
78115
Generate a marketing image using DALL-E 3.
@@ -89,6 +126,10 @@ async def generate_dalle_image(
89126
"""
90127
brand = app_settings.brand_guidelines
91128
129+
# Use defaults from settings if not provided
130+
size = size or app_settings.azure_openai.image_size
131+
quality = quality or app_settings.azure_openai.image_quality
132+
92133
# DALL-E 3 has a 4000 character limit for prompts
93134
# Truncate product descriptions to essential visual info
94135
truncated_product_desc = _truncate_for_dalle(product_description, max_chars=1500)
@@ -171,12 +212,150 @@ async def generate_dalle_image(
171212
"image_base64": image_data.b64_json,
172213
"prompt_used": full_prompt,
173214
"revised_prompt": getattr(image_data, 'revised_prompt', None),
215+
"model": "dall-e-3",
174216
}
175217
176218
except Exception as e:
177219
logger.exception(f"Error generating DALL-E image: {e}")
178220
return {
179221
"success": False,
180222
"error": str(e),
181-
"prompt_used": full_prompt
223+
"prompt_used": full_prompt,
224+
"model": "dall-e-3",
182225
}
226+
227+
228+
async def _generate_gpt_image(
    prompt: str,
    product_description: str = "",
    scene_description: str = "",
    size: "str | None" = None,
    quality: "str | None" = None
) -> dict:
    """
    Generate a marketing image using gpt-image-1.

    gpt-image-1 has different capabilities than DALL-E 3:
    - Supports larger prompt sizes
    - Different size options: 1024x1024, 1536x1024, 1024x1536, auto
    - Different quality options: low, medium, high, auto
    - May have better instruction following

    Args:
        prompt: The main image generation prompt
        product_description: Auto-generated description of product image (for context)
        scene_description: Scene/setting description from creative brief
        size: Image size (1024x1024, 1536x1024, 1024x1536, auto)
        quality: Image quality (low, medium, high, auto)

    Returns:
        Dictionary containing generated image data and metadata
    """
    brand = app_settings.brand_guidelines

    # Use defaults from settings if not provided
    size = size or app_settings.azure_openai.image_size
    quality = quality or app_settings.azure_openai.image_quality

    # Translate DALL-E 3 values into their closest gpt-image-1 equivalents so
    # a shared/legacy configuration keeps working with either model.
    quality = {"standard": "medium", "hd": "high"}.get(quality, quality)
    size = {"1024x1792": "1024x1536", "1792x1024": "1536x1024"}.get(size, size)

    # gpt-image-1 can handle larger prompts, so we can include more context
    # than the DALL-E path (3000 chars of product description vs 1500).
    truncated_product_desc = _truncate_for_dalle(product_description, max_chars=3000)

    main_prompt = prompt[:2000]
    scene_desc = scene_description[:1000] if scene_description else scene_description

    # Build the full prompt with product context and brand guidelines
    full_prompt = f"""
Create a professional marketing image for retail advertising.

{brand.get_image_generation_prompt()}

PRODUCT CONTEXT:
{truncated_product_desc if truncated_product_desc else 'No specific product - create a lifestyle/brand image'}

SCENE DESCRIPTION:
{scene_desc if scene_desc else main_prompt}

MAIN REQUIREMENT:
{main_prompt}

IMPORTANT GUIDELINES:
- Create a polished, professional marketing image
- Suitable for retail advertising and marketing campaigns
- High visual impact with clean composition
- Incorporate brand colors where appropriate: {brand.primary_color}, {brand.secondary_color}
- Modern, aspirational aesthetic
- Bright, optimistic lighting
"""

    try:
        # Pick the managed-identity credential when a client id is configured,
        # otherwise fall back to the default credential chain.
        # NOTE(review): the credential is never closed — consider
        # `async with` if these calls become frequent.
        client_id = app_settings.base_settings.azure_client_id
        if client_id:
            credential = ManagedIdentityCredential(client_id=client_id)
        else:
            credential = DefaultAzureCredential()

        # Get token for Azure OpenAI
        token = await credential.get_token("https://cognitiveservices.azure.com/.default")

        # Use gpt-image-1 specific endpoint if configured, otherwise DALL-E endpoint, otherwise main endpoint
        image_endpoint = (
            app_settings.azure_openai.gpt_image_endpoint or
            app_settings.azure_openai.dalle_endpoint or
            app_settings.azure_openai.endpoint
        )
        logger.info(f"Using gpt-image-1 endpoint: {image_endpoint}")

        client = AsyncAzureOpenAI(
            azure_endpoint=image_endpoint,
            azure_ad_token=token.token,
            api_version=app_settings.azure_openai.preview_api_version,
        )

        # gpt-image-1 API call.
        # IMPORTANT: `response_format` is intentionally omitted — unlike
        # DALL-E 3, gpt-image-1 rejects that parameter and always returns
        # base64-encoded image data (b64_json).
        response = await client.images.generate(
            model="gpt-image-1",
            prompt=full_prompt,
            size=size,
            quality=quality,
            n=1,
        )

        image_data = response.data[0]

        return {
            "success": True,
            "image_base64": image_data.b64_json,
            "prompt_used": full_prompt,
            # gpt-image-1 does not return a revised prompt; getattr keeps the
            # result shape identical to the DALL-E 3 path.
            "revised_prompt": getattr(image_data, 'revised_prompt', None),
            "model": "gpt-image-1",
        }

    except Exception as e:
        logger.exception(f"Error generating gpt-image-1 image: {e}")
        return {
            "success": False,
            "error": str(e),
            "prompt_used": full_prompt,
            "model": "gpt-image-1",
        }
358+
359+
360+
# Backwards-compatible alias: both the legacy name (generate_dalle_image) and
# the new model-neutral name (generate_image) reach the same dispatcher.
generate_image = generate_dalle_image

content-gen/src/backend/settings.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class _ChatHistorySettings(BaseSettings):
6262

6363

6464
class _AzureOpenAISettings(BaseSettings):
65-
"""Azure OpenAI configuration for GPT-5 and DALL-E 3."""
65+
"""Azure OpenAI configuration for GPT and image generation models."""
6666
model_config = SettingsConfigDict(
6767
env_prefix="AZURE_OPENAI_",
6868
env_file=DOTENV_PATH,
@@ -72,8 +72,16 @@ class _AzureOpenAISettings(BaseSettings):
7272

7373
gpt_model: str = Field(default="gpt-5", alias="AZURE_OPENAI_GPT_MODEL")
7474
model: str = "gpt-5"
75-
dalle_model: str = Field(default="dall-e-3", alias="AZURE_OPENAI_DALLE_MODEL")
75+
76+
# Image generation model settings
77+
# Supported models: "dall-e-3" or "gpt-image-1"
78+
image_model: str = Field(default="dall-e-3", alias="AZURE_OPENAI_IMAGE_MODEL")
79+
dalle_model: str = Field(default="dall-e-3", alias="AZURE_OPENAI_DALLE_MODEL") # Legacy alias
7680
dalle_endpoint: Optional[str] = Field(default=None, alias="AZURE_OPENAI_DALLE_ENDPOINT")
81+
82+
# gpt-image-1 specific endpoint (if different from DALL-E endpoint)
83+
gpt_image_endpoint: Optional[str] = Field(default=None, alias="AZURE_OPENAI_GPT_IMAGE_ENDPOINT")
84+
7785
resource: Optional[str] = None
7886
endpoint: Optional[str] = None
7987
temperature: float = 0.7
@@ -84,8 +92,24 @@ class _AzureOpenAISettings(BaseSettings):
8492
preview_api_version: str = "2024-02-01"
8593

8694
# Image generation settings
95+
# For dall-e-3: 1024x1024, 1024x1792, 1792x1024
96+
# For gpt-image-1: 1024x1024, 1536x1024, 1024x1536, auto
8797
image_size: str = "1024x1024"
88-
image_quality: str = "hd"
98+
image_quality: str = "hd" # dall-e-3: standard/hd, gpt-image-1: low/medium/high/auto
99+
100+
@property
101+
def effective_image_model(self) -> str:
102+
"""Get the effective image model, preferring image_model over dalle_model."""
103+
# If image_model is explicitly set and not the default, use it
104+
# Otherwise fall back to dalle_model for backwards compatibility
105+
return self.image_model if self.image_model else self.dalle_model
106+
107+
@property
108+
def image_endpoint(self) -> Optional[str]:
109+
"""Get the appropriate endpoint for the configured image model."""
110+
if self.effective_image_model == "gpt-image-1" and self.gpt_image_endpoint:
111+
return self.gpt_image_endpoint
112+
return self.dalle_endpoint
89113

90114
@model_validator(mode="after")
91115
def ensure_endpoint(self) -> Self:
-1.34 MB
Binary file not shown.

docs/generate_architecture.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
"""
Generate Solution Architecture Diagram for Content Generation Accelerator.

Renders a dark-themed PNG into docs/images/readme/ using the third-party
`diagrams` package (which requires Graphviz to be installed).
"""
from pathlib import Path

from diagrams import Diagram, Cluster, Edge
from diagrams.azure.compute import ContainerInstances, AppServices, ContainerRegistries
from diagrams.azure.database import CosmosDb, BlobStorage
from diagrams.azure.ml import CognitiveServices
from diagrams.onprem.client import User

# Output path is derived from this file's location so the script works from any
# checkout (the previous version hard-coded an absolute /home/<user>/... path).
OUTPUT_FILE = Path(__file__).resolve().parent / "images" / "readme" / "solution_architecture"
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)

# Graph attributes for dark theme matching the reference image
graph_attr = {
    "bgcolor": "#1a2634",
    "fontcolor": "white",
    "fontsize": "14",
    "pad": "0.5",
    "splines": "ortho",
    "nodesep": "1.0",
    "ranksep": "1.5",
}

node_attr = {
    "fontcolor": "white",
    "fontsize": "11",
}

edge_attr = {
    "color": "#4a9eff",
    "style": "bold",
}

with Diagram(
    "Content Generation Solution Architecture",
    filename=str(OUTPUT_FILE),
    outformat="png",
    show=False,
    direction="LR",
    graph_attr=graph_attr,
    node_attr=node_attr,
    edge_attr=edge_attr,
):
    user = User("User")

    with Cluster("Azure Cloud", graph_attr={"bgcolor": "#243447", "fontcolor": "white"}):

        with Cluster("Frontend Tier"):
            app_service = AppServices("App Service\n(Node.js)")

        with Cluster("Container Registry"):
            acr = ContainerRegistries("Azure Container\nRegistry")

        with Cluster("Backend Tier (VNet Integrated)"):
            aci = ContainerInstances("Container Instance\n(Python/Quart)")

        with Cluster("AI Services"):
            aoai_gpt = CognitiveServices("Azure OpenAI\n(GPT-5.1)")
            aoai_dalle = CognitiveServices("Azure OpenAI\n(DALL-E 3)")

        with Cluster("Data Storage"):
            cosmos = CosmosDb("Cosmos DB\n(Briefs, Products,\nChat History)")
            blob = BlobStorage("Blob Storage\n(Product Images,\nGenerated Content)")

    # User flow
    user >> Edge(label="HTTPS") >> app_service

    # App Service proxies API calls to the backend over the private VNet
    app_service >> Edge(label="API Proxy\n(Private VNet)") >> aci

    # Container Registry supplies the backend image
    acr >> Edge(label="Pull Image") >> aci

    # Backend to AI services
    aci >> Edge(label="Content\nGeneration") >> aoai_gpt
    aci >> Edge(label="Image\nGeneration") >> aoai_dalle

    # Backend to data stores
    aci >> Edge(label="CRUD\nOperations") >> cosmos
    aci >> Edge(label="Store/Retrieve\nImages") >> blob

0 commit comments

Comments
 (0)