Skip to content

Commit f9f6aab

Browse files
Merge pull request #737 from microsoft/psl-bug-rai1
fix: Resolve RAI reuse bug (#28329)
2 parents 14cc118 + a683f05 commit f9f6aab

2 files changed

Lines changed: 123 additions & 20 deletions

File tree

src/backend/v4/magentic_agents/common/lifecycle.py

Lines changed: 113 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ def __init__(
4343
agent_description: str | None = None,
4444
agent_instructions: str | None = None,
4545
model_deployment_name: str | None = None,
46+
project_client=None,
4647
) -> None:
4748
self._stack: AsyncExitStack | None = None
4849
self.mcp_cfg: MCPConfig | None = mcp
@@ -58,6 +59,7 @@ def __init__(
5859
self.agent_description: str | None = agent_description
5960
self.agent_instructions: str | None = agent_instructions
6061
self.model_deployment_name: str | None = model_deployment_name
62+
self.project_client = project_client
6163
self.logger = logging.getLogger(__name__)
6264

6365
async def open(self) -> "MCPEnabledBase":
@@ -149,6 +151,55 @@ def get_chat_client(self, chat_client) -> AzureAIAgentClient:
149151
)
150152
return chat_client
151153

154+
async def resolve_agent_id(self, agent_id: str) -> Optional[str]:
155+
"""Resolve agent ID via Projects SDK first (for RAI agents), fallback to AgentsClient.
156+
157+
Args:
158+
agent_id: The agent ID to resolve
159+
160+
Returns:
161+
The resolved agent ID if found, None otherwise
162+
"""
163+
# Try Projects SDK first (RAI agents were created via project_client)
164+
try:
165+
if self.project_client:
166+
agent = await self.project_client.agents.get_agent(agent_id)
167+
if agent and agent.id:
168+
self.logger.info(
169+
"RAI.AgentReuseSuccess: Resolved agent via Projects SDK (id=%s)",
170+
agent.id,
171+
)
172+
return agent.id
173+
except Exception as ex:
174+
self.logger.warning(
175+
"RAI.AgentReuseMiss: Projects SDK get_agent failed (reason=ProjectsGetFailed, id=%s): %s",
176+
agent_id,
177+
ex,
178+
)
179+
180+
# Fallback via AgentsClient (endpoint)
181+
try:
182+
if self.client:
183+
agent = await self.client.get_agent(agent_id=agent_id)
184+
if agent and agent.id:
185+
self.logger.info(
186+
"RAI.AgentReuseSuccess: Resolved agent via AgentsClient (id=%s)",
187+
agent.id,
188+
)
189+
return agent.id
190+
except Exception as ex:
191+
self.logger.warning(
192+
"RAI.AgentReuseMiss: AgentsClient get_agent failed (reason=EndpointGetFailed, id=%s): %s",
193+
agent_id,
194+
ex,
195+
)
196+
197+
self.logger.error(
198+
"RAI.AgentReuseMiss: Agent ID not resolvable via any client (reason=ClientMismatch, id=%s)",
199+
agent_id,
200+
)
201+
return None
202+
152203
def get_agent_id(self, chat_client) -> str:
153204
"""Return the underlying agent ID."""
154205
if chat_client and chat_client.agent_id is not None:
@@ -171,29 +222,69 @@ async def get_database_team_agent(self) -> Optional[AzureAIAgentClient]:
171222
self.memory_store, self.team_config, self.agent_name
172223
)
173224

174-
if agent_id:
175-
agent = await self.client.get_agent(agent_id=agent_id)
176-
if agent and agent.id is not None:
177-
chat_client = AzureAIAgentClient(
178-
project_endpoint=self.project_endpoint,
179-
agent_id=agent.id,
180-
model_deployment_name=self.model_deployment_name,
181-
async_credential=self.creds,
182-
)
225+
if not agent_id:
226+
self.logger.info(
227+
"RAI reuse: no stored agent id (agent_name=%s)", self.agent_name
228+
)
229+
return None
230+
231+
# Use resolve_agent_id to try Projects SDK first, then AgentsClient
232+
resolved = await self.resolve_agent_id(agent_id)
233+
if not resolved:
234+
self.logger.error(
235+
"RAI.AgentReuseMiss: stored id %s not resolvable (agent_name=%s)",
236+
agent_id,
237+
self.agent_name,
238+
)
239+
return None
240+
241+
# Create client with resolved ID, preferring project_client for RAI agents
242+
if self.agent_name == "RAIAgent" and self.project_client:
243+
chat_client = AzureAIAgentClient(
244+
project_client=self.project_client,
245+
agent_id=resolved,
246+
async_credential=self.creds,
247+
)
248+
self.logger.info(
249+
"RAI.AgentReuseSuccess: Created AzureAIAgentClient via Projects SDK (id=%s)",
250+
resolved,
251+
)
252+
else:
253+
chat_client = AzureAIAgentClient(
254+
project_endpoint=self.project_endpoint,
255+
agent_id=resolved,
256+
model_deployment_name=self.model_deployment_name,
257+
async_credential=self.creds,
258+
)
259+
self.logger.info(
260+
"Created AzureAIAgentClient via endpoint (id=%s)", resolved
261+
)
183262

184-
except (
185-
Exception
186-
) as ex: # Consider narrowing this to specific exceptions if possible
187-
self.logger.error("Failed to initialize Get database team agent: %s", ex)
263+
except Exception as ex:
264+
self.logger.error(
265+
"Failed to initialize Get database team agent (agent_name=%s): %s",
266+
self.agent_name,
267+
ex,
268+
)
188269
return chat_client
189270

190271
async def save_database_team_agent(self) -> None:
191-
"""Save current team agent to database."""
272+
"""Save current team agent to database (only if truly new or changed)."""
192273
try:
193274
if self._agent.id is None:
194275
self.logger.error("Cannot save database team agent: agent_id is None")
195276
return
196277

278+
# Check if stored ID matches current ID
279+
stored_id = await get_database_team_agent_id(
280+
self.memory_store, self.team_config, self.agent_name
281+
)
282+
if stored_id == self._agent.id:
283+
self.logger.info(
284+
"RAI reuse: id unchanged (id=%s); skip save.", self._agent.id
285+
)
286+
return
287+
197288
currentAgent = CurrentTeamAgent(
198289
team_id=self.team_config.team_id,
199290
team_name=self.team_config.name,
@@ -203,9 +294,14 @@ async def save_database_team_agent(self) -> None:
203294
agent_instructions=self.agent_instructions,
204295
)
205296
await self.memory_store.add_team_agent(currentAgent)
297+
self.logger.info(
298+
"Saved team agent to database (agent_name=%s, id=%s)",
299+
self.agent_name,
300+
self._agent.id,
301+
)
206302

207303
except Exception as ex:
208-
self.logger.error("Failed to save save database: %s", ex)
304+
self.logger.error("Failed to save database: %s", ex)
209305

210306
async def _prepare_mcp_tool(self) -> None:
211307
"""Translate MCPConfig to a HostedMCPTool (agent_framework construct)."""
@@ -243,6 +339,7 @@ def __init__(
243339
agent_name: str | None = None,
244340
agent_description: str | None = None,
245341
agent_instructions: str | None = None,
342+
project_client=None,
246343
) -> None:
247344
super().__init__(
248345
mcp=mcp,
@@ -254,6 +351,7 @@ def __init__(
254351
agent_description=agent_description,
255352
agent_instructions=agent_instructions,
256353
model_deployment_name=model_deployment_name,
354+
project_client=project_client,
257355
)
258356

259357
self._created_ephemeral: bool = (

src/backend/v4/magentic_agents/foundry_agent.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,9 @@ def __init__(
4141
team_config: TeamConfiguration | None = None,
4242
memory_store: DatabaseBase | None = None,
4343
) -> None:
44+
# Get project_client before calling super().__init__
45+
project_client = config.get_ai_project_client()
46+
4447
super().__init__(
4548
mcp=mcp_config,
4649
model_deployment_name=model_deployment_name,
@@ -51,12 +54,12 @@ def __init__(
5154
agent_name=agent_name,
5255
agent_description=agent_description,
5356
agent_instructions=agent_instructions,
57+
project_client=project_client,
5458
)
5559

5660
self.enable_code_interpreter = enable_code_interpreter
5761
self.search = search_config
5862
self.logger = logging.getLogger(__name__)
59-
self.project_client = config.get_ai_project_client()
6063

6164
# Decide early whether Azure Search mode should be activated
6265
self._use_azure_search = self._is_azure_search_requested()
@@ -269,10 +272,7 @@ async def _after_open(self) -> None:
269272
temperature=temp,
270273
model_id=self.model_deployment_name,
271274
)
272-
273275
self.logger.info("Initialized ChatAgent '%s'", self.agent_name)
274-
if not chatClient: # Only save if we didn't load from DB
275-
await self.save_database_team_agent()
276276

277277
except Exception as ex:
278278
self.logger.error("Failed to initialize ChatAgent: %s", ex)
@@ -299,7 +299,12 @@ async def invoke(self, prompt: str):
299299

300300
messages = [ChatMessage(role=Role.USER, text=prompt)]
301301

302-
async for update in self._agent.run_stream(messages=messages):
302+
agent_saved = False
303+
async for update in self._agent.run_stream(messages):
304+
# Save agent ID only once on first update (agent ID won't change during streaming)
305+
if not agent_saved and self._agent.chat_client.agent_id:
306+
await self.save_database_team_agent()
307+
agent_saved = True
303308
yield update
304309

305310
# -------------------------

0 commit comments

Comments
 (0)