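Final retry: replace the adaptive rate-limit path in CommsManager with a single retry loop and raise the retry/backoff defaults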

Shreyas-Microsoft · Shreyas-Microsoft · commit a3da8ffa0b22 · 2025-06-26T22:31:52.000+05:30
diff --git a/src/backend/sql_agents/convert_script.py b/src/backend/sql_agents/convert_script.py
@@ -1,4 +1,4 @@
-"""This module loops through each file in a batch and processes it using the SQL agents.More actions
+"""This module loops through each file in a batch and processes it using the SQL agents.
 It sets up a group chat for the agents, sends the source script to the chat, and processes
 the responses from the agents. It also reports in real-time to the client using websockets
 and updates the database with the results.
@@ -49,9 +49,9 @@ async def convert_script(
     #retry logic comms manager
     comms_manager = CommsManager(
         sql_agents.idx_agents,
-        max_retries=3,          # Retry up to 5 times for rate limits
-        initial_delay=0.2,      # Start with 1 second delay
-        backoff_factor=1.2,     # Double delay each retry
+        max_retries=5,          # Retry up to 5 times for rate limits
+        initial_delay=1.0,      # Start with 1 second delay
+        backoff_factor=2.0,     # Double delay each retry
     )
 
     # send websocket notification that file processing has started
@@ -354,4 +354,6 @@ async def validate_migration(
         log_type=LogType.SUCCESS,
         agent_type=AgentType.ALL,
         author_role=AuthorRole.ASSISTANT,
-    )
+    )
+
+    return True
diff --git a/src/backend/sql_agents/helpers/comms_manager.py b/src/backend/sql_agents/helpers/comms_manager.py
@@ -27,15 +27,6 @@ class CommsManager:
     # regex to extract the recommended wait time in seconds from response
     _EXTRACT_WAIT_TIME = r"in (\d+) seconds"
 
-    # Rate limit error indicators
-    _RATE_LIMIT_INDICATORS = [
-        "rate limit",
-        "too many requests",
-        "quota exceeded",
-        "throttled",
-        "429",
-    ]
-
     group_chat: AgentGroupChat = None
 
     class SelectionStrategy(SequentialSelectionStrategy):
@@ -128,8 +119,8 @@ def __init__(
         agent_dict,
         exception_types: tuple = (Exception,),
         max_retries: int = 10,
-        initial_delay: float = 0.5,
-        backoff_factor: float = 1.5,
+        initial_delay: float = 1.0,
+        backoff_factor: float = 2.0,
         simple_truncation: int = None,
     ):
         """Initialize the CommsManager and agent_chat with the given agents.
@@ -138,11 +129,18 @@ def __init__(
             agent_dict: Dictionary of agents
             exception_types: Tuple of exception types that should trigger a retry
             max_retries: Maximum number of retry attempts (default: 10)
-            initial_delay: Initial delay in seconds before first retry (default: 0.5)
-            backoff_factor: Factor by which the delay increases with each retry (default: 1.5)
+            initial_delay: Initial delay in seconds before first retry (default: 1.0)
+            backoff_factor: Factor by which the delay increases with each retry (default: 2.0)
             simple_truncation: Optional truncation limit for chat history
         """
-        # Initialize the group chat (exactly like original)
+        # Store retry configuration
+        self.max_retries = max_retries
+        self.initial_delay = initial_delay
+        self.backoff_factor = backoff_factor
+        self.exception_types = exception_types
+        self.simple_truncation = simple_truncation
+        
+        # Initialize the group chat (maintaining original functionality)
         self.group_chat = AgentGroupChat(
             agents=agent_dict.values(),
             termination_strategy=self.ApprovalTerminationStrategy(
@@ -156,130 +154,99 @@ def __init__(
             selection_strategy=self.SelectionStrategy(agents=agent_dict.values()),
         )
 
-        # Store retry configuration
-        self.max_retries = max_retries
-        self.initial_delay = initial_delay
-        self.backoff_factor = backoff_factor
-        self.exception_types = exception_types
-        self.simple_truncation = simple_truncation
-
-        # Adaptive retry state - starts optimistic
-        self._rate_limit_detected_recently = False
-        self._consecutive_successes = 0
-        self._session_has_rate_limits = False
-
-    def _is_rate_limit_error(self, error_message: str) -> bool:
-        """Check if the error message indicates a rate limit issue."""
-        error_lower = error_message.lower()
-        return any(indicator in error_lower for indicator in self._RATE_LIMIT_INDICATORS)
-
-    def _should_use_zero_overhead_path(self) -> bool:
-        """
-        Determine if we should use zero-overhead path.
-        
-        Use zero overhead when:
-        - No rate limits detected in current session AND
-        - We have some successful calls OR this is the first call
-        """
-        return (
-            not self._session_has_rate_limits 
-            and (self._consecutive_successes >= 1 or self._consecutive_successes == 0)
-
-        )
-
-    async def _zero_overhead_invoke(self) -> AsyncIterable[ChatMessageContent]:
-        """Pure delegation to original group_chat.invoke() - zero overhead."""
-        async for item in self.group_chat.invoke():
-            yield item
+    async def invoke_async(self):
+        """Invoke the group chat with the given agents (original method maintained for compatibility)."""
+        return self.group_chat.invoke()
 
-    async def _retry_enabled_invoke(self) -> AsyncIterable[ChatMessageContent]:
-        """Invoke with retry logic - only used when rate limits are expected."""
+    async def async_invoke(self) -> AsyncIterable[ChatMessageContent]:
+        """Invoke the group chat with retry logic and error handling."""
         attempt = 0
         current_delay = self.initial_delay
 
-        # Create history snapshot only when we need it
-        history_snapshot = None
-
         while attempt < self.max_retries:
             try:
-                # Apply truncation if configured and on first attempt
+                # Snapshot the group chat history before invoking so it can be restored on retry.
+                # deepcopy is used so later changes to the live history do not alter the snapshot.
+                history_snap = copy.deepcopy(self.group_chat.history)
+                
+                self.logger.debug(
+                    "History before invoke: %s",
+                    [msg.name for msg in self.group_chat.history],
+                )
+                
+                # Get a fresh iterator from the function
+                async_iter = self.group_chat.invoke()
+
+                # If simple truncation is set, truncate the history
                 if (
-                    attempt == 0
-                    and self.simple_truncation
+                    self.simple_truncation
                     and len(self.group_chat.history) > self.simple_truncation
                 ):
-                    if history_snapshot is None:
-                        history_snapshot = copy.deepcopy(self.group_chat.history)
-                    self.group_chat.history = history_snapshot[-self.simple_truncation:]
-
+                    # Truncate the history to the last n messages
+                    self.group_chat.history = history_snap[-self.simple_truncation :]
 
-                # Execute and yield results
-                async for item in self.group_chat.invoke():
+                # Yield each item from the iterator
+                async for item in async_iter:
                     yield item
 
-                # Success - exit retry loop
-                return
+                # If we get here without exception, we're done
+                break
 
             except AgentInvokeException as aie:
-                # Create snapshot only when we actually need to retry
-                if history_snapshot is None:
-                    history_snapshot = copy.deepcopy(self.group_chat.history)
-
                 attempt += 1
                 if attempt >= self.max_retries:
                     self.logger.error(
-                        "AgentInvokeException: Max retries (%d) exceeded. Final error: %s",
+                        "Function invoke failed after %d attempts. Final error: %s. Consider increasing the models rate limit.",
                         self.max_retries,
                         str(aie),
                     )
+                    # Re-raise the last exception if all retries failed
                     raise
 
-                # Restore history from snapshot
-                self.group_chat.history = copy.deepcopy(history_snapshot)
-
-                # Check for rate limit specific wait time
-                wait_time_match = re.search(self._EXTRACT_WAIT_TIME, str(aie))
-                if wait_time_match:
-                    current_delay = int(wait_time_match.group(1))
-                    self.logger.info(
-                        "Rate limit detected, waiting %d seconds as requested",
-                        current_delay
+                # Restore the pre-invoke history snapshot before retrying
+                self.group_chat.history = history_snap
+
+                try:
+                    # Try to extract wait time from error message
+                    wait_time_match = re.search(self._EXTRACT_WAIT_TIME, str(aie))
+                    if wait_time_match:
+                        # If regex is found, set the delay to the value in seconds
+                        current_delay = int(wait_time_match.group(1))
+                    else:
+                        current_delay = self.initial_delay
+
+                    self.logger.warning(
+                        "Attempt %d/%d for function invoke failed: %s. Retrying in %.2f seconds...",
+                        attempt,
+                        self.max_retries,
+                        str(aie),
+                        current_delay,
                     )
-                else:
-                    current_delay = self.initial_delay * (self.backoff_factor ** (attempt - 1))
-
-                self.logger.warning(
-                    "Attempt %d/%d failed with AgentInvokeException: %s. Retrying in %.2f seconds...",
-                    attempt,
-                    self.max_retries,
-                    str(aie),
-                    current_delay,
-                )
-
-                await asyncio.sleep(current_delay)
 
+                    # Wait before retrying
+                    await asyncio.sleep(current_delay)
 
+                    if not wait_time_match:
+                        # Apply backoff for the next attempt. NOTE(review): the next AgentInvokeException reassigns current_delay above, discarding this.
+                        current_delay *= self.backoff_factor
 
+                except Exception as ex:
+                    self.logger.error(
+                        "Retry error: %s. Using default delay.",
+                        ex,
+                    )
+                    current_delay = self.initial_delay
 
             except self.exception_types as e:
-                if history_snapshot is None:
-                    history_snapshot = copy.deepcopy(self.group_chat.history)
-
-
                 attempt += 1
                 if attempt >= self.max_retries:
                     self.logger.error(
-                        "Generic exception: Max retries (%d) exceeded. Final error: %s",
+                        "Function invoke failed after %d attempts. Final error: %s",
                         self.max_retries,
                         str(e),
                     )
                     raise
 
-                # Restore history from snapshot
-                self.group_chat.history = copy.deepcopy(history_snapshot)
-
-                current_delay = self.initial_delay * (self.backoff_factor ** (attempt - 1))
-                
                 self.logger.warning(
                     "Attempt %d/%d failed with %s: %s. Retrying in %.2f seconds...",
                     attempt,
@@ -290,77 +257,4 @@ async def _retry_enabled_invoke(self) -> AsyncIterable[ChatMessageContent]:
                 )
 
                 await asyncio.sleep(current_delay)
-
-
-    async def async_invoke(self) -> AsyncIterable[ChatMessageContent]:
-        """
-        Optimized invoke method that dynamically chooses between zero-overhead and retry modes.
-        
-        Performance targets:
-        - 200k tokens: 1.2 mins (zero overhead when no rate limits expected)
-        - 30k-50k tokens: 1.8-2 mins (retry overhead only when needed)
-        """
-
-        # Decide which path to take
-        use_zero_overhead = self._should_use_zero_overhead_path()
-        
-        if use_zero_overhead:
-            # Zero overhead path - matches original performance exactly
-            try:
-                async for item in self._zero_overhead_invoke():
-                    yield item
-
-                # Track success
-                self._consecutive_successes += 1
-                return
-
-            except (AgentInvokeException, *self.exception_types) as e:
-                # Check if this is a rate limit error
-                error_str = str(e)
-                if self._is_rate_limit_error(error_str):
-                    self.logger.info(
-                        "Rate limit detected on zero-overhead path, switching to retry mode for this session"
-                    )
-                    self._session_has_rate_limits = True
-                    self._rate_limit_detected_recently = True
-                    # Fall through to retry logic below
-                else:
-                    # Non-rate-limit error, re-raise immediately (fail fast)
-                    self.logger.error("Non-rate-limit error in zero-overhead path: %s", error_str)
-                    raise
-
-        # Retry-enabled path - used when rate limits are expected or detected
-        try:
-            async for item in self._retry_enabled_invoke():
-                yield item
-
-            # Track success
-            self._consecutive_successes += 1
-
-            # Gradually become more optimistic about rate limits
-            if self._consecutive_successes >= 5:
-                self._rate_limit_detected_recently = False
-                # Note: We keep _session_has_rate_limits = True to remember for this session
-
-
-
-
-        except Exception as e:
-            # Reset success counter on failure
-            self._consecutive_successes = 0
-            self._rate_limit_detected_recently = True
-            raise
-
-    async def invoke_async(self):
-        """Legacy method - maintained for compatibility."""
-        return self.group_chat.invoke()
-
-    def reset_rate_limit_state(self):
-        """
-        Reset rate limit detection state - call this between different processing sessions
-        if you want to reset the adaptive behavior.
-        """
-        self._rate_limit_detected_recently = False
-        self._consecutive_successes = 0
-        self._session_has_rate_limits = False
-        self.logger.info("Rate limit detection state reset")
+                current_delay *= self.backoff_factor