Skip to content

Commit 8b1f1bf

Browse files
fix: extend Polly timeouts for local Ollama chat inference
qwen2.5-coder:7b consistently takes >30s, causing Polly's default TotalRequestTimeout to reject every chat response. Override via PostConfigureAll<HttpStandardResilienceOptions> when UseLocalAI=true (dev-only path):
- TotalRequestTimeout: 30s → 10min
- AttemptTimeout: 10s → 5min
- CircuitBreaker.SamplingDuration: 30s → 11min (Polly requires >= 2x AttemptTimeout)

The global override is acceptable here: this code path only runs when the Ollama local-AI flag is set, which is developer-only.
1 parent bea1fb9 commit 8b1f1bf

1 file changed

Lines changed: 16 additions & 0 deletions

File tree

EssentialCSharp.Web/Program.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
2020
using Microsoft.EntityFrameworkCore;
2121
using Microsoft.Extensions.Diagnostics.HealthChecks;
22+
using Microsoft.Extensions.Http.Resilience;
2223
using OpenTelemetry;
2324
using OpenTelemetry.Instrumentation.AspNetCore;
2425
using OpenTelemetry.Metrics;
@@ -243,6 +244,21 @@ private static void Main(string[] args)
243244
// AIOptions__UseLocalAI=true enables Ollama local mode (set via aspire secret or dashboard).
244245
builder.AddAIServices(configuration);
245246

247+
// When using local Ollama, Polly's default 30s TotalRequestTimeout fires before LLM inference
248+
// completes (qwen2.5-coder:7b consistently takes >30s). Override globally — this code path
249+
// is only reached in local dev when UseLocalAI=true, so widening all clients is acceptable.
250+
var aiOptsForTimeout = configuration.GetSection("AIOptions").Get<EssentialCSharp.Chat.AIOptions>();
251+
if (aiOptsForTimeout?.UseLocalAI == true)
252+
{
253+
builder.Services.PostConfigureAll<HttpStandardResilienceOptions>(options =>
254+
{
255+
options.TotalRequestTimeout.Timeout = TimeSpan.FromMinutes(10);
256+
options.AttemptTimeout.Timeout = TimeSpan.FromMinutes(5);
257+
// HttpStandardResilienceOptions validation requires SamplingDuration >= 2x AttemptTimeout; the default 30s is now invalid.
258+
options.CircuitBreaker.SamplingDuration = TimeSpan.FromMinutes(11);
259+
});
260+
}
261+
246262
// Add Rate Limiting for API endpoints
247263
builder.Services.AddRateLimiter(options =>
248264
{

0 commit comments

Comments
 (0)