17 commits
9c43438
feat(core): Temporal Linkage List — per-entity event chains (read-path)
moralespanitz May 5, 2026
68de141
feat(core): first-mention events — chronological topic-introduction p…
moralespanitz May 5, 2026
6980e87
feat(core): productize first-mention + TLL EO read endpoints
moralespanitz May 5, 2026
dc7b9ff
fix(tll): atomic append with advisory lock + unique position index (r…
moralespanitz May 6, 2026
91676f4
fix(search): drop TLL-augmented rows from similarity gate (review #2)
moralespanitz May 6, 2026
4020135
fix(tll): use memory.observed_at not new Date() for chain ordering (r…
moralespanitz May 6, 2026
d6fa8ed
fix(schema): predecessor_memory_id ON DELETE CASCADE (review #4)
moralespanitz May 6, 2026
63efbea
fix(schemas): cap EventChainsQuerySchema entity_ids at 100 (review #6)
moralespanitz May 6, 2026
ba0db16
fix(husky): unset GIT_INDEX_FILE/GIT_DIR before fallow audit
moralespanitz May 6, 2026
58c3095
chore(llm): drop unused inputTokens/outputTokens from ChatResult (rev…
moralespanitz May 6, 2026
623ac96
chore(tll): name TLL_ENTITY_LOOKUP_SEED_LIMIT constant (review #10)
moralespanitz May 6, 2026
9eacd79
fix(tll): tighten shouldUseTLL regex to reduce false-positives (revie…
moralespanitz May 6, 2026
36701b0
fix(observability): structured logging for TLL/first-mention fail-ope…
moralespanitz May 6, 2026
35dc36a
fix(first-mention): position by post-sorted index for idempotency (re…
moralespanitz May 6, 2026
c46cddf
test(routes): HTTP-level coverage for event-chains + first-mentions e…
moralespanitz May 6, 2026
bad800a
feat(infra): LiteLLM unified gateway for multi-provider LLM routing
moralespanitz May 6, 2026
83aced8
fix(litellm): switch Foundry to openai/ provider, swap Gemini to 2.5 …
moralespanitz May 6, 2026
11 changes: 11 additions & 0 deletions .husky/pre-commit
@@ -13,6 +13,17 @@
# npx fallow dupes --save-baseline=.fallow/dupes-baseline.json
BASE=$(git symbolic-ref --short refs/remotes/origin/HEAD 2>/dev/null)
BASE=${BASE:-origin/main}
# Git sets GIT_INDEX_FILE/GIT_DIR before invoking pre-commit hooks. Fallow's
# `git worktree add` for base-ref scanning performs a checkout that writes
# to GIT_INDEX_FILE if it is set — which corrupts the main worktree's index
# by replacing it with the base ref's tree (silently deleting files the
# feature branch added that don't exist on the base ref, then committing
# those deletions). Reproduced on this repo's worktrees during the PR #18
# review-response work. Unset both vars so any nested git invocation runs
# against the worktree's own default index. (Same fix lives at
# atomicmemory-benchmarks `.husky/pre-commit` commit `327326a`.)
unset GIT_INDEX_FILE
unset GIT_DIR
npx fallow audit \
--health-baseline=.fallow/health-baseline.json \
--dupes-baseline=.fallow/dupes-baseline.json \
48 changes: 48 additions & 0 deletions docker/litellm/.env.example
@@ -0,0 +1,48 @@
# LiteLLM proxy environment template.
#
# Copy to docker/litellm/.env (gitignored) before starting the proxy:
# cp docker/litellm/.env.example docker/litellm/.env
#
# Only fill in the providers you actually need. Unset keys mean the
# corresponding model alias will return 401 at call time — startup is
# unaffected.

# --- Proxy auth (required) -------------------------------------------------
# Master key clients send as `Authorization: Bearer <key>`. Generate any
# random opaque string for non-loopback deployments.
LITELLM_MASTER_KEY=sk-litellm-master

# --- Anthropic -------------------------------------------------------------
ANTHROPIC_API_KEY=

# --- OpenAI ----------------------------------------------------------------
OPENAI_API_KEY=

# --- Microsoft Foundry / Azure AI Projects --------------------------------
# Foundry exposes an OpenAI-compatible endpoint at:
# ${FOUNDRY_API_BASE}/openai/v1/chat/completions
# (Project Inference API; NOT the legacy Azure deployments path that requires
# api-version). LiteLLM routes via the `openai/` provider with a custom
# api_base, so we set FOUNDRY_API_BASE_OPENAI to that full path.
#
# Mirrors PROJECT_ENDPOINT used in atomicmemory-benchmarks
# (data/exp-cr-mini/foundry-client.ts). The proxy needs a static API key —
# if your Foundry deployment is Entra-only (no key), keep using the direct
# foundry-client.ts path and route only non-Foundry models through this proxy.
FOUNDRY_API_BASE=
FOUNDRY_API_BASE_OPENAI= # set to ${FOUNDRY_API_BASE}/openai/v1
FOUNDRY_API_KEY=
FOUNDRY_API_VERSION=2024-12-01-preview # currently unused by foundry-gpt-5-chat (kept for future azure/ entries)

# --- AWS Bedrock -----------------------------------------------------------
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION_NAME=us-east-1

# --- Google Gemini ---------------------------------------------------------
# Use Gemini 2.5 family (gemini-2-5-flash, gemini-2-5-pro). The 1.5 series
# is unavailable on most current API keys; 2.0 Flash is no longer offered to
# new accounts. NOTE: 2.5 generates reasoning tokens that count against
# max_tokens — clients should send max_tokens >= 500 (Flash) or >= 1500 (Pro)
# or responses will be truncated to empty content.
GEMINI_API_KEY=
80 changes: 80 additions & 0 deletions docker/litellm/README.md
@@ -0,0 +1,80 @@
# LiteLLM unified gateway

Run a [LiteLLM](https://github.com/BerriAI/litellm) proxy locally to route AtomicMemory's LLM calls to Anthropic, OpenAI, Microsoft Foundry / Azure, AWS Bedrock, or Google Gemini through a single OpenAI-compatible endpoint. Provider swap is config-only — no code changes in `atomicmemory-core`.

## Why this exists

`atomicmemory-core` already supports any OpenAI-compatible endpoint via:

```
LLM_PROVIDER=openai-compatible
LLM_API_URL=<base_url>
LLM_API_KEY=<key>
LLM_MODEL=<model alias>
```

(see `src/services/llm.ts` -> `OpenAICompatibleLLM`). The LiteLLM proxy *is* an OpenAI-compatible endpoint, so wiring it up is purely an infra/config change. No new provider lane in `llm.ts`.
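
For intuition, this is roughly the request shape that lane sends to the proxy. A minimal standalone sketch, assuming the defaults from this README; the helper name and error handling are illustrative, not core's actual `OpenAICompatibleLLM` code:

```ts
// Minimal OpenAI-compatible chat call against the LiteLLM proxy.
// Endpoint, key, and model mirror the env vars above; the rest is illustrative.
const LLM_API_URL = process.env.LLM_API_URL ?? "http://localhost:4000";
const LLM_API_KEY = process.env.LLM_API_KEY ?? "sk-litellm-master";
const LLM_MODEL = process.env.LLM_MODEL ?? "anthropic-haiku-4-5";

async function chatOnce(prompt: string): Promise<string> {
  const res = await fetch(`${LLM_API_URL}/v1/chat/completions`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${LLM_API_KEY}`,
    },
    body: JSON.stringify({
      model: LLM_MODEL, // a model_name alias from litellm-config.yaml
      messages: [{ role: "user", content: prompt }],
      max_tokens: 512,
    }),
  });
  if (!res.ok) throw new Error(`LLM call failed: ${res.status} ${await res.text()}`);
  const data = await res.json();
  return data.choices[0].message.content;
}
```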

## Quick start

```bash
# 1. Set provider credentials (fill in the ones you need)
cp docker/litellm/.env.example docker/litellm/.env
$EDITOR docker/litellm/.env

# 2. Start the proxy on http://localhost:4000
docker compose -f docker/litellm/docker-compose.litellm.yml up -d

# 3. Sanity-check it's up
curl -s http://localhost:4000/health/liveliness
# -> {"status":"alive",...}

# 4. List configured models
curl -s http://localhost:4000/v1/models \
-H "Authorization: Bearer $LITELLM_MASTER_KEY"
```

Point AtomicMemory at it (your `.env`):

```
LLM_PROVIDER=openai-compatible
LLM_API_URL=http://localhost:4000
LLM_API_KEY=sk-litellm-master # value of LITELLM_MASTER_KEY in docker/litellm/.env
LLM_MODEL=anthropic-haiku-4-5 # or any model_name from litellm-config.yaml
```

Restart the core dev server to pick up the new env. The proxy and core can run side-by-side because they're on different ports (4000 vs 3050).

## Switching providers at runtime

Two options:

1. **Env-only.** Change `LLM_MODEL` to a different `model_name` from `litellm-config.yaml` and restart core.
2. **Per-request.** Use `config_override.llm_model` in the ingest/search request body (see core's per-request override pattern, and the sketch below). The proxy is stateless with respect to AtomicMemory, so swapping models per request only changes which `model_list` entry the proxy resolves.
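
A hedged sketch of option 2. The `/ingest` path and request field names below are assumptions based on core's per-request override pattern, not verbatim from the core API; check the route schemas before copying:

```ts
// Hypothetical per-request model override routed through the LiteLLM proxy.
const res = await fetch("http://localhost:3050/ingest", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    content: "Met with the platform team about the Q3 migration plan.",
    config_override: {
      llm_model: "openai-gpt-4o-mini", // any model_name from litellm-config.yaml
    },
  }),
});
console.log(res.status, await res.json());
```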

## Configured providers

| `model_name` (LLM_MODEL value) | Upstream | Required env |
|---|---|---|
| `anthropic-haiku-4-5` | Anthropic | `ANTHROPIC_API_KEY` |
| `anthropic-sonnet-4-6` | Anthropic | `ANTHROPIC_API_KEY` |
| `openai-gpt-5-chat` | OpenAI | `OPENAI_API_KEY` |
| `openai-gpt-4o-mini` | OpenAI | `OPENAI_API_KEY` |
| `foundry-gpt-5-chat` | Microsoft Foundry / Azure AI Projects | `FOUNDRY_API_BASE_OPENAI` (derived from `FOUNDRY_API_BASE`), `FOUNDRY_API_KEY` |
| `bedrock-claude-sonnet` | AWS Bedrock | `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION_NAME` |
| `gemini-2-5-pro` | Google Gemini direct API | `GEMINI_API_KEY` |
| `gemini-2-5-flash` | Google Gemini direct API | `GEMINI_API_KEY` |

To add a model, append a `model_list` entry to `litellm-config.yaml` and restart the proxy. No core code change required.

## Limitations / known caveats

- **Cost telemetry.** Core's `cost-telemetry.ts` estimates cost from the model name and the OpenAI-compatible `usage` block returned by the proxy. Per-provider rates aren't perfectly mirrored across providers behind LiteLLM yet; Bedrock in particular uses its own per-deployment pricing. Treat per-call cost estimates as an upper bound when routed through the proxy. LiteLLM does emit an `x-litellm-response-cost` HTTP header; wiring core to read it is future work (see the sketch after this list).
- **Streaming.** The proxy supports streaming, but core's LLM call sites don't currently stream, so this is not a constraint today.
- **Foundry + Entra ID auth.** Routing Foundry through the proxy requires a static API key, whether via the `azure/` provider or the current `openai/` entry with a custom `api_base`. The existing `foundry-client.ts` in `atomicmemory-benchmarks` uses `DefaultAzureCredential`. If your Foundry deployment is Entra-only with no static key, keep using `foundry-client.ts` directly and route only the other providers through LiteLLM.
- **Model name format.** Aliases here use kebab-case (`anthropic-haiku-4-5`). If a downstream tool expects the upstream model id verbatim, use the alias for routing and let LiteLLM translate.
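
A sketch of what reading that cost header could look like if core is wired up later. The header name comes from LiteLLM; the wrapper function and its return shape are hypothetical:

```ts
// Hypothetical future hook: read LiteLLM's per-response cost header alongside
// the usage block, instead of estimating cost from the model name alone.
async function chatWithCost(body: unknown): Promise<{ content: string; costUsd: number | null }> {
  const res = await fetch(`${process.env.LLM_API_URL}/v1/chat/completions`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${process.env.LLM_API_KEY}`,
    },
    body: JSON.stringify(body),
  });
  const data = await res.json();
  const costHeader = res.headers.get("x-litellm-response-cost");
  return {
    content: data.choices[0].message.content,
    costUsd: costHeader ? Number(costHeader) : null, // null when the proxy omits the header
  };
}
```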

## Where this fits

- `litellm-config.yaml` — model routing table (provider keys via `os.environ/...`).
- `docker-compose.litellm.yml` — sidecar service definition (port 4000, healthcheck, env wiring).
- `.env.example` — credential template for the host environment.
65 changes: 65 additions & 0 deletions docker/litellm/docker-compose.litellm.yml
@@ -0,0 +1,65 @@
# LiteLLM proxy as a sidecar service for AtomicMemory.
#
# Runs at http://localhost:4000 by default — distinct from the core dev
# server (3050) and the docker-compose.yml app service (also 3050) so the
# two can run side-by-side. Provider credentials are pulled from the host
# environment (or a .env in this directory) and resolved by the proxy at
# request time per `os.environ/...` references in litellm-config.yaml.
#
# Up: docker compose -f docker/litellm/docker-compose.litellm.yml up -d
# Logs: docker compose -f docker/litellm/docker-compose.litellm.yml logs -f litellm
# Down: docker compose -f docker/litellm/docker-compose.litellm.yml down
#
# After the container is healthy, point AtomicMemory at it:
# LLM_PROVIDER=openai-compatible
# LLM_API_URL=http://localhost:4000
# LLM_API_KEY=$LITELLM_MASTER_KEY
# LLM_MODEL=anthropic-haiku-4-5 # or any model_name from litellm-config.yaml

# Pin the compose project name so this stack never collides with another
# `litellm/` directory's compose project. Without this, docker compose
# derives the project name from the parent directory (`litellm`) which is a
# very common name and will silently hijack siblings' containers.
name: atomicmemory-litellm

services:
litellm:
# Pinned image tag. `main-stable` is BerriAI's rolling stable channel.
# Bump explicitly when validating a new version against AtomicMemory.
image: ghcr.io/berriai/litellm:main-stable
container_name: atomicmemory-litellm
restart: unless-stopped
ports:
- "${LITELLM_PORT:-4000}:4000"
# Mount the config read-only so accidental writes from inside the
# container can't drift it.
volumes:
- ./litellm-config.yaml:/app/config.yaml:ro
command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"]
environment:
# Proxy auth — set in your shell or the .env file alongside this compose.
LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-sk-litellm-master}
# Provider credentials. Anything unset just makes the corresponding
# model alias 401 at call time; the proxy still starts.
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
FOUNDRY_API_BASE: ${FOUNDRY_API_BASE:-}
FOUNDRY_API_BASE_OPENAI: ${FOUNDRY_API_BASE_OPENAI:-}
FOUNDRY_API_KEY: ${FOUNDRY_API_KEY:-}
FOUNDRY_API_VERSION: ${FOUNDRY_API_VERSION:-2024-12-01-preview}
AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}
AWS_REGION_NAME: ${AWS_REGION_NAME:-us-east-1}
GEMINI_API_KEY: ${GEMINI_API_KEY:-}
healthcheck:
# /health/liveliness is the LiteLLM-blessed unauthenticated probe.
test: ["CMD", "wget", "-qO-", "http://localhost:4000/health/liveliness"]
interval: 15s
timeout: 5s
retries: 3
start_period: 30s
logging:
driver: json-file
options:
max-size: "10m"
max-file: "3"
131 changes: 131 additions & 0 deletions docker/litellm/litellm-config.yaml
@@ -0,0 +1,131 @@
# LiteLLM proxy configuration for AtomicMemory.
#
# Why this exists
# ---------------
# AtomicMemory's LLM lane already supports any OpenAI-compatible endpoint via
# `LLM_PROVIDER=openai-compatible` + `LLM_API_URL` + `LLM_API_KEY`
# (see src/services/llm.ts -> OpenAICompatibleLLM). Pointing that lane at a
# LiteLLM proxy gives us a single, config-driven seam to swap LLM providers
# (Anthropic, OpenAI, Microsoft Foundry / Azure, AWS Bedrock, Google Gemini)
# without code changes in core.
#
# How it's used
# -------------
# 1. Start the proxy: docker compose -f docker/litellm/docker-compose.litellm.yml up -d
# 2. In core .env, set: LLM_PROVIDER=openai-compatible
# LLM_API_URL=http://localhost:4000
# LLM_API_KEY=sk-litellm-master # the master_key below
# LLM_MODEL=anthropic-haiku-4-5 # any model_name from the model_list
# 3. Restart the core dev server (do NOT bounce a running multirun's server).
#
# Provider credentials live in the host environment, NOT in this file. The
# proxy resolves `os.environ/VAR_NAME` at request time, so the same config.yaml
# works in dev (one credential set) and in CI/prod (a different set) without
# editing it. Missing credentials surface as 401 from the *target provider*
# only when that model is actually called — startup does not block.
#
# Model naming convention
# -----------------------
# `model_name` is the alias clients use (LLM_MODEL on the AtomicMemory side
# OR `model` in the OpenAI request body). `litellm_params.model` is the
# upstream provider+model identifier LiteLLM understands. We name aliases
# `<provider>-<short-model>` so dashboard reads stay legible.

model_list:
# ---------------------------------------------------------------------------
# Anthropic — direct API. Default backbone for AtomicMemory today.
# Requires: ANTHROPIC_API_KEY
# ---------------------------------------------------------------------------
- model_name: anthropic-haiku-4-5
litellm_params:
model: anthropic/claude-haiku-4-5
api_key: os.environ/ANTHROPIC_API_KEY

- model_name: anthropic-sonnet-4-6
litellm_params:
model: anthropic/claude-sonnet-4-6
api_key: os.environ/ANTHROPIC_API_KEY

# ---------------------------------------------------------------------------
# OpenAI — direct API.
# Requires: OPENAI_API_KEY
# ---------------------------------------------------------------------------
- model_name: openai-gpt-5-chat
litellm_params:
model: openai/gpt-5-chat
api_key: os.environ/OPENAI_API_KEY

- model_name: openai-gpt-4o-mini
litellm_params:
model: openai/gpt-4o-mini
api_key: os.environ/OPENAI_API_KEY

# ---------------------------------------------------------------------------
# Microsoft Foundry / Azure AI Projects.
# The Foundry sprint already provisioned a deployment; reuse its endpoint.
# Requires:
# FOUNDRY_API_BASE = same value as PROJECT_ENDPOINT (https://<resource>.services.ai.azure.com/api/projects/<project>)
# FOUNDRY_API_KEY = Azure AI Projects key. If using `az login` / Entra
# identity instead of a key, leave unset and use the
# Foundry-direct path in data/exp-cr-mini/foundry-client.ts.
# LiteLLM proxy needs a static key for `azure/`.
# FOUNDRY_API_VERSION (optional, defaults to 2024-12-01-preview)
# ---------------------------------------------------------------------------
# Foundry exposes an OpenAI-compatible path at `${FOUNDRY_API_BASE}/openai/v1`
# (the project-endpoint Responses-and-Chat API), NOT the legacy Azure
# `/openai/deployments/<name>/chat/completions?api-version=X` path. So we
# route via LiteLLM's `openai/` provider with a custom api_base — same
# pattern as any OpenAI-compatible endpoint. No api-version needed.
- model_name: foundry-gpt-5-chat
litellm_params:
model: openai/gpt-5-chat
api_base: os.environ/FOUNDRY_API_BASE_OPENAI
api_key: os.environ/FOUNDRY_API_KEY

# ---------------------------------------------------------------------------
# AWS Bedrock — Claude Sonnet via Bedrock. Placeholder; no creds yet.
# Requires: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION_NAME
# ---------------------------------------------------------------------------
- model_name: bedrock-claude-sonnet
litellm_params:
model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
aws_region_name: os.environ/AWS_REGION_NAME

# ---------------------------------------------------------------------------
# Google Gemini — direct API (not Vertex). Placeholder; no creds yet.
# Requires: GEMINI_API_KEY (or set GOOGLE_API_KEY -- LiteLLM accepts either)
# ---------------------------------------------------------------------------
# Gemini 1.5 series isn't available on most current API keys; use 2.5 family.
- model_name: gemini-2-5-pro
litellm_params:
model: gemini/gemini-2.5-pro
api_key: os.environ/GEMINI_API_KEY

- model_name: gemini-2-5-flash
litellm_params:
model: gemini/gemini-2.5-flash
api_key: os.environ/GEMINI_API_KEY
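
# ---------------------------------------------------------------------------
# Commented-out template for adding another alias. The upstream model id below
# is only an example, not a provisioned deployment; uncomment and adjust, then
# restart the proxy. No core code change is needed.
# ---------------------------------------------------------------------------
# - model_name: openai-gpt-4-1-mini
#   litellm_params:
#     model: openai/gpt-4.1-mini
#     api_key: os.environ/OPENAI_API_KEY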

litellm_settings:
# Drop unsupported per-provider params (e.g. seed for providers that ignore
# it) instead of erroring. Keeps the AtomicMemory LLM call shape stable
# across providers — a key reason we're using the gateway.
drop_params: true
# Per-call timeout. AtomicMemory's AUDN path can produce long prompts; 90s
# bounds wall-time without truncating legit slow responses.
request_timeout: 90
# Keep proxy logging quiet. Cost telemetry does not depend on this flag:
# LiteLLM still returns the OpenAI-compatible `usage` block and the
# `x-litellm-response-cost` header on /v1/chat/completions responses, which
# is what cost-telemetry.ts can consume when requests route through the proxy.
set_verbose: false

general_settings:
# Proxy-level auth. Clients (atomicmemory-core, harness) must send this as
# `Authorization: Bearer <master_key>`; the proxy then injects the
# provider-specific creds resolved from `os.environ/...` above.
#
# The default value is a developer placeholder. Override it in your
# docker-compose env (LITELLM_MASTER_KEY=...) for any non-loopback use.
master_key: os.environ/LITELLM_MASTER_KEY