Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/whole-places-beam.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'mastra': patch
---

Fixed server deploys getting permanently stuck in 'queued' status when the upload confirmation step fails. The CLI now retries transient failures (5xx responses, 401 responses, and network-level errors) up to 3 times with exponential backoff, and automatically cancels orphaned deploys when the upload or its confirmation fails. Added user-visible log messages during retries and cleanup so deploy failures are no longer silent.
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
99 changes: 79 additions & 20 deletions packages/cli/src/commands/server/platform-api.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { withPollingRetries } from '../../utils/polling.js';
import { withPollingRetries, isRetryablePollingError } from '../../utils/polling.js';
import { createApiClient, throwApiError } from '../auth/client.js';
import { getToken } from '../auth/credentials.js';
import type { paths } from '../platform-api.js';
Expand Down Expand Up @@ -94,33 +94,92 @@ export async function uploadServerDeploy(
throw new Error('No upload URL returned');
}

/**
 * Best-effort cancel helper — used to clean up orphaned deploys on failure.
 *
 * Never throws: any failure to cancel is reported with `console.warn` so the
 * caller can still surface the original upload/confirmation error.
 *
 * @param deployClient API client used to issue the cancel request for the
 *   deploy identified by the enclosing `id`.
 */
async function cancelDeploy(deployClient: ReturnType<typeof createApiClient>) {
  const warnCancelFailed = (detail: string) => {
    console.warn(`Warning: failed to cancel deploy ${id} (${detail}). It may remain in a queued state.`);
  };

  try {
    console.warn(`Cancelling deploy ${id}...`);
    const { error, response } = await deployClient.POST('/v1/server/deploys/{id}/cancel', {
      params: { path: { id } },
    });
    // The client reports HTTP failures through the returned `error`/`response`
    // pair instead of throwing, so a rejected cancel must be checked explicitly.
    if (error || !response.ok) {
      warnCancelFailed(`${response.status}`);
    }
  } catch (cancelError) {
    // Network-level failure (or an unexpected client error) while cancelling.
    warnCancelFailed(cancelError instanceof Error ? cancelError.message : 'unknown error');
  }
}

// Step 2: Upload artifact to the signed URL
if (uploadUrl.startsWith('file://')) {
const { writeFile } = await import('node:fs/promises');
const { fileURLToPath } = await import('node:url');
await writeFile(fileURLToPath(uploadUrl), Buffer.from(zipBuffer));
} else {
const uploadResp = await fetch(uploadUrl, {
method: 'PUT',
headers: { 'Content-Type': 'application/zip' },
body: new Uint8Array(zipBuffer),
});
if (!uploadResp.ok) {
throw new Error(`Artifact upload failed: ${uploadResp.status} ${uploadResp.statusText}`);
try {
if (uploadUrl.startsWith('file://')) {
const { writeFile } = await import('node:fs/promises');
const { fileURLToPath } = await import('node:url');
await writeFile(fileURLToPath(uploadUrl), Buffer.from(zipBuffer));
} else {
const uploadResp = await fetch(uploadUrl, {
method: 'PUT',
headers: { 'Content-Type': 'application/zip' },
body: new Uint8Array(zipBuffer),
});
if (!uploadResp.ok) {
throw new Error(`Artifact upload failed: ${uploadResp.status} ${uploadResp.statusText}`);
}
}
} catch (uploadError) {
await cancelDeploy(client);
throw uploadError;
}

// Step 3: Notify API that upload is complete → triggers build pipeline
const { error: completeError, response: completeResponse } = await client.POST(
'/v1/server/deploys/{id}/upload-complete',
{ params: { path: { id } } },
);
// Retry up to 3 times (4 total attempts) with exponential backoff for transient failures.
const maxRetries = 3;
let lastError: Error | undefined;
let currentClient = client;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
let completeError: unknown;
let status: number | undefined;

try {
const result = await currentClient.POST('/v1/server/deploys/{id}/upload-complete', {
params: { path: { id } },
});
if (!result.error) {
return { id, status: 'queued' };
}
completeError = result.error;
status = result.response.status;
} catch (networkError) {
// Network-level failure (ECONNRESET, ETIMEDOUT, fetch failed, etc.)
completeError = networkError;
}

// Determine if we should retry
const isRetryableStatus = status !== undefined && (status >= 500 || status === 401);
const isRetryableNetwork = isRetryablePollingError(completeError);
const isRetryable = isRetryableStatus || isRetryableNetwork;

if (!isRetryable || attempt === maxRetries) {
const detail = status ? `${status}` : completeError instanceof Error ? completeError.message : 'unknown error';
lastError = new Error(`Upload confirmation failed: ${detail}`);
break;
}

const delay = 1000 * Math.pow(2, attempt);
const detail = status ? `${status}` : completeError instanceof Error ? completeError.message : 'network error';
console.warn(
`Upload confirmation failed (${detail}), retrying in ${delay / 1000}s... (attempt ${attempt + 1}/${maxRetries})`,
);

// On 401, refresh the token before retrying (same pattern as pollServerDeploy)
if (status === 401) {
const freshToken = await getToken();
currentClient = createApiClient(freshToken, orgId);
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

if (completeError) {
throwApiError('Upload confirmation failed', completeResponse.status);
// Exponential backoff: 1s, 2s, 4s
await new Promise(r => setTimeout(r, delay));
}

return { id, status: 'queued' };
// All retries exhausted — cancel the orphaned deploy and throw
await cancelDeploy(currentClient);
throw lastError ?? new Error('Upload confirmation failed');
}

export async function pollServerDeploy(
Expand Down
Loading