From 35a7c48396916e5ab30faa1ed55a1d49d3d47e7d Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 26 Jan 2026 18:49:34 -0500 Subject: [PATCH 1/2] fix: VS Code LM token counting returns 0 outside requests, breaking context condensing (EXT-620) - Modified VsCodeLmHandler.internalCountTokens() to create temporary cancellation tokens when needed - Token counting now works both during and outside of active requests - Added 4 new tests to verify the fix and prevent regression - Resolves issue where VS Code LM API users experienced context overflow errors --- src/api/providers/__tests__/vscode-lm.spec.ts | 60 +++++++++++++++++++ src/api/providers/vscode-lm.ts | 25 +++++--- 2 files changed, 78 insertions(+), 7 deletions(-) diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts index 9c050b5bc6c..305305d2289 100644 --- a/src/api/providers/__tests__/vscode-lm.spec.ts +++ b/src/api/providers/__tests__/vscode-lm.spec.ts @@ -437,6 +437,66 @@ describe("VsCodeLmHandler", () => { }) }) + describe("countTokens", () => { + beforeEach(() => { + handler["client"] = mockLanguageModelChat + }) + + it("should count tokens when called outside of an active request", async () => { + // Ensure no active request cancellation token exists + handler["currentRequestCancellation"] = null + + mockLanguageModelChat.countTokens.mockResolvedValueOnce(42) + + const content: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: "Hello world" }] + const result = await handler.countTokens(content) + + expect(result).toBe(42) + expect(mockLanguageModelChat.countTokens).toHaveBeenCalledWith("Hello world", expect.any(Object)) + }) + + it("should count tokens when called during an active request", async () => { + // Simulate an active request with a cancellation token + const mockCancellation = { + token: { isCancellationRequested: false, onCancellationRequested: vi.fn() }, + cancel: vi.fn(), + dispose: vi.fn(), + } + handler["currentRequestCancellation"] = mockCancellation as any + + mockLanguageModelChat.countTokens.mockResolvedValueOnce(50) + + const content: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: "Test content" }] + const result = await handler.countTokens(content) + + expect(result).toBe(50) + expect(mockLanguageModelChat.countTokens).toHaveBeenCalledWith("Test content", mockCancellation.token) + }) + + it("should return 0 when no client is available", async () => { + handler["client"] = null + handler["currentRequestCancellation"] = null + + const content: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: "Hello" }] + const result = await handler.countTokens(content) + + expect(result).toBe(0) + }) + + it("should handle image blocks with placeholder", async () => { + handler["currentRequestCancellation"] = null + mockLanguageModelChat.countTokens.mockResolvedValueOnce(5) + + const content: Anthropic.Messages.ContentBlockParam[] = [ + { type: "image", source: { type: "base64", media_type: "image/png", data: "abc" } }, + ] + const result = await handler.countTokens(content) + + expect(result).toBe(5) + expect(mockLanguageModelChat.countTokens).toHaveBeenCalledWith("[IMAGE]", expect.any(Object)) + }) + }) + describe("completePrompt", () => { it("should complete single prompt", async () => { const mockModel = { ...mockLanguageModelChat } diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts index a77d326e590..8fb564a9d59 100644 --- a/src/api/providers/vscode-lm.ts +++ b/src/api/providers/vscode-lm.ts @@ 
-229,23 +229,29 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan return 0 } - if (!this.currentRequestCancellation) { - console.warn("Roo Code : No cancellation token available for token counting") - return 0 - } - // Validate input if (!text) { console.debug("Roo Code : Empty text provided for token counting") return 0 } + // Create a temporary cancellation token if we don't have one (e.g., when called outside a request) + let cancellationToken: vscode.CancellationToken + let tempCancellation: vscode.CancellationTokenSource | null = null + + if (this.currentRequestCancellation) { + cancellationToken = this.currentRequestCancellation.token + } else { + tempCancellation = new vscode.CancellationTokenSource() + cancellationToken = tempCancellation.token + } + try { // Handle different input types let tokenCount: number if (typeof text === "string") { - tokenCount = await this.client.countTokens(text, this.currentRequestCancellation.token) + tokenCount = await this.client.countTokens(text, cancellationToken) } else if (text instanceof vscode.LanguageModelChatMessage) { // For chat messages, ensure we have content if (!text.content || (Array.isArray(text.content) && text.content.length === 0)) { @@ -253,7 +259,7 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan return 0 } const countMessage = extractTextCountFromMessage(text) - tokenCount = await this.client.countTokens(countMessage, this.currentRequestCancellation.token) + tokenCount = await this.client.countTokens(countMessage, cancellationToken) } else { console.warn("Roo Code : Invalid input type for token counting") return 0 @@ -287,6 +293,11 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan } return 0 // Fallback to prevent stream interruption + } finally { + // Clean up temporary cancellation token + if (tempCancellation) { + tempCancellation.dispose() + } } } From f99b1bda242e631e4301def78be349a31a21ab7c Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 26 Jan 2026 23:39:09 -0500 Subject: [PATCH 2/2] fix: sanitize tool IDs in LiteLLM to prevent Bedrock validation errors - Add sanitizeOpenAiCallId normalization to convertToOpenAiMessages call - Prevents toolUseId exceeding 64-character limit when proxying to Bedrock - Add comprehensive test coverage for tool ID truncation and uniqueness - Matches pattern used by Bedrock and OpenRouter providers --- src/api/providers/__tests__/lite-llm.spec.ts | 202 +++++++++++++++++++ src/api/providers/lite-llm.ts | 5 +- 2 files changed, 206 insertions(+), 1 deletion(-) diff --git a/src/api/providers/__tests__/lite-llm.spec.ts b/src/api/providers/__tests__/lite-llm.spec.ts index 606c0f35b0b..9f3a641cb3b 100644 --- a/src/api/providers/__tests__/lite-llm.spec.ts +++ b/src/api/providers/__tests__/lite-llm.spec.ts @@ -718,4 +718,206 @@ describe("LiteLLMHandler", () => { }) }) }) + + describe("tool ID normalization", () => { + it("should truncate tool IDs longer than 64 characters", async () => { + const optionsWithBedrock: ApiHandlerOptions = { + ...mockOptions, + litellmModelId: "bedrock/anthropic.claude-3-sonnet", + } + handler = new LiteLLMHandler(optionsWithBedrock) + + vi.spyOn(handler as any, "fetchModel").mockResolvedValue({ + id: "bedrock/anthropic.claude-3-sonnet", + info: { ...litellmDefaultModelInfo, maxTokens: 8192 }, + }) + + // Create a tool ID longer than 64 characters + const longToolId = "toolu_" + "a".repeat(70) // 76 characters total + + const systemPrompt = "You are a helpful 
assistant" + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Hello" }, + { + role: "assistant", + content: [ + { type: "text", text: "I'll help you with that." }, + { type: "tool_use", id: longToolId, name: "read_file", input: { path: "test.txt" } }, + ], + }, + { + role: "user", + content: [{ type: "tool_result", tool_use_id: longToolId, content: "file contents" }], + }, + ] + + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + choices: [{ delta: { content: "Response" } }], + usage: { prompt_tokens: 100, completion_tokens: 20 }, + } + }, + } + + mockCreate.mockReturnValue({ + withResponse: vi.fn().mockResolvedValue({ data: mockStream }), + }) + + const generator = handler.createMessage(systemPrompt, messages) + for await (const _chunk of generator) { + // Consume + } + + // Verify that tool IDs are truncated to 64 characters or less + const createCall = mockCreate.mock.calls[0][0] + const assistantMessage = createCall.messages.find( + (msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0, + ) + const toolMessage = createCall.messages.find((msg: any) => msg.role === "tool") + + expect(assistantMessage).toBeDefined() + expect(assistantMessage.tool_calls[0].id.length).toBeLessThanOrEqual(64) + + expect(toolMessage).toBeDefined() + expect(toolMessage.tool_call_id.length).toBeLessThanOrEqual(64) + }) + + it("should not modify tool IDs that are already within 64 characters", async () => { + const optionsWithBedrock: ApiHandlerOptions = { + ...mockOptions, + litellmModelId: "bedrock/anthropic.claude-3-sonnet", + } + handler = new LiteLLMHandler(optionsWithBedrock) + + vi.spyOn(handler as any, "fetchModel").mockResolvedValue({ + id: "bedrock/anthropic.claude-3-sonnet", + info: { ...litellmDefaultModelInfo, maxTokens: 8192 }, + }) + + // Create a tool ID within 64 characters + const shortToolId = "toolu_01ABC123" // Well under 64 characters + + const systemPrompt = "You are a helpful assistant" + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Hello" }, + { + role: "assistant", + content: [ + { type: "text", text: "I'll help you with that." 
}, + { type: "tool_use", id: shortToolId, name: "read_file", input: { path: "test.txt" } }, + ], + }, + { + role: "user", + content: [{ type: "tool_result", tool_use_id: shortToolId, content: "file contents" }], + }, + ] + + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + choices: [{ delta: { content: "Response" } }], + usage: { prompt_tokens: 100, completion_tokens: 20 }, + } + }, + } + + mockCreate.mockReturnValue({ + withResponse: vi.fn().mockResolvedValue({ data: mockStream }), + }) + + const generator = handler.createMessage(systemPrompt, messages) + for await (const _chunk of generator) { + // Consume + } + + // Verify that tool IDs are unchanged + const createCall = mockCreate.mock.calls[0][0] + const assistantMessage = createCall.messages.find( + (msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0, + ) + const toolMessage = createCall.messages.find((msg: any) => msg.role === "tool") + + expect(assistantMessage).toBeDefined() + expect(assistantMessage.tool_calls[0].id).toBe(shortToolId) + + expect(toolMessage).toBeDefined() + expect(toolMessage.tool_call_id).toBe(shortToolId) + }) + + it("should maintain uniqueness with hash suffix when truncating", async () => { + const optionsWithBedrock: ApiHandlerOptions = { + ...mockOptions, + litellmModelId: "bedrock/anthropic.claude-3-sonnet", + } + handler = new LiteLLMHandler(optionsWithBedrock) + + vi.spyOn(handler as any, "fetchModel").mockResolvedValue({ + id: "bedrock/anthropic.claude-3-sonnet", + info: { ...litellmDefaultModelInfo, maxTokens: 8192 }, + }) + + // Create two tool IDs that differ only near the end + const longToolId1 = "toolu_" + "a".repeat(60) + "_suffix1" + const longToolId2 = "toolu_" + "a".repeat(60) + "_suffix2" + + const systemPrompt = "You are a helpful assistant" + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "Hello" }, + { + role: "assistant", + content: [ + { type: "text", text: "I'll help." 
}, + { type: "tool_use", id: longToolId1, name: "read_file", input: { path: "test1.txt" } }, + { type: "tool_use", id: longToolId2, name: "read_file", input: { path: "test2.txt" } }, + ], + }, + { + role: "user", + content: [ + { type: "tool_result", tool_use_id: longToolId1, content: "file1 contents" }, + { type: "tool_result", tool_use_id: longToolId2, content: "file2 contents" }, + ], + }, + ] + + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + choices: [{ delta: { content: "Response" } }], + usage: { prompt_tokens: 100, completion_tokens: 20 }, + } + }, + } + + mockCreate.mockReturnValue({ + withResponse: vi.fn().mockResolvedValue({ data: mockStream }), + }) + + const generator = handler.createMessage(systemPrompt, messages) + for await (const _chunk of generator) { + // Consume + } + + // Verify that truncated tool IDs are unique (hash suffix ensures this) + const createCall = mockCreate.mock.calls[0][0] + const assistantMessage = createCall.messages.find( + (msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0, + ) + + expect(assistantMessage).toBeDefined() + expect(assistantMessage.tool_calls).toHaveLength(2) + + const id1 = assistantMessage.tool_calls[0].id + const id2 = assistantMessage.tool_calls[1].id + + // Both should be truncated to 64 characters + expect(id1.length).toBeLessThanOrEqual(64) + expect(id2.length).toBeLessThanOrEqual(64) + + // They should be different (hash suffix ensures uniqueness) + expect(id1).not.toBe(id2) + }) + }) }) diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts index e95c0a8908c..cf8d16a1129 100644 --- a/src/api/providers/lite-llm.ts +++ b/src/api/providers/lite-llm.ts @@ -9,6 +9,7 @@ import { ApiHandlerOptions } from "../../shared/api" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" +import { sanitizeOpenAiCallId } from "../../utils/tool-id" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" @@ -115,7 +116,9 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa ): ApiStream { const { id: modelId, info } = await this.fetchModel() - const openAiMessages = convertToOpenAiMessages(messages) + const openAiMessages = convertToOpenAiMessages(messages, { + normalizeToolCallId: sanitizeOpenAiCallId, + }) // Prepare messages with cache control if enabled and supported let systemMessage: OpenAI.Chat.ChatCompletionMessageParam
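
Note on the first patch: the change is only observable from call sites that count tokens outside an active request, such as the context-condensing path, because currentRequestCancellation is only populated while createMessage() is streaming. A minimal sketch of such a caller follows; the function name, import path, and the content passed in are illustrative, not taken from this diff.

import { VsCodeLmHandler } from "../vscode-lm"

// Hypothetical caller: context condensing estimates conversation size between requests,
// so currentRequestCancellation is null at this point. Before the fix,
// internalCountTokens() bailed out with 0 here, the condenser never saw the context
// grow, and users eventually hit the context overflow errors the commit message describes.
async function estimateConversationTokens(handler: VsCodeLmHandler): Promise<number> {
	return handler.countTokens([{ type: "text", text: "previous conversation to condense" }])
}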
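
Note on the second patch: sanitizeOpenAiCallId itself lives in src/utils/tool-id and is not part of this diff. Based on what the new tests assert (IDs at or under 64 characters pass through untouched, longer IDs are shortened to at most 64 characters, and two long IDs that differ only near the end stay distinct), a minimal sketch could look like the following; the hash algorithm, suffix length, and separator are assumptions, not the actual implementation.

import { createHash } from "crypto"

// Hypothetical sketch of the sanitizer, not the real src/utils/tool-id code.
// Bedrock rejects toolUseId values longer than 64 characters, so over-long IDs are
// shortened, and a short hash of the original ID is appended so that IDs differing
// only in their tail (e.g. "..._suffix1" vs "..._suffix2") remain unique after truncation.
function sanitizeOpenAiCallId(id: string): string {
	const MAX_LENGTH = 64
	if (id.length <= MAX_LENGTH) {
		return id // already within Bedrock's limit, leave it untouched
	}
	const hash = createHash("sha256").update(id).digest("hex").slice(0, 8)
	return `${id.slice(0, MAX_LENGTH - hash.length - 1)}_${hash}`
}

Because the same normalizeToolCallId function is handed to convertToOpenAiMessages for the whole conversation, the assistant's tool_calls[].id and the later tool message's tool_call_id end up normalized identically, so the pair still matches on Bedrock; that is exactly what the assertions on assistantMessage and toolMessage in the new tests check.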