Derive · Derivation procedure

Token measurement

How were categories isolated from the captured request?

What this proves

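The implementation removes one category at a time from the complete request and subtracts adjacent counts to attribute tokens to each category. Per-item estimates are each measured against the same baseline request, so they are non-additive and need not sum to the category totals.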

Input
Captured request and API counts
Output
Category totals and per-item estimates
File
scripts/count-tokens.mjs

Inspect the derivation procedure

View raw
Open the complete source file
JavaScript
#!/usr/bin/env node
import { readdir, unlink } from "node:fs/promises";
import { resolve } from "node:path";
import {
  parseArgs,
  provenanceDir,
  rawCountsDir,
  readJson,
  redactHeaders,
  resultsDir,
  sha256,
  stableJson,
  writeJson,
} from "./lib.mjs";
import { renderResult } from "./render-results.mjs";

// Command-line flags: everything after the node binary and the script path.
const args = parseArgs(process.argv.slice(2));

// Credentials come from the environment only; either one is sufficient.
const { ANTHROPIC_API_KEY: apiKey, ANTHROPIC_AUTH_TOKEN: authToken } = process.env;

// Endpoint precedence: --endpoint flag, then ANTHROPIC_BASE_URL, then the public API.
const endpoint = args.endpoint || process.env.ANTHROPIC_BASE_URL || "https://api.anthropic.com";

// A single trailing slash is dropped so the path is not joined with "//".
const countTokensUrl = `${endpoint.replace(/\/$/, "")}/v1/messages/count_tokens?beta=true`;

// Redacted capture of the SDK request and of its headers.
const capturePath = resolve(provenanceDir, "sdk-query-request.redacted.json");
const capturedHeadersPath = resolve(provenanceDir, "sdk-query-request-headers.redacted.json");

// Measurement id -> label, for every measurement requested during this run.
const usedMeasurements = new Map();

// Every category the request builder can switch on or off.
const categoryNames = [
  "system_parameter_blocks",
  "sdk_injected_message_context",
  "skill_descriptions",
  "skill_listing_boilerplate",
  "built_in_tool_definitions",
];

/**
 * Splits the captured skill listing into ordered boilerplate and description
 * parts whose concatenation reproduces the input exactly.
 *
 * An entry starts at a line of the form "- <name>: ". Its description runs to
 * the start of the next entry (or the end of the text), minus one trailing
 * newline, which is kept as boilerplate.
 *
 * @param {string} text - Raw skill listing text.
 * @returns {{ parts: object[], skills: { name: string, description: string }[] }}
 * @throws {Error} When no entry is found or the parts do not rebuild the input.
 */
function splitSkillListing(text) {
  const headers = Array.from(text.matchAll(/^- ([^:\n]+): /gm));
  if (headers.length === 0) throw new Error("Could not parse the captured skill listing");

  const boilerplate = (chunk) => ({ category: "boilerplate", text: chunk });

  // Everything before the first entry is preamble boilerplate (possibly empty).
  const parts = [boilerplate(text.slice(0, headers[0].index))];
  const skills = [];

  headers.forEach((header, position) => {
    const [prefix, name] = header;
    const following = headers[position + 1];
    const sectionEnd = following === undefined ? text.length : following.index;
    const bodyStart = header.index + prefix.length;
    // A single newline that closes the entry belongs to the boilerplate.
    const bodyEnd = text[sectionEnd - 1] === "\n" ? sectionEnd - 1 : sectionEnd;
    const description = text.slice(bodyStart, bodyEnd);
    const trailing = text.slice(bodyEnd, sectionEnd);

    parts.push(boilerplate(prefix), { category: "description", skill: name, text: description });
    if (trailing !== "") parts.push(boilerplate(trailing));
    skills.push({ name, description });
  });

  // Guard against silently dropping or duplicating any character.
  const rebuilt = parts.reduce((joined, part) => joined + part.text, "");
  if (rebuilt !== text) throw new Error("Skill listing split did not round-trip");
  return { parts, skills };
}

/**
 * Reduces a captured request to the fields this script sends to count_tokens.
 *
 * The model comes from the --model flag when set, otherwise from the capture.
 * Of the remaining fields only system, messages, thinking, tools and
 * output_config are kept, and only when they are defined on the capture.
 *
 * @param {object} capture - Parsed captured request.
 * @returns {object} New object holding the model plus the defined countable fields.
 */
function projectCountableRequest(capture) {
  const model = args.model || capture.model;
  const countableKeys = ["system", "messages", "thinking", "tools", "output_config"];
  const present = countableKeys
    .filter((key) => capture[key] !== undefined)
    .map((key) => [key, capture[key]]);
  return { model, ...Object.fromEntries(present) };
}

/**
 * Validates the captured request shape and returns a factory for request
 * bodies that contain only a chosen subset of categories.
 *
 * The capture must hold exactly two messages: a user message with two content
 * blocks (SDK reminder, then user prompt) and a system-role message whose
 * string content is the skill listing.
 *
 * @param {object} capture - Projected captured request.
 * @returns {{ build: Function, skills: object[] }} The body builder and the parsed skills.
 * @throws {Error} When the capture does not have the expected shape.
 */
function createBuilder(capture) {
  const { messages: capturedMessages } = capture;
  if (!Array.isArray(capturedMessages) || capturedMessages.length !== 2) {
    throw new Error("Expected the captured request to contain exactly two messages");
  }

  const [userMessage, skillMessage] = capturedMessages;
  const userMessageIsValid =
    userMessage.role === "user" &&
    Array.isArray(userMessage.content) &&
    userMessage.content.length === 2;
  if (!userMessageIsValid) {
    throw new Error("Expected messages[0] to contain the SDK reminder and user prompt blocks");
  }

  const skillMessageIsValid =
    skillMessage.role === "system" && typeof skillMessage.content === "string";
  if (!skillMessageIsValid) {
    throw new Error("Expected messages[1] to be the captured system-role skill listing");
  }

  const { parts: listingParts, skills } = splitSkillListing(skillMessage.content);
  const [sdkContextBlock, promptBlock] = userMessage.content;

  /**
   * Builds a request body containing only the selected categories.
   *
   * @param {Iterable<string>} selectedNames - Category names to include.
   * @param {{ appendText?: string, onlySkill?: string }} [options]
   *   appendText adds one extra text block to the user message; onlySkill
   *   limits the included descriptions to that skill.
   * @returns {object} Request body for count_tokens.
   */
  function build(selectedNames, options = {}) {
    const selected = new Set(selectedNames);
    const { appendText, onlySkill } = options;

    // The user prompt is always present; the SDK block and extra text are optional.
    const content = [
      ...(selected.has("sdk_injected_message_context") ? [sdkContextBlock] : []),
      promptBlock,
      ...(appendText ? [{ type: "text", text: appendText }] : []),
    ];
    const messages = [{ role: "user", content }];

    const wantDescriptions = selected.has("skill_descriptions");
    const wantBoilerplate = selected.has("skill_listing_boilerplate");
    if (wantDescriptions || wantBoilerplate) {
      let skillText = "";
      for (const part of listingParts) {
        const keep =
          part.category === "description"
            ? wantDescriptions && (!onlySkill || part.skill === onlySkill)
            : wantBoilerplate;
        if (keep) skillText += part.text;
      }
      messages.push({ role: "system", content: skillText });
    }

    // Insertion order here fixes the key order of the serialized body.
    const optionalFields = [
      [capture.thinking !== undefined, "thinking", capture.thinking],
      [capture.output_config !== undefined, "output_config", capture.output_config],
      [selected.has("system_parameter_blocks"), "system", capture.system],
      [selected.has("built_in_tool_definitions"), "tools", capture.tools],
    ];
    const body = { model: args.model || capture.model, messages };
    for (const [include, key, value] of optionalFields) {
      if (include) body[key] = value;
    }
    return body;
  }

  return { build, skills };
}

/**
 * Returns a deep copy of a request body whose first message keeps only its
 * first content block, i.e. with the user-prompt block removed.
 *
 * @param {object} body - Request body whose first message has exactly two content blocks.
 * @returns {object} Independent copy with the second block dropped.
 * @throws {Error} When the first message does not hold exactly two content blocks.
 */
function removeUserPrompt(body) {
  // Work on a clone so the caller's body is left untouched.
  const copy = structuredClone(body);
  const firstMessage = copy.messages?.[0];
  const blocks = firstMessage?.content;
  if (!(Array.isArray(blocks) && blocks.length === 2)) {
    throw new Error("Expected the full request to contain SDK context and one user-prompt block");
  }
  const [sdkContext] = blocks;
  firstMessage.content = [sdkContext];
  return copy;
}

/**
 * Builds the HTTP headers for a count_tokens call.
 *
 * Beta flags from the captured request are trimmed, de-duplicated and kept in
 * order, and the token-counting beta is added if absent. The API key takes
 * precedence over the bearer token; neither header is set when both are unset.
 *
 * @param {object} capturedHeaders - Headers recorded with the captured request.
 * @returns {object} Headers to send with the count_tokens request.
 */
function requestHeaders(capturedHeaders) {
  const betas = new Set();
  for (const entry of (capturedHeaders["anthropic-beta"] || "").split(",")) {
    const flag = entry.trim();
    if (flag) betas.add(flag);
  }
  betas.add("token-counting-2024-11-01");

  const credential = {};
  if (apiKey) credential["x-api-key"] = apiKey;
  else if (authToken) credential.authorization = `Bearer ${authToken}`;

  return {
    "content-type": "application/json",
    "anthropic-version": capturedHeaders["anthropic-version"] || "2023-06-01",
    "anthropic-beta": Array.from(betas).join(","),
    ...credential,
  };
}

/**
 * Pauses the calling async function for the given duration.
 *
 * @param {number} milliseconds - Delay passed to setTimeout.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
async function sleep(milliseconds) {
  const elapsed = new Promise((wake) => {
    setTimeout(wake, milliseconds);
  });
  await elapsed;
}

/**
 * Returns the input-token count for one request body, from the on-disk cache
 * when a successful response is stored there, otherwise from the count_tokens
 * endpoint.
 *
 * The measurement id is the hash of the canonical { endpoint, body, beta }
 * triple. The request and every response are written under rawCountsDir with
 * redacted headers. Network failures and HTTP 429/500/502/503/504 are retried
 * with exponential backoff (500 ms, doubling) for up to six attempts; any
 * other unusable response fails immediately.
 *
 * @param {string} label - Human-readable name of the measurement.
 * @param {object} body - Request body to count.
 * @param {object} capturedHeaders - Headers recorded with the captured request.
 * @returns {Promise<number>} The integer input_tokens reported by the API.
 * @throws {Error} When two labels map to the same canonical request, when the
 *   API rejects the request, or when every attempt fails.
 */
async function countTokens(label, body, capturedHeaders) {
  const maxAttempts = 6;
  const retryableStatuses = [429, 500, 502, 503, 504];

  const canonical = stableJson({ endpoint, body, beta: capturedHeaders["anthropic-beta"] || null });
  const id = sha256(canonical);

  // Two differently labelled measurements must never collapse into one request.
  const existingLabel = usedMeasurements.get(id);
  if (existingLabel && existingLabel !== label) {
    throw new Error(
      `Measurements ${existingLabel} and ${label} produce the same canonical request`,
    );
  }
  usedMeasurements.set(id, label);

  const requestPath = resolve(rawCountsDir, `${id}.request.json`);
  const responsePath = resolve(rawCountsDir, `${id}.response.json`);
  try {
    const cached = await readJson(responsePath);
    if (cached.ok && Number.isInteger(cached.body?.input_tokens)) {
      const cachedRequest = await readJson(requestPath);
      // Rewrite the cached files only when the label or the redaction changed.
      const safeCached = { ...cached, label, headers: redactHeaders(cached.headers || {}) };
      const safeCachedRequest = {
        ...cachedRequest,
        label,
        headers: redactHeaders(cachedRequest.headers || {}),
      };
      if (
        stableJson(safeCached) !== stableJson(cached) ||
        stableJson(safeCachedRequest) !== stableJson(cachedRequest)
      ) {
        await Promise.all([
          writeJson(requestPath, safeCachedRequest),
          writeJson(responsePath, safeCached),
        ]);
      }
      return cached.body.input_tokens;
    }
  } catch {
    // Cache miss (or unreadable cache): fall through to a live measurement.
  }

  const headers = requestHeaders(capturedHeaders);
  await writeJson(requestPath, {
    id,
    label,
    generatedAt: new Date().toISOString(),
    url: countTokensUrl,
    headers: redactHeaders(headers),
    body,
    bodyCanonicalSha256: sha256(stableJson(body)),
  });

  let lastError;
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      const response = await fetch(countTokensUrl, {
        method: "POST",
        headers,
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(30_000),
      });
      const responseText = await response.text();
      let responseBody;
      try {
        responseBody = JSON.parse(responseText);
      } catch {
        // Keep non-JSON responses verbatim in the raw record.
        responseBody = { raw: responseText };
      }
      const record = {
        id,
        label,
        receivedAt: new Date().toISOString(),
        ok: response.ok,
        status: response.status,
        headers: redactHeaders(Object.fromEntries(response.headers.entries())),
        body: responseBody,
      };
      await writeJson(responsePath, record);
      // Optional chaining: a literal `null` JSON body must not raise a TypeError.
      if (response.ok && Number.isInteger(responseBody?.input_tokens)) {
        return responseBody.input_tokens;
      }
      if (!retryableStatuses.includes(response.status)) {
        const rejection = new Error(
          `count_tokens rejected ${label}: HTTP ${response.status} ${responseText}`,
        );
        // Explicit marker instead of matching on the message text, so unrelated
        // errors that merely mention "rejected" are still retried.
        rejection.retryable = false;
        throw rejection;
      }
      lastError = new Error(`HTTP ${response.status} ${responseText}`);
    } catch (error) {
      if (error?.retryable === false) throw error;
      lastError = error;
    }
    // No backoff after the final attempt: there is nothing left to wait for.
    if (attempt === maxAttempts) break;
    await sleep(500 * 2 ** (attempt - 1));
  }
  throw lastError;
}

/**
 * Deletes every entry in rawCountsDir that is not the request or response
 * file of a measurement recorded in usedMeasurements.
 *
 * @returns {Promise<void>} Resolves once all stale files have been unlinked.
 */
async function pruneRawMeasurements() {
  const keep = new Set();
  for (const id of usedMeasurements.keys()) {
    keep.add(`${id}.request.json`);
    keep.add(`${id}.response.json`);
  }

  const entries = await readdir(rawCountsDir);
  const removals = [];
  for (const entry of entries) {
    // Start every deletion before awaiting any, so they run concurrently.
    if (!keep.has(entry)) removals.push(unlink(resolve(rawCountsDir, entry)));
  }
  await Promise.all(removals);
}

/**
 * Runs the whole measurement: validates the capture, writes the call plan,
 * measures the category and per-item token counts, then writes and renders
 * the result.
 *
 * With --plan-only the function stops after writing the plan, before any
 * credential check or API call.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the rebuilt full body differs from the captured
 *   projection, or when neither credential is set for a real run. Errors from
 *   the helpers it calls are not caught here.
 */
async function main() {
  const capture = projectCountableRequest(await readJson(capturePath));
  const capturedHeaders = await readJson(capturedHeadersPath);
  const builder = createBuilder(capture);
  // Rebuilding with every category enabled must reproduce the capture exactly;
  // otherwise the ablated bodies built below would not be comparable to it.
  const fullBody = builder.build(categoryNames);
  if (stableJson(fullBody) !== stableJson(capture)) {
    throw new Error(
      "The reconstructed full count_tokens body does not match the captured countable request projection",
    );
  }
  // NOTE(review): `capture.tools.length` below (and the loop over
  // `capture.tools` further down) assumes the capture has a `tools` array.
  // projectCountableRequest only copies defined keys, so a capture without
  // tools would throw a TypeError here; confirm that is acceptable.
  const plan = {
    generatedAt: new Date().toISOString(),
    endpoint: countTokensUrl,
    model: fullBody.model,
    capturedAnthropicBeta: capturedHeaders["anthropic-beta"] || null,
    countTokensBeta: "token-counting-2024-11-01",
    countableFields: Object.keys(fullBody),
    deliberatelyExcludedNonContextFields: [
      "context_management",
      "max_tokens",
      "metadata",
      "stream",
    ],
    overviewCategories: ["built_in_tool_definitions", "skill_descriptions", "everything_else"],
    categoryCalls: 3,
    skillIndependentEstimateCalls: builder.skills.length,
    toolIndependentEstimateCalls: capture.tools.length,
    promptAblationCalls: 1,
    // 3 category calls + 1 prompt-ablation call, plus one call per skill and per tool.
    totalPlannedCalls: 4 + builder.skills.length + capture.tools.length,
    fullBodyCanonicalSha256: sha256(stableJson(fullBody)),
    fullBodyRoundTripsCapturedProjection: true,
  };
  await writeJson(resolve(provenanceDir, "count-tokens-plan.json"), plan);
  if (args["plan-only"]) {
    process.stdout.write(
      `Prepared a ${plan.totalPlannedCalls}-call measurement plan for ${plan.model}.\n`,
    );
    return;
  }
  if (!apiKey && !authToken) {
    throw new Error(
      "Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN before running official count_tokens measurements. Use --plan-only for credential-free validation.",
    );
  }
  // Baseline = everything except the tool definitions and the skill descriptions.
  const baselineNames = [
    "system_parameter_blocks",
    "sdk_injected_message_context",
    "skill_listing_boilerplate",
  ];
  const noToolNames = [...baselineNames, "skill_descriptions"];
  // The four overview measurements are awaited one after another.
  const totalTokens = await countTokens("category-full-request", fullBody, capturedHeaders);
  const withoutUserPromptTokens = await countTokens(
    "diagnostic-without-user-prompt",
    removeUserPrompt(fullBody),
    capturedHeaders,
  );
  const withoutToolsTokens = await countTokens(
    "category-without-tools",
    builder.build(noToolNames),
    capturedHeaders,
  );
  const baselineTokens = await countTokens(
    "category-without-tools-or-skill-descriptions",
    builder.build(baselineNames),
    capturedHeaders,
  );
  // Each row is the difference between two adjacent measurements, so the three
  // rows add up to totalTokens.
  const rows = [
    {
      category: "built_in_tool_definitions",
      tokens: totalTokens - withoutToolsTokens,
      percent: ((totalTokens - withoutToolsTokens) / totalTokens) * 100,
      method: "Marginal count from including tools: full request minus request without tools",
    },
    {
      category: "skill_descriptions",
      tokens: withoutToolsTokens - baselineTokens,
      percent: ((withoutToolsTokens - baselineTokens) / totalTokens) * 100,
      method: "No-tools request minus request without skill descriptions",
    },
    {
      category: "everything_else",
      tokens: baselineTokens,
      percent: (baselineTokens / totalTokens) * 100,
      method: "Valid request without tools or skill-description text",
    },
  ];

  // Per-skill estimate: the description is appended as an extra text block to
  // the baseline request and the baseline count is subtracted.
  const skillEstimates = [];
  for (const skill of builder.skills) {
    const withSkill = await countTokens(
      `estimate-skill-description-${skill.name}`,
      builder.build(baselineNames, { appendText: skill.description }),
      capturedHeaders,
    );
    skillEstimates.push({
      skill: skill.name,
      description: skill.description,
      estimatedTokens: withSkill - baselineTokens,
      note: "Description serialized independently as text against the common baseline; official and non-additive estimate.",
    });
  }

  // Per-tool estimate: same approach, with the tool object serialized by
  // stableJson and appended as text rather than sent in the `tools` field.
  const toolEstimates = [];
  for (const tool of capture.tools) {
    const withTool = await countTokens(
      `estimate-tool-${tool.name}`,
      builder.build(baselineNames, { appendText: stableJson(tool) }),
      capturedHeaders,
    );
    toolEstimates.push({
      tool: tool.name,
      estimatedTokens: withTool - baselineTokens,
      note: "Captured tool object serialized independently as JSON text against the common baseline; official and non-additive estimate.",
    });
  }

  const captureProvenance = await readJson(resolve(provenanceDir, "capture-provenance.json"));
  const result = {
    generatedAt: new Date().toISOString(),
    endpoint: countTokensUrl,
    model: fullBody.model,
    method: {
      total: "Official Anthropic Messages count_tokens endpoint",
      attribution: "Sequential controlled ablation over 3 overview categories",
      drilldowns:
        "Independent item estimates against a common baseline; intentionally non-additive",
    },
    provenance: {
      capture: captureProvenance,
      countableFullBodyCanonicalSha256: sha256(stableJson(fullBody)),
      capturedAnthropicBeta: capturedHeaders["anthropic-beta"] || null,
      rawRequestResponseDirectory: "raw/count-tokens",
    },
    mece: {
      totalTokens,
      attributedTokenSum: rows.reduce((sum, row) => sum + row.tokens, 0),
      rows,
      overviewCounts: {
        full: totalTokens,
        withoutTools: withoutToolsTokens,
        withoutToolsOrSkillDescriptions: baselineTokens,
      },
    },
    promptAblation: {
      withoutUserPromptTokens,
      capturedPromptMarginalTokens: totalTokens - withoutUserPromptTokens,
      method:
        "Captured request minus the user-prompt content block; all SDK-supplied context held constant",
    },
    skills: skillEstimates,
    tools: toolEstimates,
  };
  await writeJson(resolve(resultsDir, "token-impact.json"), result);
  await renderResult(result);
  // Pruning runs last, once every measurement of this run is in usedMeasurements.
  await pruneRawMeasurements();
  process.stdout.write(`Measured ${totalTokens} input tokens for ${fullBody.model}.\n`);
}

// Entry point: print any failure to stderr and mark the process as failed
// through the exit code rather than exiting abruptly.
main().catch((failure) => {
  const report = failure.stack || failure.message;
  console.error(report);
  process.exitCode = 1;
});