{"$schema":"https://tokenwisehq.com/openapi.json","lastVerified":"2026-05-24","generatedAt":"2026-06-01T20:29:24.547Z","providers":{"openai":{"name":"OpenAI","pricingUrl":"https://openai.com/api/pricing"},"anthropic":{"name":"Anthropic","pricingUrl":"https://www.anthropic.com/pricing"},"google":{"name":"Google","pricingUrl":"https://ai.google.dev/pricing"},"xai":{"name":"xAI","pricingUrl":"https://x.ai/api"},"deepseek":{"name":"DeepSeek","pricingUrl":"https://api-docs.deepseek.com/quick_start/pricing"},"mistral":{"name":"Mistral","pricingUrl":"https://mistral.ai/pricing"},"meta":{"name":"Meta","pricingUrl":"https://llama.meta.com"},"groq":{"name":"Groq","pricingUrl":"https://groq.com/pricing"},"cohere":{"name":"Cohere","pricingUrl":"https://cohere.com/pricing"},"voyage":{"name":"Voyage AI","pricingUrl":"https://docs.voyageai.com/docs/pricing"}},"models":[{"id":"gpt-4o","name":"GPT-4o","provider":"openai","inputPricePerMillion":2.5,"outputPricePerMillion":10,"cachedInputPricePerMillion":1.25,"contextTokens":128000,"modalities":["text","vision"],"outputTps":110,"ttftMs":600,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"gpt-4o-mini","name":"GPT-4o mini","provider":"openai","inputPricePerMillion":0.15,"outputPricePerMillion":0.6,"cachedInputPricePerMillion":0.075,"contextTokens":128000,"modalities":["text","vision"],"outputTps":140,"ttftMs":400,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"gpt-4.1","name":"GPT-4.1","provider":"openai","inputPricePerMillion":2,"outputPricePerMillion":8,"cachedInputPricePerMillion":0.5,"contextTokens":1000000,"modalities":["text","vision"],"outputTps":100,"ttftMs":600,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"gpt-4.1-mini","name":"GPT-4.1 mini","provider":"openai","inputPricePerMillion":0.4,"outputPricePerMillion":1.6,"cachedInputPricePerMillion":0.1,"contextTokens":1000000,"modalities":["text","vision"],"outputTps":150,"ttftMs":400,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"o1","name":"o1","provider":"openai","inputPricePerMillion":15,"outputPricePerMillion":60,"cachedInputPricePerMillion":7.5,"contextTokens":200000,"modalities":["text"],"outputTps":30,"ttftMs":8000,"features":{"jsonMode":"no","strictSchema":"no","functionCalling":"yes","parallelToolCalls":"no","reasoningTokens":"yes","streamingJson":"partial"},"lastVerified":"2026-05-24","notes":"Reasoning tokens billed but hidden from output."},{"id":"o3-mini","name":"o3-mini","provider":"openai","inputPricePerMillion":1.1,"outputPricePerMillion":4.4,"cachedInputPricePerMillion":0.55,"contextTokens":200000,"modalities":["text"],"outputTps":150,"ttftMs":1500,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"yes","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"claude-opus-4-7","name":"Claude Opus 4.7","provider":"anthropic","inputPricePerMillion":15,"outputPricePerMillion":75,"cachedInputPricePerMillion":1.5,"contextTokens":1000000,"modalities":["text","vision"],"outputTps":50,"ttftMs":1200,"features":{"jsonMode":"partial","strictSchema":"no","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"yes","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":"Cache write 1.25× input price. Extended thinking optional. JSON via prefill or tool-use trick."},{"id":"claude-sonnet-4-6","name":"Claude Sonnet 4.6","provider":"anthropic","inputPricePerMillion":3,"outputPricePerMillion":15,"cachedInputPricePerMillion":0.3,"contextTokens":200000,"modalities":["text","vision"],"outputTps":90,"ttftMs":800,"features":{"jsonMode":"partial","strictSchema":"no","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"yes","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"claude-haiku-4-5","name":"Claude Haiku 4.5","provider":"anthropic","inputPricePerMillion":0.8,"outputPricePerMillion":4,"cachedInputPricePerMillion":0.08,"contextTokens":200000,"modalities":["text","vision"],"outputTps":180,"ttftMs":400,"features":{"jsonMode":"partial","strictSchema":"no","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"claude-3-5-sonnet","name":"Claude 3.5 Sonnet","provider":"anthropic","inputPricePerMillion":3,"outputPricePerMillion":15,"cachedInputPricePerMillion":0.3,"contextTokens":200000,"modalities":["text","vision"],"outputTps":80,"ttftMs":900,"features":{"jsonMode":"partial","strictSchema":"no","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":"Older but still popular for cost-stability reasons."},{"id":"gemini-2-0-flash","name":"Gemini 2.0 Flash","provider":"google","inputPricePerMillion":0.1,"outputPricePerMillion":0.4,"cachedInputPricePerMillion":0.025,"contextTokens":1000000,"modalities":["text","vision","audio"],"outputTps":250,"ttftMs":300,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"gemini-1-5-pro","name":"Gemini 1.5 Pro","provider":"google","inputPricePerMillion":1.25,"outputPricePerMillion":5,"cachedInputPricePerMillion":0.3125,"contextTokens":2000000,"modalities":["text","vision","audio"],"outputTps":60,"ttftMs":800,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":"2M context — the largest of any production model."},{"id":"gemini-1-5-flash","name":"Gemini 1.5 Flash","provider":"google","inputPricePerMillion":0.075,"outputPricePerMillion":0.3,"cachedInputPricePerMillion":0.01875,"contextTokens":1000000,"modalities":["text","vision","audio"],"outputTps":190,"ttftMs":350,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"grok-3","name":"Grok-3","provider":"xai","inputPricePerMillion":3,"outputPricePerMillion":15,"cachedInputPricePerMillion":null,"contextTokens":1000000,"modalities":["text","vision"],"outputTps":90,"ttftMs":600,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"grok-2","name":"Grok-2","provider":"xai","inputPricePerMillion":2,"outputPricePerMillion":10,"cachedInputPricePerMillion":null,"contextTokens":128000,"modalities":["text","vision"],"outputTps":70,"ttftMs":700,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"deepseek-v3","name":"DeepSeek V3","provider":"deepseek","inputPricePerMillion":0.27,"outputPricePerMillion":1.1,"cachedInputPricePerMillion":0.07,"contextTokens":128000,"modalities":["text"],"outputTps":60,"ttftMs":700,"features":{"jsonMode":"yes","strictSchema":"no","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":"Off-peak (UTC 16:30–00:30) is 50% cheaper."},{"id":"deepseek-r1","name":"DeepSeek R1","provider":"deepseek","inputPricePerMillion":0.55,"outputPricePerMillion":2.19,"cachedInputPricePerMillion":0.14,"contextTokens":128000,"modalities":["text"],"outputTps":30,"ttftMs":2500,"features":{"jsonMode":"yes","strictSchema":"no","functionCalling":"yes","parallelToolCalls":"no","reasoningTokens":"yes","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":"Reasoning model — outputs include chain-of-thought."},{"id":"mistral-large","name":"Mistral Large 2","provider":"mistral","inputPricePerMillion":2,"outputPricePerMillion":6,"cachedInputPricePerMillion":null,"contextTokens":128000,"modalities":["text","vision"],"outputTps":85,"ttftMs":600,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"mistral-small","name":"Mistral Small 3","provider":"mistral","inputPricePerMillion":0.2,"outputPricePerMillion":0.6,"cachedInputPricePerMillion":null,"contextTokens":32000,"modalities":["text"],"outputTps":150,"ttftMs":350,"features":{"jsonMode":"yes","strictSchema":"yes","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":null},{"id":"llama-3-3-70b-groq","name":"Llama 3.3 70B (Groq)","provider":"groq","inputPricePerMillion":0.59,"outputPricePerMillion":0.79,"cachedInputPricePerMillion":null,"contextTokens":128000,"modalities":["text"],"outputTps":280,"ttftMs":250,"features":{"jsonMode":"yes","strictSchema":"partial","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":"Open-weight model. Pricing + speed depend on host."},{"id":"llama-3-1-8b-groq","name":"Llama 3.1 8B (Groq)","provider":"groq","inputPricePerMillion":0.05,"outputPricePerMillion":0.08,"cachedInputPricePerMillion":null,"contextTokens":128000,"modalities":["text"],"outputTps":750,"ttftMs":200,"features":{"jsonMode":"yes","strictSchema":"partial","functionCalling":"yes","parallelToolCalls":"yes","reasoningTokens":"no","streamingJson":"yes"},"lastVerified":"2026-05-24","notes":"Lightning-fast on Groq's LPU hardware."}],"embeddings":[{"id":"openai-text-embedding-3-small","name":"text-embedding-3-small","provider":"openai","dimensions":1536,"pricePerMillion":0.02,"maxInputTokens":8191,"lastVerified":"2026-05-24"},{"id":"openai-text-embedding-3-large","name":"text-embedding-3-large","provider":"openai","dimensions":3072,"pricePerMillion":0.13,"maxInputTokens":8191,"lastVerified":"2026-05-24"},{"id":"cohere-embed-v3-english","name":"Embed v3 English","provider":"cohere","dimensions":1024,"pricePerMillion":0.1,"maxInputTokens":512,"lastVerified":"2026-05-24"},{"id":"cohere-embed-v3-multilingual","name":"Embed v3 Multilingual","provider":"cohere","dimensions":1024,"pricePerMillion":0.1,"maxInputTokens":512,"lastVerified":"2026-05-24","notes":"100+ languages."},{"id":"voyage-3","name":"voyage-3","provider":"voyage","dimensions":1024,"pricePerMillion":0.06,"maxInputTokens":32000,"lastVerified":"2026-05-24"},{"id":"voyage-3-lite","name":"voyage-3-lite","provider":"voyage","dimensions":512,"pricePerMillion":0.02,"maxInputTokens":32000,"lastVerified":"2026-05-24"},{"id":"mistral-embed","name":"mistral-embed","provider":"mistral","dimensions":1024,"pricePerMillion":0.1,"maxInputTokens":8192,"lastVerified":"2026-05-24"}],"rateLimits":[{"provider":"openai","flagshipModel":"GPT-4o","otherModelsNote":"o1 / o3 models have lower per-model RPM caps (e.g., o1 starts at 500 RPM on Tier 1). Mini variants get higher TPM.","tiers":[{"name":"Tier 1","qualification":"$5 spent + 7 days","rpm":500,"tpm":30000,"dailyLimit":"200 requests"},{"name":"Tier 2","qualification":"$50 spent + 7 days","rpm":5000,"tpm":450000,"dailyLimit":null},{"name":"Tier 3","qualification":"$100 spent + 7 days","rpm":5000,"tpm":800000,"dailyLimit":null},{"name":"Tier 4","qualification":"$250 spent + 14 days","rpm":10000,"tpm":2000000,"dailyLimit":null},{"name":"Tier 5","qualification":"$1,000 spent + 30 days","rpm":30000,"tpm":30000000,"dailyLimit":null}]},{"provider":"anthropic","flagshipModel":"Claude Sonnet 4.6","otherModelsNote":"Opus has half the TPM at the same tier. Haiku has 2× TPM.","tiers":[{"name":"Build Tier 1","qualification":"Credit card added","rpm":50,"tpm":40000,"dailyLimit":null},{"name":"Build Tier 2","qualification":"$40 spent + 7 days","rpm":1000,"tpm":80000,"dailyLimit":null},{"name":"Build Tier 3","qualification":"$200 spent + 7 days","rpm":2000,"tpm":160000,"dailyLimit":null},{"name":"Build Tier 4","qualification":"$400 spent + 14 days","rpm":4000,"tpm":400000,"dailyLimit":null},{"name":"Scale","qualification":"Sales contract","rpm":null,"tpm":null,"dailyLimit":null,"notes":"Custom limits, negotiated."}]},{"provider":"google","flagshipModel":"Gemini 2.0 Flash","otherModelsNote":"Gemini 1.5 Pro has lower RPM (300 on Tier 1).","tiers":[{"name":"Free","qualification":"Google account","rpm":15,"tpm":1000000,"dailyLimit":"1,500 requests","notes":"Used for training by default."},{"name":"Tier 1","qualification":"Billing enabled","rpm":2000,"tpm":4000000,"dailyLimit":null},{"name":"Tier 2","qualification":"$250 spent + 30 days","rpm":10000,"tpm":10000000,"dailyLimit":null}]},{"provider":"groq","flagshipModel":"Llama 3.3 70B","tiers":[{"name":"Free","qualification":"Sign up","rpm":30,"tpm":6000,"dailyLimit":"14,400 requests"},{"name":"Pay-as-you-go","qualification":"Billing enabled","rpm":1000,"tpm":300000,"dailyLimit":null}]},{"provider":"deepseek","flagshipModel":"DeepSeek V3 (deepseek-chat)","otherModelsNote":"No published per-tier limits; concurrency-based.","tiers":[{"name":"Pay-as-you-go","qualification":"Billing enabled","rpm":null,"tpm":null,"dailyLimit":null,"notes":"DeepSeek doesn't publish RPM/TPM — concurrency-controlled. Expect ~60 concurrent requests."}]},{"provider":"mistral","flagshipModel":"Mistral Large 2","tiers":[{"name":"Free (Experiment)","qualification":"Sign up","rpm":1,"tpm":500000,"dailyLimit":"1B tokens / month"},{"name":"Production","qualification":"Billing enabled","rpm":200,"tpm":2000000,"dailyLimit":null}]},{"provider":"xai","flagshipModel":"Grok-3","tiers":[{"name":"Pay-as-you-go","qualification":"Billing enabled","rpm":60,"tpm":240000,"dailyLimit":null,"notes":"Higher tiers negotiated."}]}],"meta":{"license":"CC BY 4.0 — link back to tokenwisehq.com if you use this.","source":"tokenwisehq.com/api/llm-prices.json","contact":"hi@tokenwisehq.com","cadence":"Refreshed weekly. Spot a wrong number? Email us."}}