{"slug":"inference-as-a-service","title":"Inference-as-a-Service","tagline":"Token-economy platforms that serve models by the token — inference is now ~2/3 of accelerator demand.","hub_url":"/topic/inference-as-a-service","counts":{"companies":5,"business_signals_90d":6,"talent_signals":0,"jobseeker_profiles":0,"fit_scores":0},"companies":[{"slug":"baseten","name":"Baseten","primary_layer":"L5","description":null,"profile_url":"/companies/baseten"},{"slug":"fireworks-ai","name":"Fireworks AI","primary_layer":"L5","description":"Inference platform optimized for open-weight LLMs and multi-modal.","profile_url":"/companies/fireworks-ai"},{"slug":"together-ai","name":"Together AI","primary_layer":"L5","description":"Inference-as-a-service for open-weight models (Llama, DeepSeek, Qwen).","profile_url":"/companies/together-ai"},{"slug":"groq","name":"Groq","primary_layer":"L4","description":"LPU inference-only accelerator; deterministic-latency architecture. GroqCloud commercial inference service. Sample point for inference-specialised non-NVIDIA silicon path.","profile_url":"/companies/groq"},{"slug":"cerebras","name":"Cerebras","primary_layer":"L4","description":null,"profile_url":"/companies/cerebras"}],"generated_at":"2026-06-25T06:31:14.964Z","personas":[]}