{"slug":"ai-data-stack","title":"AI Data Stack","tagline":"Labeling, synthetic data, vector DBs, and the RAG plumbing that feeds every model.","hub_url":"/topic/ai-data-stack","counts":{"companies":12,"business_signals_90d":0,"talent_signals":0},"companies":[{"slug":"unstructured-io","name":"Unstructured","primary_layer":"L6","description":"PDF/HTML/PPTX → LLM-ready chunks; ETL for unstructured data.","profile_url":"/companies/unstructured-io"},{"slug":"scale-ai","name":"Scale AI","primary_layer":"L6","description":"Largest RLHF/data-labeling vendor. Meta $14.3B investment Sept 2025.","profile_url":"/companies/scale-ai"},{"slug":"surge-ai","name":"Surge AI","primary_layer":"L6","description":"Premium-labeling competitor to Scale; PhD-level RLHF for Anthropic/OpenAI.","profile_url":"/companies/surge-ai"},{"slug":"labelbox","name":"Labelbox","primary_layer":"L6","description":"Self-serve labeling platform + Alignerr expert network for LLM evals.","profile_url":"/companies/labelbox"},{"slug":"pinecone","name":"Pinecone","primary_layer":"L6","description":"Managed vector DB category leader. Notion AI / Shopify / Gong customers.","profile_url":"/companies/pinecone"},{"slug":"weaviate","name":"Weaviate","primary_layer":"L6","description":"Open-source vector DB w/ managed cloud; hybrid search + multi-modal.","profile_url":"/companies/weaviate"},{"slug":"chroma","name":"Chroma","primary_layer":"L6","description":"Developer-first OSS vector DB; default in LangChain/LlamaIndex tutorials.","profile_url":"/companies/chroma"},{"slug":"qdrant","name":"Qdrant","primary_layer":"L6","description":"Rust-based OSS vector DB; X (Twitter), Bayer, Disney customers.","profile_url":"/companies/qdrant"},{"slug":"mostly-ai","name":"MOSTLY AI","primary_layer":"L6","description":"Synthetic tabular data for finance/healthcare where real data is PII-locked.","profile_url":"/companies/mostly-ai"},{"slug":"gretel-ai","name":"Gretel","primary_layer":"L6","description":"Synthetic-data API for LLM fine-tuning. Acquired by NVIDIA March 2025 (~$320M).","profile_url":"/companies/gretel-ai"},{"slug":"parallel-domain","name":"Parallel Domain","primary_layer":"L6","description":"Synthetic-data for AV/embodied AI. Toyota, Woven, Waabi customers.","profile_url":"/companies/parallel-domain"},{"slug":"llamaindex","name":"LlamaIndex","primary_layer":"L6","description":"RAG/data-framework leader; document parsing + indexing for enterprise LLM apps.","profile_url":"/companies/llamaindex"}],"generated_at":"2026-06-09T23:39:46.376Z"}