pgvector Semantic Search
Overview
Solatis uses pgvector, a PostgreSQL extension, to power semantic search with vector embeddings. This enables AI-powered understanding of document meaning, not just keyword matching.
How It Works
Vector Embeddings
Concept: Text is converted into high-dimensional numerical vectors (arrays of numbers) that represent semantic meaning.
Example:
Text: "The cat sat on the mat"
Embedding: [0.023, -0.145, 0.892, ..., 0.234] # 1536 dimensions

Similar Meanings = Close Vectors:
"dog sitting on rug" → [0.025, -0.142, 0.895, ..., 0.231]
"feline on carpet" → [0.024, -0.144, 0.893, ..., 0.233]
"airplane in sky" → [-0.523, 0.842, -0.123, ..., -0.845]Implementation
Database Schema
sql
-- Enable pgvector extension
-- Enable the pgvector extension (provides the vector type and ANN index methods).
CREATE EXTENSION IF NOT EXISTS vector;
-- Document embeddings table: one row per document chunk. A document is
-- split into ordered chunks and each chunk stores its own embedding.
CREATE TABLE document_embeddings (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
-- Owning document; chunk rows are removed when the document is deleted.
document_id uuid REFERENCES documents(id) ON DELETE CASCADE,
-- Zero-based position of the chunk within its document.
chunk_index integer NOT NULL,
content text NOT NULL,
-- 1536 dims matches OpenAI ada-002 / text-embedding-3-small.
-- NOTE(review): the processing code elsewhere in this doc calls
-- text-embedding-3-large, which returns 3072 dims by default -- either
-- request dimensions: 1536 there or widen this column; confirm which
-- model is actually deployed.
embedding vector(1536) NOT NULL, -- OpenAI ada-002 dimensions
metadata jsonb,
created_at timestamp DEFAULT now(),
-- Ensure unique chunks per document
UNIQUE(document_id, chunk_index)
);
-- HNSW index for fast approximate nearest-neighbour search under cosine
-- distance (m = per-node graph links, ef_construction = build-time candidate
-- list; larger values trade build time for recall).
CREATE INDEX document_embeddings_embedding_idx
ON document_embeddings
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
-- B-tree index for per-document lookups and cascade deletes.
CREATE INDEX document_embeddings_document_id_idx
ON document_embeddings(document_id);
-- GIN index for @> containment filters on metadata.
CREATE INDEX document_embeddings_metadata_idx
ON document_embeddings USING gin(metadata jsonb_path_ops);
Document Processing
Chunking Strategy:
typescript
// Split document into overlapping chunks
// Split a document into overlapping, fixed-size token chunks so each chunk
// fits an embedding model's input window while preserving continuity across
// chunk boundaries.
//
// options.chunkSize - tokens per chunk (default 512)
// options.overlap   - tokens shared between consecutive chunks (default 50)
//
// FIX: the original declared the defaults as a single default object, so a
// caller passing e.g. { chunkSize: 256 } silently dropped `overlap` (the
// stride became NaN and only the first chunk was produced). Defaults are now
// applied per-field. Also guards against overlap >= chunkSize, which would
// make the stride non-positive and loop forever.
function chunkDocument(text: string, options: { chunkSize?: number; overlap?: number } = {}): string[] {
  const { chunkSize = 512, overlap = 50 } = options;
  const stride = chunkSize - overlap;
  if (stride <= 0) {
    throw new Error(`overlap (${overlap}) must be smaller than chunkSize (${chunkSize})`);
  }
  const tokens = tokenize(text);
  const chunks: string[] = [];
  for (let i = 0; i < tokens.length; i += stride) {
    chunks.push(detokenize(tokens.slice(i, i + chunkSize)));
  }
  return chunks;
}
// Process document
async function processDocument(documentId: string, content: string) {
const chunks = chunkDocument(content);
for (let i = 0; i < chunks.length; i++) {
// Generate embedding using OpenAI
const embedding = await openai.embeddings.create({
model: "text-embedding-3-large",
input: chunks[i]
});
// Store in database
await supabase
.from('document_embeddings')
.insert({
document_id: documentId,
chunk_index: i,
content: chunks[i],
embedding: embedding.data[0].embedding,
metadata: {
tokens: chunks[i].split(' ').length,
position: i
}
});
}
}Semantic Search Query
Basic Search Function:
sql
-- Semantic search over document chunks.
-- Returns the match_count chunks nearest to query_embedding (cosine),
-- filtered to similarity > match_threshold and optionally to one workspace.
-- similarity = 1 - cosine distance, so 1.0 means identical direction.
CREATE OR REPLACE FUNCTION search_documents(
query_embedding vector(1536),
match_threshold float DEFAULT 0.7,
match_count int DEFAULT 20,
filter_workspace_id uuid DEFAULT NULL
)
RETURNS TABLE (
id uuid,
document_id uuid,
content text,
similarity float,
metadata jsonb
)
LANGUAGE plpgsql
-- FIX: the function only reads; declaring it STABLE lets the planner
-- treat results as consistent within a statement and optimize calls.
STABLE
AS $$
BEGIN
RETURN QUERY
SELECT
de.id,
de.document_id,
de.content,
-- Convert cosine distance (<=>) into a similarity score.
1 - (de.embedding <=> query_embedding) as similarity,
de.metadata
FROM document_embeddings de
JOIN documents d ON d.id = de.document_id
WHERE
-- Apply workspace filter if provided (NULL = search all workspaces).
(filter_workspace_id IS NULL OR d.workspace_id = filter_workspace_id)
-- Similarity threshold
AND (1 - (de.embedding <=> query_embedding)) > match_threshold
-- Ordering by raw distance keeps the query eligible for the HNSW index.
ORDER BY de.embedding <=> query_embedding -- Cosine distance
LIMIT match_count;
END;
$$;
Usage:
typescript
// Generate query embedding
const queryEmbedding = await openai.embeddings.create({
model: "text-embedding-3-large",
input: "How do I configure API authentication?"
});
// Search
const { data } = await supabase.rpc('search_documents', {
query_embedding: queryEmbedding.data[0].embedding,
match_threshold: 0.7,
match_count: 10,
filter_workspace_id: workspaceId
});
// Results are ranked by semantic similarity
console.log(data);
// [
// { similarity: 0.92, content: "API authentication configuration...", ... },
// { similarity: 0.87, content: "To set up auth, first generate API key...", ... },
// ...
// ]Advanced Features
Hybrid Search
Combine Semantic + Keyword:
sql
-- Hybrid retrieval: blends vector similarity with full-text keyword rank.
-- NOTE(review): ts_rank scores are not bounded to [0,1] like cosine
-- similarity, so the two scores are on different scales; consider
-- normalizing keyword_results.score before weighting.
CREATE OR REPLACE FUNCTION hybrid_search(
query_text text,
query_embedding vector(1536),
semantic_weight float DEFAULT 0.7,
keyword_weight float DEFAULT 0.3,
match_count int DEFAULT 20
)
RETURNS TABLE (
document_id uuid,
content text,
semantic_score float,
keyword_score float,
combined_score float
)
LANGUAGE plpgsql
STABLE -- read-only
AS $$
BEGIN
RETURN QUERY
WITH semantic_results AS (
-- Top vector matches, capped at 100 candidates (index-friendly ordering).
SELECT
de.document_id,
de.content,
1 - (de.embedding <=> query_embedding) as score
FROM document_embeddings de
ORDER BY de.embedding <=> query_embedding
LIMIT 100
),
keyword_results AS (
-- Full-text matches ranked by ts_rank.
SELECT
de.document_id,
de.content,
ts_rank(
to_tsvector('english', de.content),
plainto_tsquery('english', query_text)
) as score
FROM document_embeddings de
WHERE to_tsvector('english', de.content) @@ plainto_tsquery('english', query_text)
),
-- Merge both result sets; a row missing from one side scores 0 there.
-- NOTE(review): joining on document_id alone multiplies rows when a
-- document has several matching chunks; join on (document_id, chunk
-- identity) if chunk-level results are wanted -- confirm intent.
combined AS (
SELECT
COALESCE(s.document_id, k.document_id) as document_id,
COALESCE(s.content, k.content) as content,
COALESCE(s.score, 0.0) as semantic_score,
COALESCE(k.score, 0.0) as keyword_score,
(COALESCE(s.score, 0.0) * semantic_weight +
COALESCE(k.score, 0.0) * keyword_weight) as combined_score
FROM semantic_results s
FULL OUTER JOIN keyword_results k ON s.document_id = k.document_id
)
-- FIX: the original's bare "ORDER BY combined_score" is ambiguous in
-- PL/pgSQL because combined_score also names an OUT column of this
-- function (raises "column reference is ambiguous" at runtime); order by
-- a table-qualified column instead.
SELECT c.document_id, c.content, c.semantic_score, c.keyword_score, c.combined_score
FROM combined c
ORDER BY c.combined_score DESC
LIMIT match_count;
END;
$$;
Filtered Search
With Metadata Filters:
sql
-- Search with multiple filters
-- NOTE(review): these outer predicates run AFTER search_documents has
-- applied its internal LIMIT, so the final result may contain far fewer
-- than match_count rows; push filters into the function (as with
-- filter_workspace_id) when an exact top-k is required.
SELECT * FROM search_documents(
query_embedding := '[0.1, 0.2, ...]'::vector(1536),
match_threshold := 0.7,
match_count := 20
)
WHERE
-- Workspace filter
document_id IN (
SELECT id FROM documents WHERE workspace_id = 'workspace-uuid'
)
-- Date filter (assumes metadata carries an ISO timestamp under
-- 'created_at' -- TODO confirm the writer actually stores this key)
AND (metadata->>'created_at')::timestamp > NOW() - INTERVAL '30 days'
-- Document type filter (jsonb containment; served by the GIN index)
AND metadata @> '{"type": "pdf"}'
-- Custom tags (? tests top-level array membership)
AND metadata->'tags' ? 'important';
Re-ranking
Improve Results with Re-ranking:
typescript
/**
 * Two-stage retrieval: a broad, cheap semantic search followed by
 * re-ranking of the candidate pool with a stronger relevance model.
 */
async function searchWithReranking(
  query: string,
  workspaceId: string
) {
  const CANDIDATE_POOL = 100;
  const FINAL_COUNT = 20;

  // Stage 1: vector search casts a wide net over the workspace.
  const candidates = await initialSemanticSearch(query, workspaceId, CANDIDATE_POOL);

  // Stage 2: re-score every candidate against the query with the
  // (slower, more accurate) cross-lingual re-ranker.
  const rerankedResults = await rerank({
    query,
    documents: candidates.map((candidate) => candidate.content),
    model: 'rerank-multilingual-v2.0'
  });

  // Stage 3: keep only the strongest re-ranked matches.
  return rerankedResults.slice(0, FINAL_COUNT);
}
Performance Optimization
Index Tuning
HNSW Parameters:
sql
-- HNSW parameter presets. FIX: CREATE INDEX requires a target table
-- ("ON table_name"); the originals omitted it and would not parse. Each
-- variant also gets a distinct name -- they are alternatives, so drop the
-- existing embedding index before switching presets.
-- Balanced (default)
CREATE INDEX document_embeddings_embedding_hnsw_idx
ON document_embeddings
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
-- Higher recall (slower build, better search)
CREATE INDEX document_embeddings_embedding_hnsw_hi_idx
ON document_embeddings
USING hnsw (embedding vector_cosine_ops)
WITH (m = 32, ef_construction = 128);
-- Faster build (lower recall)
CREATE INDEX document_embeddings_embedding_hnsw_fast_idx
ON document_embeddings
USING hnsw (embedding vector_cosine_ops)
WITH (m = 8, ef_construction = 32);
Query Optimization
Client Configuration (connection pooling itself is handled server-side by Supabase's pooler, not by these options):
typescript
import { createClient } from '@supabase/supabase-js';

// Supabase client tuned for vector workloads.
// FIX: in supabase-js v2 a custom fetch implementation must be supplied
// under `global.fetch`; the original's top-level `fetch` key is not a
// recognized client option and was silently ignored.
const supabase = createClient(url, key, {
  db: {
    schema: 'public',
  },
  global: {
    headers: { 'x-my-custom-header': 'my-value' },
    // Optimize for vector queries (e.g. longer timeouts for large
    // embedding payloads) via a custom fetch implementation.
    fetch: customFetch,
  },
  realtime: {
    params: {
      eventsPerSecond: 10
    }
  },
  auth: {
    persistSession: true,
    autoRefreshToken: true,
  }
});
Caching:
typescript
// Process-local result cache keyed by query string.
const cache = new Map();

// Return cached results for `query` when the entry is fresher than `ttl`
// milliseconds; otherwise run the search and memoize the outcome.
// FIX: the original never removed expired entries, so the Map grew without
// bound -- stale entries are now evicted on access.
async function cachedSearch(query: string, ttl = 3600000) {
  const cacheKey = `search:${query}`;
  // Serve from cache while the entry is still fresh.
  const cached = cache.get(cacheKey);
  if (cached) {
    if (Date.now() - cached.timestamp < ttl) {
      return cached.results;
    }
    // Entry expired: evict it so the cache cannot grow unboundedly.
    cache.delete(cacheKey);
  }
  // Cache miss (or expired): perform the search and memoize the result.
  const results = await search(query);
  cache.set(cacheKey, {
    results,
    timestamp: Date.now()
  });
  return results;
}
Best Practices
Embedding Generation:
- Batch embed for efficiency
- Cache embeddings
- Use appropriate model
- Handle rate limits
- Monitor costs
Index Maintenance:
- Regular VACUUM
- REINDEX periodically
- Monitor index bloat
- Tune autovacuum
Query Optimization:
- Use appropriate thresholds
- Limit result count
- Apply filters early
- Cache frequent queries
- Monitor slow queries
Monitoring
Performance Metrics:
sql
-- Index statistics: scan counts and tuples read/fetched per index --
-- a near-zero idx_scan on the embedding index means queries are not
-- using it (check ORDER BY shape and operator class).
SELECT * FROM pg_stat_user_indexes
WHERE indexrelname LIKE '%embedding%';
-- Table statistics: row counts, dead tuples, vacuum/analyze history.
SELECT * FROM pg_stat_user_tables
WHERE relname = 'document_embeddings';
-- Query performance: confirm the plan shows an HNSW index scan rather
-- than a sequential scan ('[...]' is a placeholder for a real vector).
EXPLAIN ANALYZE
SELECT * FROM search_documents('[...]'::vector(1536), 0.7, 20);
Troubleshooting
Slow Queries:
- Check index usage
- Verify index parameters
- Reduce search scope
- Lower result count
- Add more filters
Poor Results:
- Adjust similarity threshold
- Try hybrid search
- Improve chunking strategy
- Use better embedding model
- Add re-ranking
High Costs:
- Cache embeddings
- Batch processing
- Use cheaper model for candidates
- Expensive model for re-ranking
Next Steps
- Supabase Architecture - Overall architecture
- API Authentication - API usage
- Data Flow - System design
Last Updated: October 11, 2025