Centralised embedding dimensions into a single variable
This commit is contained in:
parent
927f406fa7
commit
4e548fdefd
8 changed files with 47 additions and 16 deletions
|
|
@ -28,7 +28,7 @@ class RoleRagDocumentAdmin(admin.ModelAdmin):
|
||||||
fields.remove('embedding')
|
fields.remove('embedding')
|
||||||
return fields
|
return fields
|
||||||
|
|
||||||
@admin.display(description=_("Embedding Preview (1536d)"))
|
@admin.display(description=_("Embedding Preview"))
|
||||||
def display_embedding(self, obj):
|
def display_embedding(self, obj):
|
||||||
if obj.embedding is not None:
|
if obj.embedding is not None:
|
||||||
preview = list(obj.embedding[:5])
|
preview = list(obj.embedding[:5])
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ class Migration(migrations.Migration):
|
||||||
('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
|
('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
|
||||||
('content', models.TextField()),
|
('content', models.TextField()),
|
||||||
('content_hash', models.CharField(db_index=True, max_length=64)),
|
('content_hash', models.CharField(db_index=True, max_length=64)),
|
||||||
('embedding', pgvector.django.vector.VectorField(blank=True, dimensions=1536, null=True)),
|
('embedding', pgvector.django.vector.VectorField(blank=True, dimensions=getattr(settings, 'EMBEDDING_DIMENSIONS', 768), null=True)),
|
||||||
('metadata', models.JSONField(blank=True, default=dict)),
|
('metadata', models.JSONField(blank=True, default=dict)),
|
||||||
('chunk_index', models.IntegerField(default=0)),
|
('chunk_index', models.IntegerField(default=0)),
|
||||||
('is_active', models.BooleanField(default=True)),
|
('is_active', models.BooleanField(default=True)),
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
from django.db.models import CASCADE, BooleanField, CharField, FileField, ForeignKey, IntegerField, JSONField, Model, TextField
|
from django.db.models import CASCADE, BooleanField, CharField, FileField, ForeignKey, IntegerField, JSONField, Model, TextField
|
||||||
from django.db.models.signals import post_delete, post_save
|
from django.db.models.signals import post_delete, post_save
|
||||||
|
|
@ -46,7 +47,7 @@ class RoleRagDocument(IdentifierMixin, TimeStampMixin, Model):
|
||||||
content = TextField()
|
content = TextField()
|
||||||
content_hash = CharField(max_length=64, db_index=True)
|
content_hash = CharField(max_length=64, db_index=True)
|
||||||
|
|
||||||
embedding = VectorField(dimensions=1536, null=True, blank=True)
|
embedding = VectorField(dimensions=settings.EMBEDDING_DIMENSIONS, null=True, blank=True)
|
||||||
|
|
||||||
metadata = JSONField(default=dict, blank=True)
|
metadata = JSONField(default=dict, blank=True)
|
||||||
chunk_index = IntegerField(default=0)
|
chunk_index = IntegerField(default=0)
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,8 @@ def ingest_training_file_task(self, file_uuid):
|
||||||
file_obj.status = 'ingesting'
|
file_obj.status = 'ingesting'
|
||||||
file_obj.save()
|
file_obj.save()
|
||||||
|
|
||||||
|
target_dimensions = RoleRagDocument._meta.get_field('embedding').dimensions
|
||||||
|
|
||||||
try:
|
try:
|
||||||
raw_text = _extract_text_from_training_file(file_obj)
|
raw_text = _extract_text_from_training_file(file_obj)
|
||||||
if not raw_text:
|
if not raw_text:
|
||||||
|
|
@ -65,7 +67,11 @@ def ingest_training_file_task(self, file_uuid):
|
||||||
for text_segment in _get_text_chunks(raw_text):
|
for text_segment in _get_text_chunks(raw_text):
|
||||||
response = client.post(
|
response = client.post(
|
||||||
settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
|
settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
|
||||||
json={"text": text_segment, "threshold": 95}
|
json={
|
||||||
|
"text": text_segment,
|
||||||
|
"threshold": 95,
|
||||||
|
"target_dimensions": target_dimensions,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
result = response.json()
|
result = response.json()
|
||||||
|
|
|
||||||
|
|
@ -68,10 +68,14 @@ class MCPRouter:
|
||||||
|
|
||||||
async def _get_embedding(self, text):
|
async def _get_embedding(self, text):
|
||||||
logger.info('MCP embedding request started')
|
logger.info('MCP embedding request started')
|
||||||
|
target_dimensions = RoleRagDocument._meta.get_field('embedding').dimensions
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
settings.INFERENCE_EMBEDDINGS_ENDPOINT,
|
settings.INFERENCE_EMBEDDINGS_ENDPOINT,
|
||||||
json={'input': text},
|
json={
|
||||||
|
'input': text,
|
||||||
|
'target_dimensions': target_dimensions,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
embedding = response.json()['data'][0]['embedding']
|
embedding = response.json()['data'][0]['embedding']
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
|
||||||
INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
|
INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
|
||||||
INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
|
INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
|
||||||
INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
|
INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
|
||||||
|
EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768'))
|
||||||
|
|
||||||
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')
|
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')
|
||||||
MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/')
|
MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/')
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,6 @@ logger = logging.getLogger("gpu-node")
|
||||||
|
|
||||||
EMBED_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
|
EMBED_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
|
||||||
LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH", "/app/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")
|
LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH", "/app/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")
|
||||||
TARGET_DIMENSIONS = 1536
|
|
||||||
|
|
||||||
state: Dict[str, Any] = {}
|
state: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
|
@ -79,13 +78,29 @@ async def health():
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def pad_and_normalize(embeddings: torch.Tensor) -> torch.Tensor:
|
def _resolve_target_dimensions(payload: Dict[str, Any]) -> int:
|
||||||
"""Standardizes vector dimensions to 1536 for pgvector compatibility."""
|
raw_target = payload.get("target_dimensions")
|
||||||
|
if raw_target in (None, ""):
|
||||||
|
raise HTTPException(status_code=400, detail="'target_dimensions' is required")
|
||||||
|
|
||||||
|
try:
|
||||||
|
target = int(raw_target)
|
||||||
|
except (TypeError, ValueError) as exc:
|
||||||
|
raise HTTPException(status_code=400, detail="'target_dimensions' must be an integer") from exc
|
||||||
|
|
||||||
|
if target <= 0:
|
||||||
|
raise HTTPException(status_code=400, detail="'target_dimensions' must be > 0")
|
||||||
|
|
||||||
|
return target
|
||||||
|
|
||||||
|
|
||||||
|
def pad_and_normalize(embeddings: torch.Tensor, target_dimensions: int) -> torch.Tensor:
|
||||||
|
"""Dimension standardization plus L2 normalization."""
|
||||||
curr_dim = embeddings.shape[1]
|
curr_dim = embeddings.shape[1]
|
||||||
if curr_dim < TARGET_DIMENSIONS:
|
if curr_dim < target_dimensions:
|
||||||
embeddings = F.pad(embeddings, (0, TARGET_DIMENSIONS - curr_dim), "constant", 0)
|
embeddings = F.pad(embeddings, (0, target_dimensions - curr_dim), "constant", 0)
|
||||||
elif curr_dim > TARGET_DIMENSIONS:
|
elif curr_dim > target_dimensions:
|
||||||
embeddings = embeddings[:, :TARGET_DIMENSIONS]
|
embeddings = embeddings[:, :target_dimensions]
|
||||||
return F.normalize(embeddings, p=2, dim=1)
|
return F.normalize(embeddings, p=2, dim=1)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -94,6 +109,7 @@ async def embeddings(request: Request):
|
||||||
"""Generates text embeddings compatible with OpenAI API format."""
|
"""Generates text embeddings compatible with OpenAI API format."""
|
||||||
data = await request.json()
|
data = await request.json()
|
||||||
input_data = data.get("input", "")
|
input_data = data.get("input", "")
|
||||||
|
target_dimensions = _resolve_target_dimensions(data)
|
||||||
|
|
||||||
if isinstance(input_data, str):
|
if isinstance(input_data, str):
|
||||||
inputs = [input_data]
|
inputs = [input_data]
|
||||||
|
|
@ -121,7 +137,7 @@ async def embeddings(request: Request):
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
vectors = model.encode(prefixed_inputs, convert_to_tensor=True)
|
vectors = model.encode(prefixed_inputs, convert_to_tensor=True)
|
||||||
vectors = pad_and_normalize(vectors)
|
vectors = pad_and_normalize(vectors, target_dimensions=target_dimensions)
|
||||||
|
|
||||||
vector_list = vectors.cpu().tolist()
|
vector_list = vectors.cpu().tolist()
|
||||||
|
|
||||||
|
|
@ -148,6 +164,7 @@ async def semantic_chunk(request: Request):
|
||||||
data = await request.json()
|
data = await request.json()
|
||||||
raw_text = data.get("text", "")
|
raw_text = data.get("text", "")
|
||||||
threshold_percentile = data.get("threshold", 95)
|
threshold_percentile = data.get("threshold", 95)
|
||||||
|
target_dimensions = _resolve_target_dimensions(data)
|
||||||
|
|
||||||
if not raw_text:
|
if not raw_text:
|
||||||
return {"chunks": [], "embeddings": []}
|
return {"chunks": [], "embeddings": []}
|
||||||
|
|
@ -162,9 +179,11 @@ async def semantic_chunk(request: Request):
|
||||||
# Split by sentences
|
# Split by sentences
|
||||||
sentences = [s.strip() for s in raw_text.replace('\n', ' ').split('. ') if s.strip()]
|
sentences = [s.strip() for s in raw_text.replace('\n', ' ').split('. ') if s.strip()]
|
||||||
if len(sentences) < 2:
|
if len(sentences) < 2:
|
||||||
|
single = model.encode([f"search_document: {raw_text}"], convert_to_tensor=True)
|
||||||
|
single = pad_and_normalize(single, target_dimensions=target_dimensions)
|
||||||
return {
|
return {
|
||||||
"chunks": [raw_text],
|
"chunks": [raw_text],
|
||||||
"embeddings": model.encode([f"search_document: {raw_text}"]).tolist()
|
"embeddings": single.cpu().tolist(),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Generate sentence embeddings to find breakpoints via cosine distance
|
# Generate sentence embeddings to find breakpoints via cosine distance
|
||||||
|
|
@ -189,7 +208,7 @@ async def semantic_chunk(request: Request):
|
||||||
[f"search_document: {c}" for c in chunks],
|
[f"search_document: {c}" for c in chunks],
|
||||||
convert_to_tensor=True
|
convert_to_tensor=True
|
||||||
)
|
)
|
||||||
final_embeddings = pad_and_normalize(final_embeddings)
|
final_embeddings = pad_and_normalize(final_embeddings, target_dimensions=target_dimensions)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"chunks": chunks,
|
"chunks": chunks,
|
||||||
|
|
|
||||||
|
|
@ -461,7 +461,7 @@ embeddings. This avoids naive fixed-size splits that can break context
|
||||||
mid-concept.
|
mid-concept.
|
||||||
|
|
||||||
\underline{Vector storage and retrieval with pgvector}\\
|
\underline{Vector storage and retrieval with pgvector}\\
|
||||||
Returned chunk embeddings are stored in RoleRagDocument.embedding (1536
|
Returned chunk embeddings are stored in RoleRagDocument.embedding (by default 768
|
||||||
dimensions) in PostgreSQL using pgvector, linked relationally to role
|
dimensions) in PostgreSQL using pgvector, linked relationally to role
|
||||||
and source file metadata. Retrieval is performed in SQL using
|
and source file metadata. Retrieval is performed in SQL using
|
||||||
cosine-distance ranking and top-k selection, allowing role filtering and
|
cosine-distance ranking and top-k selection, allowing role filtering and
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue