From f6ff57e51e8478bd3bc5f4ed1d38f2e97dffe5b1 Mon Sep 17 00:00:00 2001 From: Viswamedha Nalabotu Date: Wed, 11 Mar 2026 21:33:17 +0000 Subject: [PATCH] Hardcoded dimensions --- apps/knowledge/tasks.py | 1 - apps/onboarding/mcp.py | 2 -- gpu_server.py | 32 ++++++-------------------------- 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py index 46f51cd..65e7476 100644 --- a/apps/knowledge/tasks.py +++ b/apps/knowledge/tasks.py @@ -67,7 +67,6 @@ def ingest_training_file_task(self, file_uuid): json={ "text": text_segment, "threshold": 95, - "target_dimensions": settings.EMBEDDING_DIMENSIONS, }, ) response.raise_for_status() diff --git a/apps/onboarding/mcp.py b/apps/onboarding/mcp.py index 9227cba..f205801 100644 --- a/apps/onboarding/mcp.py +++ b/apps/onboarding/mcp.py @@ -68,13 +68,11 @@ class MCPRouter: async def _get_embedding(self, text): logger.info('MCP embedding request started') - target_dimensions = RoleRagDocument._meta.get_field('embedding').dimensions async with httpx.AsyncClient() as client: response = await client.post( settings.INFERENCE_EMBEDDINGS_ENDPOINT, json={ 'input': text, - 'target_dimensions': target_dimensions, }, ) response.raise_for_status() diff --git a/gpu_server.py b/gpu_server.py index 9b60eea..5584d55 100644 --- a/gpu_server.py +++ b/gpu_server.py @@ -20,6 +20,7 @@ logger = logging.getLogger("gpu-node") EMBED_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5" LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH", "/app/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf") +TARGET_DIMENSIONS = 768 state: Dict[str, Any] = {} @@ -73,25 +74,6 @@ async def health(): "llm_ready": state.get("llm") is not None, } - -def _resolve_target_dimensions(payload: Dict[str, Any]) -> int: - raw_target = payload.get("target_dimensions") - if raw_target in (None, ""): - raise HTTPException(status_code=400, detail="'target_dimensions' is required and must be a positive integer") - - try: - target = int(raw_target) - except (TypeError, ValueError) as exc: - logger.warning("Invalid target_dimensions value: %s", raw_target) - raise HTTPException(status_code=400, detail="'target_dimensions' must be an integer") from exc - - if target <= 0: - logger.warning("Non-positive target_dimensions value: %s", target) - raise HTTPException(status_code=400, detail="'target_dimensions' must be > 0") - - return target - - def pad_and_normalize(embeddings: Tensor, target_dimensions: int) -> Tensor: curr_dim = embeddings.shape[1] if curr_dim < target_dimensions: @@ -108,8 +90,7 @@ async def embeddings(request: Request): input_kind = type(input_data).__name__ input_count = len(input_data) if isinstance(input_data, list) else (1 if isinstance(input_data, str) else 0) logger.info("/v1/embeddings request received: input_kind=%s input_count=%s", input_kind, input_count) - target_dimensions = _resolve_target_dimensions(data) - logger.info("/v1/embeddings resolved target_dimensions=%s", target_dimensions) + logger.info("/v1/embeddings using target_dimensions=%s", TARGET_DIMENSIONS) if isinstance(input_data, str): inputs = [input_data] @@ -138,7 +119,7 @@ async def embeddings(request: Request): with no_grad(): vectors = model.encode(prefixed_inputs, convert_to_tensor=True) - vectors = pad_and_normalize(vectors, target_dimensions=target_dimensions) + vectors = pad_and_normalize(vectors, target_dimensions=TARGET_DIMENSIONS) vector_list = vectors.cpu().tolist() @@ -166,8 +147,7 @@ async def semantic_chunk(request: Request): threshold_percentile = data.get("threshold", 95) raw_text_len = len(raw_text) if isinstance(raw_text, str) else -1 logger.info("/v1/semantic-chunk request received: text_len=%s threshold=%s", raw_text_len, threshold_percentile,) - target_dimensions = _resolve_target_dimensions(data) - logger.info("/v1/semantic-chunk resolved target_dimensions=%s", target_dimensions) + logger.info("/v1/semantic-chunk using target_dimensions=%s", TARGET_DIMENSIONS) if not raw_text: logger.info("/v1/semantic-chunk empty text payload") @@ -185,7 +165,7 @@ async def semantic_chunk(request: Request): sentences = [s.strip() for s in raw_text.replace('\n', ' ').split('. ') if s.strip()] if len(sentences) < 2: single = model.encode([f"search_document: {raw_text}"], convert_to_tensor=True) - single = pad_and_normalize(single, target_dimensions=target_dimensions) + single = pad_and_normalize(single, target_dimensions=TARGET_DIMENSIONS) return { "chunks": [raw_text], "embeddings": single.cpu().tolist(), @@ -212,7 +192,7 @@ async def semantic_chunk(request: Request): [f"search_document: {c}" for c in chunks], convert_to_tensor=True ) - final_embeddings = pad_and_normalize(final_embeddings, target_dimensions=target_dimensions) + final_embeddings = pad_and_normalize(final_embeddings, target_dimensions=TARGET_DIMENSIONS) return { "chunks": chunks,