Hardcoded dimensions
This commit is contained in:
parent
95fc6dccf8
commit
f6ff57e51e
3 changed files with 6 additions and 29 deletions
|
|
@ -67,7 +67,6 @@ def ingest_training_file_task(self, file_uuid):
|
||||||
json={
|
json={
|
||||||
"text": text_segment,
|
"text": text_segment,
|
||||||
"threshold": 95,
|
"threshold": 95,
|
||||||
"target_dimensions": settings.EMBEDDING_DIMENSIONS,
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
|
||||||
|
|
@ -68,13 +68,11 @@ class MCPRouter:
|
||||||
|
|
||||||
async def _get_embedding(self, text):
|
async def _get_embedding(self, text):
|
||||||
logger.info('MCP embedding request started')
|
logger.info('MCP embedding request started')
|
||||||
target_dimensions = RoleRagDocument._meta.get_field('embedding').dimensions
|
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
settings.INFERENCE_EMBEDDINGS_ENDPOINT,
|
settings.INFERENCE_EMBEDDINGS_ENDPOINT,
|
||||||
json={
|
json={
|
||||||
'input': text,
|
'input': text,
|
||||||
'target_dimensions': target_dimensions,
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ logger = logging.getLogger("gpu-node")
|
||||||
|
|
||||||
EMBED_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
|
EMBED_MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"
|
||||||
LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH", "/app/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")
|
LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH", "/app/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")
|
||||||
|
TARGET_DIMENSIONS = 768
|
||||||
|
|
||||||
state: Dict[str, Any] = {}
|
state: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
|
@ -73,25 +74,6 @@ async def health():
|
||||||
"llm_ready": state.get("llm") is not None,
|
"llm_ready": state.get("llm") is not None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _resolve_target_dimensions(payload: Dict[str, Any]) -> int:
|
|
||||||
raw_target = payload.get("target_dimensions")
|
|
||||||
if raw_target in (None, ""):
|
|
||||||
raise HTTPException(status_code=400, detail="'target_dimensions' is required and must be a positive integer")
|
|
||||||
|
|
||||||
try:
|
|
||||||
target = int(raw_target)
|
|
||||||
except (TypeError, ValueError) as exc:
|
|
||||||
logger.warning("Invalid target_dimensions value: %s", raw_target)
|
|
||||||
raise HTTPException(status_code=400, detail="'target_dimensions' must be an integer") from exc
|
|
||||||
|
|
||||||
if target <= 0:
|
|
||||||
logger.warning("Non-positive target_dimensions value: %s", target)
|
|
||||||
raise HTTPException(status_code=400, detail="'target_dimensions' must be > 0")
|
|
||||||
|
|
||||||
return target
|
|
||||||
|
|
||||||
|
|
||||||
def pad_and_normalize(embeddings: Tensor, target_dimensions: int) -> Tensor:
|
def pad_and_normalize(embeddings: Tensor, target_dimensions: int) -> Tensor:
|
||||||
curr_dim = embeddings.shape[1]
|
curr_dim = embeddings.shape[1]
|
||||||
if curr_dim < target_dimensions:
|
if curr_dim < target_dimensions:
|
||||||
|
|
@ -108,8 +90,7 @@ async def embeddings(request: Request):
|
||||||
input_kind = type(input_data).__name__
|
input_kind = type(input_data).__name__
|
||||||
input_count = len(input_data) if isinstance(input_data, list) else (1 if isinstance(input_data, str) else 0)
|
input_count = len(input_data) if isinstance(input_data, list) else (1 if isinstance(input_data, str) else 0)
|
||||||
logger.info("/v1/embeddings request received: input_kind=%s input_count=%s", input_kind, input_count)
|
logger.info("/v1/embeddings request received: input_kind=%s input_count=%s", input_kind, input_count)
|
||||||
target_dimensions = _resolve_target_dimensions(data)
|
logger.info("/v1/embeddings using target_dimensions=%s", TARGET_DIMENSIONS)
|
||||||
logger.info("/v1/embeddings resolved target_dimensions=%s", target_dimensions)
|
|
||||||
|
|
||||||
if isinstance(input_data, str):
|
if isinstance(input_data, str):
|
||||||
inputs = [input_data]
|
inputs = [input_data]
|
||||||
|
|
@ -138,7 +119,7 @@ async def embeddings(request: Request):
|
||||||
|
|
||||||
with no_grad():
|
with no_grad():
|
||||||
vectors = model.encode(prefixed_inputs, convert_to_tensor=True)
|
vectors = model.encode(prefixed_inputs, convert_to_tensor=True)
|
||||||
vectors = pad_and_normalize(vectors, target_dimensions=target_dimensions)
|
vectors = pad_and_normalize(vectors, target_dimensions=TARGET_DIMENSIONS)
|
||||||
|
|
||||||
vector_list = vectors.cpu().tolist()
|
vector_list = vectors.cpu().tolist()
|
||||||
|
|
||||||
|
|
@ -166,8 +147,7 @@ async def semantic_chunk(request: Request):
|
||||||
threshold_percentile = data.get("threshold", 95)
|
threshold_percentile = data.get("threshold", 95)
|
||||||
raw_text_len = len(raw_text) if isinstance(raw_text, str) else -1
|
raw_text_len = len(raw_text) if isinstance(raw_text, str) else -1
|
||||||
logger.info("/v1/semantic-chunk request received: text_len=%s threshold=%s", raw_text_len, threshold_percentile,)
|
logger.info("/v1/semantic-chunk request received: text_len=%s threshold=%s", raw_text_len, threshold_percentile,)
|
||||||
target_dimensions = _resolve_target_dimensions(data)
|
logger.info("/v1/semantic-chunk using target_dimensions=%s", TARGET_DIMENSIONS)
|
||||||
logger.info("/v1/semantic-chunk resolved target_dimensions=%s", target_dimensions)
|
|
||||||
|
|
||||||
if not raw_text:
|
if not raw_text:
|
||||||
logger.info("/v1/semantic-chunk empty text payload")
|
logger.info("/v1/semantic-chunk empty text payload")
|
||||||
|
|
@ -185,7 +165,7 @@ async def semantic_chunk(request: Request):
|
||||||
sentences = [s.strip() for s in raw_text.replace('\n', ' ').split('. ') if s.strip()]
|
sentences = [s.strip() for s in raw_text.replace('\n', ' ').split('. ') if s.strip()]
|
||||||
if len(sentences) < 2:
|
if len(sentences) < 2:
|
||||||
single = model.encode([f"search_document: {raw_text}"], convert_to_tensor=True)
|
single = model.encode([f"search_document: {raw_text}"], convert_to_tensor=True)
|
||||||
single = pad_and_normalize(single, target_dimensions=target_dimensions)
|
single = pad_and_normalize(single, target_dimensions=TARGET_DIMENSIONS)
|
||||||
return {
|
return {
|
||||||
"chunks": [raw_text],
|
"chunks": [raw_text],
|
||||||
"embeddings": single.cpu().tolist(),
|
"embeddings": single.cpu().tolist(),
|
||||||
|
|
@ -212,7 +192,7 @@ async def semantic_chunk(request: Request):
|
||||||
[f"search_document: {c}" for c in chunks],
|
[f"search_document: {c}" for c in chunks],
|
||||||
convert_to_tensor=True
|
convert_to_tensor=True
|
||||||
)
|
)
|
||||||
final_embeddings = pad_and_normalize(final_embeddings, target_dimensions=target_dimensions)
|
final_embeddings = pad_and_normalize(final_embeddings, target_dimensions=TARGET_DIMENSIONS)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"chunks": chunks,
|
"chunks": chunks,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue