Consolidated endpoints to settings file

This commit is contained in:
Viswamedha Nalabotu 2026-03-08 13:16:26 +00:00
parent 4843ecf9c0
commit c64c1d90c4
5 changed files with 13 additions and 6 deletions

View file

@@ -9,6 +9,7 @@ from pypdf import PdfReader
from apps.knowledge.models import RoleRagDocument, TrainingFile from apps.knowledge.models import RoleRagDocument, TrainingFile
def _decode_text_bytes(raw_bytes: bytes) -> str: def _decode_text_bytes(raw_bytes: bytes) -> str:
try: try:
return raw_bytes.decode('utf-8') return raw_bytes.decode('utf-8')
@@ -63,7 +64,7 @@ def ingest_training_file_task(self, file_uuid):
for text_segment in _get_text_chunks(raw_text): for text_segment in _get_text_chunks(raw_text):
response = client.post( response = client.post(
f"{settings.INFERENCE_URL}/v1/semantic-chunk", settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
json={"text": text_segment, "threshold": 95} json={"text": text_segment, "threshold": 95}
) )
response.raise_for_status() response.raise_for_status()

View file

@@ -305,7 +305,7 @@ class OnboardingConsumer(AsyncWebsocketConsumer):
try: try:
response = await client.post( response = await client.post(
f"{settings.INFERENCE_URL}/v1/chat/completions", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={ json={
"model": llm_config.get("model_id", "meta-llama-3.1-8b"), "model": llm_config.get("model_id", "meta-llama-3.1-8b"),
"messages": messages, "messages": messages,

View file

@@ -6,6 +6,7 @@ from pgvector.django import CosineDistance
from apps.knowledge.models import RoleRagDocument from apps.knowledge.models import RoleRagDocument
from apps.onboarding.models import OnboardingSession from apps.onboarding.models import OnboardingSession
class MCPRouter: class MCPRouter:
def get_tool_definitions(self): def get_tool_definitions(self):
@@ -47,7 +48,7 @@ class MCPRouter:
async def _get_embedding(self, text): async def _get_embedding(self, text):
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
response = await client.post( response = await client.post(
f"{settings.INFERENCE_URL}/v1/embeddings", settings.INFERENCE_EMBEDDINGS_ENDPOINT,
json={"input": text} json={"input": text}
) )

View file

@@ -15,6 +15,7 @@ from apps.accounts.permissions import CanManageOrganization, can_manage_organiza
from apps.onboarding.models import AgentConfig, AgentInteractionLog, OnboardingFlow, OnboardingSession from apps.onboarding.models import AgentConfig, AgentInteractionLog, OnboardingFlow, OnboardingSession
from apps.onboarding.serializers import AgentConfigSerializer, AgentInteractionLogSerializer, OnboardingFlowSerializer, OnboardingSessionSerializer from apps.onboarding.serializers import AgentConfigSerializer, AgentInteractionLogSerializer, OnboardingFlowSerializer, OnboardingSessionSerializer
class OnboardingFlowViewSet(ModelViewSet): class OnboardingFlowViewSet(ModelViewSet):
queryset = OnboardingFlow.objects.all() queryset = OnboardingFlow.objects.all()
serializer_class = OnboardingFlowSerializer serializer_class = OnboardingFlowSerializer
@@ -445,7 +446,7 @@ class OnboardingSessionViewSet(ModelViewSet):
try: try:
with httpx.Client(timeout=60.0) as client: with httpx.Client(timeout=60.0) as client:
response = client.post( response = client.post(
f"{settings.INFERENCE_URL}/v1/chat/completions", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={ json={
"model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"), "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
"messages": [ "messages": [
@@ -482,7 +483,7 @@ class OnboardingSessionViewSet(ModelViewSet):
try: try:
with httpx.Client(timeout=60.0) as client: with httpx.Client(timeout=60.0) as client:
response = client.post( response = client.post(
f"{settings.INFERENCE_URL}/v1/chat/completions", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={ json={
"model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"), "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
"messages": [ "messages": [

View file

@@ -3,8 +3,9 @@ Django settings will use prefix of DJANGO_ for environment variables.
""" """
import os import os
from pathlib import Path
import sys import sys
from pathlib import Path
from dotenv import load_dotenv from dotenv import load_dotenv
BASE_DIR = Path(__file__).resolve().parent.parent BASE_DIR = Path(__file__).resolve().parent.parent
@@ -27,6 +28,9 @@ DJANGO_CELERY_BROKER_URL = os.getenv('DJANGO_CELERY_BROKER_URL', 'redis://localh
INFERENCE_HOST = os.getenv('INFERENCE_HOST', 'localhost') INFERENCE_HOST = os.getenv('INFERENCE_HOST', 'localhost')
INFERENCE_PORT = os.getenv('INFERENCE_PORT', '8001') INFERENCE_PORT = os.getenv('INFERENCE_PORT', '8001')
INFERENCE_URL = f"http://{INFERENCE_HOST}:{INFERENCE_PORT}" INFERENCE_URL = f"http://{INFERENCE_HOST}:{INFERENCE_PORT}"
INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600')) INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/') STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')