Consolidated inference service endpoint URLs into the settings file

2026-03-08 13:16:26 +00:00 · 2026-03-08 13:16:26 +00:00 · c64c1d90c4
commit c64c1d90c4
parent 4843ecf9c0
5 changed files with 13 additions and 6 deletions
--- a/apps/knowledge/tasks.py
+++ b/apps/knowledge/tasks.py
@ -9,6 +9,7 @@ from pypdf import PdfReader

 from apps.knowledge.models import RoleRagDocument, TrainingFile

+
 def _decode_text_bytes(raw_bytes: bytes) -> str:
    try:
        return raw_bytes.decode('utf-8')
@ -63,7 +64,7 @@ def ingest_training_file_task(self, file_uuid):
                                                              
            for text_segment in _get_text_chunks(raw_text):
                response = client.post(
-                    f"{settings.INFERENCE_URL}/v1/semantic-chunk",
+                    settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
                    json={"text": text_segment, "threshold": 95}
                )
                response.raise_for_status()
--- a/apps/onboarding/consumers.py
+++ b/apps/onboarding/consumers.py
@ -305,7 +305,7 @@ class OnboardingConsumer(AsyncWebsocketConsumer):
                
                try:
                    response = await client.post(
-                        f"{settings.INFERENCE_URL}/v1/chat/completions",
+                        settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                        json={
                            "model": llm_config.get("model_id", "meta-llama-3.1-8b"),
                            "messages": messages,
--- a/apps/onboarding/mcp.py
+++ b/apps/onboarding/mcp.py
@ -6,6 +6,7 @@ from pgvector.django import CosineDistance
 from apps.knowledge.models import RoleRagDocument
 from apps.onboarding.models import OnboardingSession

+
 class MCPRouter:

    def get_tool_definitions(self):
@ -47,7 +48,7 @@ class MCPRouter:
    async def _get_embedding(self, text):
        async with httpx.AsyncClient() as client:
            response = await client.post(
-                f"{settings.INFERENCE_URL}/v1/embeddings",                                           
+                settings.INFERENCE_EMBEDDINGS_ENDPOINT,
                json={"input": text}
            )
                                                         
--- a/apps/onboarding/viewsets.py
+++ b/apps/onboarding/viewsets.py
@ -15,6 +15,7 @@ from apps.accounts.permissions import CanManageOrganization, can_manage_organiza
 from apps.onboarding.models import AgentConfig, AgentInteractionLog, OnboardingFlow, OnboardingSession
 from apps.onboarding.serializers import AgentConfigSerializer, AgentInteractionLogSerializer, OnboardingFlowSerializer, OnboardingSessionSerializer

+
 class OnboardingFlowViewSet(ModelViewSet):
    queryset = OnboardingFlow.objects.all()
    serializer_class = OnboardingFlowSerializer
@ -445,7 +446,7 @@ class OnboardingSessionViewSet(ModelViewSet):
        try:
            with httpx.Client(timeout=60.0) as client:
                response = client.post(
-                    f"{settings.INFERENCE_URL}/v1/chat/completions",
+                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                    json={
                        "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
                        "messages": [
@ -482,7 +483,7 @@ class OnboardingSessionViewSet(ModelViewSet):
        try:
            with httpx.Client(timeout=60.0) as client:
                response = client.post(
-                    f"{settings.INFERENCE_URL}/v1/chat/completions",
+                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                    json={
                        "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
                        "messages": [
--- a/config/settings.py
+++ b/config/settings.py
@ -3,8 +3,9 @@ Django settings will use prefix of DJANGO_ for environment variables.
 """

 import os
-from pathlib import Path
 import sys
+from pathlib import Path
+
 from dotenv import load_dotenv

 BASE_DIR = Path(__file__).resolve().parent.parent
@ -27,6 +28,9 @@ DJANGO_CELERY_BROKER_URL = os.getenv('DJANGO_CELERY_BROKER_URL', 'redis://localh
 INFERENCE_HOST = os.getenv('INFERENCE_HOST', 'localhost')
 INFERENCE_PORT = os.getenv('INFERENCE_PORT', '8001')
 INFERENCE_URL = f"http://{INFERENCE_HOST}:{INFERENCE_PORT}"
+INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
+INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
+INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
 INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))

 STATIC_URL  = os.getenv('DJANGO_STATIC_URL',  '/static/')