Moved config values to settings

2026-03-22 20:04:14 +00:00 · 2026-03-22 20:04:14 +00:00 · 9044e2afaa
commit 9044e2afaa
parent a6ed273a13
6 changed files with 15 additions and 15 deletions
--- a/apps/knowledge/tasks.py
+++ b/apps/knowledge/tasks.py
@ -64,16 +64,13 @@ def ingest_training_file_task(self, file_uuid):
        all_documents = []
        chunk_counter = 0
-        timeout = Timeout(60.0) 
+        with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
-
+            for text_segment in _get_text_chunks(raw_text, size=settings.INGESTION_CHUNK_SIZE):
-        with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client:
-            for text_segment in _get_text_chunks(raw_text):
                response = client.post(
                    settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
                    json={
                        "text": text_segment,
-                        "threshold": 95,
+                        "threshold": settings.SEMANTIC_CHUNK_THRESHOLD,
                    },
                )
                response.raise_for_status()
@ -180,14 +177,13 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
    ]
    try:
-        with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client:
+        with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
            for agent_type, user_prompt in refine_calls:
                if agent_type not in configs:
                    continue
                response = client.post(
                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                    json={
                        "model": "meta-llama-3.1-8b-instruct",
                        "messages": [{"role": "user", "content": user_prompt}],
                        "max_tokens": 600,
                    },
--- a/apps/onboarding/consumers/base.py
+++ b/apps/onboarding/consumers/base.py
@ -78,7 +78,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
    ### MCP Handling ###
    async def orchestrate(self, message: str, config: AgentConfig, minimum_turns: int = 2, maximum_turns: int = 5, 
-                          max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: int = 60.0) -> str:
+                          max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: float = settings.INFERENCE_REQUEST_TIMEOUT) -> str:
        """ 
        Orchestrates a multi-turn conversation with the agent, including tool calls and reasoning steps. 
        """
@ -153,7 +153,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
            payload["stop"] = stop
        try:
            chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
+            async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                    response.raise_for_status()
                    async for line in response.aiter_lines():
--- a/apps/onboarding/consumers/knowledge.py
+++ b/apps/onboarding/consumers/knowledge.py
@ -111,7 +111,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
            payload['stop'] = stop
        try:
            chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
+            async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                    response.raise_for_status()
                    async for line in response.aiter_lines():
--- a/apps/onboarding/mcp.py
+++ b/apps/onboarding/mcp.py
@ -65,7 +65,7 @@ class MCPRouter:
    async def _get_embedding(self, text):
        logger.info('MCP embedding request started')
-        async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+        async with httpx.AsyncClient(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
            response = await client.post(
                settings.INFERENCE_EMBEDDINGS_ENDPOINT,
                json={'input': text},
--- a/apps/onboarding/viewsets.py
+++ b/apps/onboarding/viewsets.py
@ -545,7 +545,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
            prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)
            try:
-                with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+                with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                    response = client.post(
                        settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                        json={
@ -754,7 +754,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
        context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title
        prompt = f"Context:\n{context}\n\nQuestion: {message}"
        try:
-            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+            with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                response = client.post(
                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                    json={
@ -784,7 +784,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
            f"Return only the revised page body."
        )
        try:
-            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+            with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                response = client.post(
                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                    json={
--- a/config/settings.py
+++ b/config/settings.py
@ -35,7 +35,11 @@ INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
 INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
 INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
 INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
+INFERENCE_REQUEST_TIMEOUT = float(os.getenv('INFERENCE_REQUEST_TIMEOUT', '60'))
+INFERENCE_STREAM_TIMEOUT = float(os.getenv('INFERENCE_STREAM_TIMEOUT', '120'))
 EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768'))
+INGESTION_CHUNK_SIZE = int(os.getenv('INGESTION_CHUNK_SIZE', '10000'))
+SEMANTIC_CHUNK_THRESHOLD = int(os.getenv('SEMANTIC_CHUNK_THRESHOLD', '95'))
 STATIC_URL  = os.getenv('DJANGO_STATIC_URL',  '/static/')
 MEDIA_URL   = os.getenv('DJANGO_MEDIA_URL',  '/media/')