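Moved hard-coded inference timeouts, ingestion chunk size, and semantic-chunk threshold to settings; removed the hard-coded model name from the prompt-refinement request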

This commit is contained in:
Viswamedha Nalabotu 2026-03-22 20:04:14 +00:00
parent a6ed273a13
commit 9044e2afaa
6 changed files with 15 additions and 15 deletions

View file

@ -64,16 +64,13 @@ def ingest_training_file_task(self, file_uuid):
all_documents = [] all_documents = []
chunk_counter = 0 chunk_counter = 0
timeout = Timeout(60.0) with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
for text_segment in _get_text_chunks(raw_text, size=settings.INGESTION_CHUNK_SIZE):
with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client:
for text_segment in _get_text_chunks(raw_text):
response = client.post( response = client.post(
settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT, settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
json={ json={
"text": text_segment, "text": text_segment,
"threshold": 95, "threshold": settings.SEMANTIC_CHUNK_THRESHOLD,
}, },
) )
response.raise_for_status() response.raise_for_status()
@ -180,14 +177,13 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
] ]
try: try:
with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client: with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
for agent_type, user_prompt in refine_calls: for agent_type, user_prompt in refine_calls:
if agent_type not in configs: if agent_type not in configs:
continue continue
response = client.post( response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={ json={
"model": "meta-llama-3.1-8b-instruct",
"messages": [{"role": "user", "content": user_prompt}], "messages": [{"role": "user", "content": user_prompt}],
"max_tokens": 600, "max_tokens": 600,
}, },

View file

@ -78,7 +78,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
### MCP Handling ### ### MCP Handling ###
async def orchestrate(self, message: str, config: AgentConfig, minimum_turns: int = 2, maximum_turns: int = 5, async def orchestrate(self, message: str, config: AgentConfig, minimum_turns: int = 2, maximum_turns: int = 5,
max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: int = 60.0) -> str: max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: float = settings.INFERENCE_REQUEST_TIMEOUT) -> str:
""" """
Orchestrates a multi-turn conversation with the agent, including tool calls and reasoning steps. Orchestrates a multi-turn conversation with the agent, including tool calls and reasoning steps.
""" """
@ -153,7 +153,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
payload["stop"] = stop payload["stop"] = stop
try: try:
chunks: list[str] = [] chunks: list[str] = []
async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client: async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response: async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
response.raise_for_status() response.raise_for_status()
async for line in response.aiter_lines(): async for line in response.aiter_lines():

View file

@ -111,7 +111,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
payload['stop'] = stop payload['stop'] = stop
try: try:
chunks: list[str] = [] chunks: list[str] = []
async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client: async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response: async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
response.raise_for_status() response.raise_for_status()
async for line in response.aiter_lines(): async for line in response.aiter_lines():

View file

@ -65,7 +65,7 @@ class MCPRouter:
async def _get_embedding(self, text): async def _get_embedding(self, text):
logger.info('MCP embedding request started') logger.info('MCP embedding request started')
async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: async with httpx.AsyncClient(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
response = await client.post( response = await client.post(
settings.INFERENCE_EMBEDDINGS_ENDPOINT, settings.INFERENCE_EMBEDDINGS_ENDPOINT,
json={'input': text}, json={'input': text},

View file

@ -545,7 +545,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses) prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)
try: try:
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
response = client.post( response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={ json={
@ -754,7 +754,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title
prompt = f"Context:\n{context}\n\nQuestion: {message}" prompt = f"Context:\n{context}\n\nQuestion: {message}"
try: try:
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
response = client.post( response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={ json={
@ -784,7 +784,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
f"Return only the revised page body." f"Return only the revised page body."
) )
try: try:
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
response = client.post( response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={ json={

View file

@ -35,7 +35,11 @@ INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings" INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions" INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600')) INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
INFERENCE_REQUEST_TIMEOUT = float(os.getenv('INFERENCE_REQUEST_TIMEOUT', '60'))
INFERENCE_STREAM_TIMEOUT = float(os.getenv('INFERENCE_STREAM_TIMEOUT', '120'))
EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768')) EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768'))
INGESTION_CHUNK_SIZE = int(os.getenv('INGESTION_CHUNK_SIZE', '10000'))
SEMANTIC_CHUNK_THRESHOLD = int(os.getenv('SEMANTIC_CHUNK_THRESHOLD', '95'))
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/') STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')
MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/') MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/')