Consolidated endpoints to settings file
This commit is contained in:
parent
4843ecf9c0
commit
c64c1d90c4
5 changed files with 13 additions and 6 deletions
|
|
@@ -9,6 +9,7 @@ from pypdf import PdfReader
|
|||
|
||||
from apps.knowledge.models import RoleRagDocument, TrainingFile
|
||||
|
||||
|
||||
def _decode_text_bytes(raw_bytes: bytes) -> str:
|
||||
try:
|
||||
return raw_bytes.decode('utf-8')
|
||||
|
|
@@ -63,7 +64,7 @@ def ingest_training_file_task(self, file_uuid):
|
|||
|
||||
for text_segment in _get_text_chunks(raw_text):
|
||||
response = client.post(
|
||||
f"{settings.INFERENCE_URL}/v1/semantic-chunk",
|
||||
settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
|
||||
json={"text": text_segment, "threshold": 95}
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
|
|
|||
|
|
@@ -305,7 +305,7 @@ class OnboardingConsumer(AsyncWebsocketConsumer):
|
|||
|
||||
try:
|
||||
response = await client.post(
|
||||
f"{settings.INFERENCE_URL}/v1/chat/completions",
|
||||
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
|
||||
json={
|
||||
"model": llm_config.get("model_id", "meta-llama-3.1-8b"),
|
||||
"messages": messages,
|
||||
|
|
|
|||
|
|
@@ -6,6 +6,7 @@ from pgvector.django import CosineDistance
|
|||
from apps.knowledge.models import RoleRagDocument
|
||||
from apps.onboarding.models import OnboardingSession
|
||||
|
||||
|
||||
class MCPRouter:
|
||||
|
||||
def get_tool_definitions(self):
|
||||
|
|
@@ -47,7 +48,7 @@ class MCPRouter:
|
|||
async def _get_embedding(self, text):
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{settings.INFERENCE_URL}/v1/embeddings",
|
||||
settings.INFERENCE_EMBEDDINGS_ENDPOINT,
|
||||
json={"input": text}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@@ -15,6 +15,7 @@ from apps.accounts.permissions import CanManageOrganization, can_manage_organiza
|
|||
from apps.onboarding.models import AgentConfig, AgentInteractionLog, OnboardingFlow, OnboardingSession
|
||||
from apps.onboarding.serializers import AgentConfigSerializer, AgentInteractionLogSerializer, OnboardingFlowSerializer, OnboardingSessionSerializer
|
||||
|
||||
|
||||
class OnboardingFlowViewSet(ModelViewSet):
|
||||
queryset = OnboardingFlow.objects.all()
|
||||
serializer_class = OnboardingFlowSerializer
|
||||
|
|
@@ -445,7 +446,7 @@ class OnboardingSessionViewSet(ModelViewSet):
|
|||
try:
|
||||
with httpx.Client(timeout=60.0) as client:
|
||||
response = client.post(
|
||||
f"{settings.INFERENCE_URL}/v1/chat/completions",
|
||||
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
|
||||
json={
|
||||
"model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
|
||||
"messages": [
|
||||
|
|
@@ -482,7 +483,7 @@ class OnboardingSessionViewSet(ModelViewSet):
|
|||
try:
|
||||
with httpx.Client(timeout=60.0) as client:
|
||||
response = client.post(
|
||||
f"{settings.INFERENCE_URL}/v1/chat/completions",
|
||||
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
|
||||
json={
|
||||
"model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
|
||||
"messages": [
|
||||
|
|
|
|||
|
|
@@ -3,8 +3,9 @@ Django settings will use prefix of DJANGO_ for environment variables.
|
|||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
|
|
@@ -27,6 +28,9 @@ DJANGO_CELERY_BROKER_URL = os.getenv('DJANGO_CELERY_BROKER_URL', 'redis://localh
|
|||
INFERENCE_HOST = os.getenv('INFERENCE_HOST', 'localhost')
|
||||
INFERENCE_PORT = os.getenv('INFERENCE_PORT', '8001')
|
||||
INFERENCE_URL = f"http://{INFERENCE_HOST}:{INFERENCE_PORT}"
|
||||
INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
|
||||
INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
|
||||
INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
|
||||
INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
|
||||
|
||||
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')
|
||||
|
|
|
|||
Loading…
Reference in a new issue