Added streaming and chunking with other fixes with KA consumer

2026-03-18 20:07:24 +00:00 · 2026-03-18 20:07:24 +00:00 · e818991ae3
commit e818991ae3
parent d19c50cf77
17 changed files with 644 additions and 414 deletions
--- a/apps/onboarding/consumers/init.py
+++ b/apps/onboarding/consumers/init.py
@ -1,4 +1,5 @@
 from .base import *
 from .chat import *
 from .generate import *
+from .knowledge import *
 from .progress import *
--- a/apps/onboarding/consumers/base.py
+++ b/apps/onboarding/consumers/base.py
@ -26,8 +26,9 @@ class LogType(Enum):
    INFO        = "info"        # Debug/Verbose logs
    THOUGHT     = "thought"     # AI internal reasoning turns
    TOOL_START  = "tool_start"  # When an MCP tool is called
-    TOOL_RESULT = "tool_result" # When data comes back from a tool
-    COMPLETED   = "completed"   # The final completion signal
+    TOOL_RESULT  = "tool_result"  # When data comes back from a tool
+    STREAM_CHUNK = "stream_chunk" # Incremental token from a streaming LLM response
+    COMPLETED    = "completed"    # The final completion signal

 class BaseOnboardingConsumer(AsyncWebsocketConsumer):
    """
@ -135,6 +136,53 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
                    return f"Error: {str(e)}"
        return last_content

+    async def stream_llm(self, config, prompt: str, *, max_tokens: int = 1024, stop: list[str] | None = None, system_prompt_suffix: str | None = None) -> str | None:
+        """Single-turn streaming LLM call. Sends STREAM_CHUNK events for each token and returns the full text."""
+        if not config:
+            return None
+        system_prompt = config.system_prompt or OnboardingPrompts.default_system_prompt()
+        if system_prompt_suffix:
+            system_prompt = system_prompt + "\n\n" + system_prompt_suffix
+        llm_config = config.llm_config if isinstance(config.llm_config, dict) else {}
+        payload: dict = {
+            "model": llm_config.get("model_id", "meta-llama-3.1-8b"),
+            "messages": [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": prompt},
+            ],
+            "max_tokens": max_tokens,
+            "stream": True,
+        }
+        if stop:
+            payload["stop"] = stop
+        try:
+            chunks: list[str] = []
+            async with httpx.AsyncClient(timeout=120.0) as client:
+                async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        if not line.startswith("data: "):
+                            continue
+                        data = line[6:].strip()
+                        if data == "[DONE]":
+                            break
+                        try:
+                            chunk_obj = json.loads(data)
+                            choice = chunk_obj["choices"][0]
+                            delta = choice.get("delta", {}).get("content", "")
+                            if delta:
+                                chunks.append(delta)
+                                await self.send_log(LogType.STREAM_CHUNK, delta)
+                            if choice.get("finish_reason") == "length":
+                                self.logger.warning("LLM response truncated (finish_reason=length)")
+                                await self.send_log(LogType.STATUS, "Response was cut off, try increasing Max Tokens.")
+                        except Exception:
+                            continue
+            return "".join(chunks).strip() or None
+        except Exception as e:
+            self.logger.exception("Streaming LLM call failed: %s", e)
+            return None
+
    ### Regular Helpers ###
    async def send_log(self, log_type: LogType, message: str, content: str | dict | None = None):
        if log_type == LogType.ERROR:
--- a/apps/onboarding/consumers/chat.py
+++ b/apps/onboarding/consumers/chat.py
@ -21,5 +21,10 @@ class OnboardingChatConsumer(BaseOnboardingConsumer):
        if config is None:
            await self.send_error("Forbidden or Invalid Config UUID")
            return
-        response = await self.orchestrate(user_query, config, max_tokens=max_tokens)        
-        await self.send_log(LogType.COMPLETED, "Inferenced complete.", {"response": response})
+        response = await self.stream_llm(
+            config,
+            user_query,
+            max_tokens=max_tokens or 1024,
+            system_prompt_suffix="Respond in plain text only. Do not use markdown formatting, bullet points, headers, bold, italics, or code blocks.",
+        )
+        await self.send_log(LogType.COMPLETED, "Inference complete.", {"response": response or ""})
--- a/apps/onboarding/consumers/generate.py
+++ b/apps/onboarding/consumers/generate.py
@ -54,7 +54,7 @@ class OnboardingGenerateConsumer(BaseOnboardingConsumer):
            context_markdown = self.format_knowledge_context(knowledge_hits)
            ka_response = await self.orchestrate(
                OnboardingPrompts.knowledge_generation_prompt(topic, context_markdown),
-                ka_config, minimum_turns=2, max_tokens=2400
+                ka_config, minimum_turns=2, max_tokens=3500
            )
            full_structure.append({
                "title": topic,
--- a/apps/onboarding/consumers/knowledge.py
+++ b/apps/onboarding/consumers/knowledge.py
@ -0,0 +1,213 @@
+import json
+import re
+
+import httpx
+
+from channels.db import database_sync_to_async
+from django.conf import settings
+from django.utils import timezone
+
+from apps.onboarding.consumers.base import BaseOnboardingConsumer, LogType
+from apps.onboarding.consumers.prompts import OnboardingPrompts
+from apps.onboarding.models import AgentInteractionLog, OnboardingSession
+
+__all__ = ['OnboardingKnowledgeConsumer']
+
+class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
+    """
+    Route: /ws/onboarding/knowledge/<uuid:session_uuid>/
+    """
+
+    session_uuid: str
+
+    def parse_extra(self):
+        self.session_uuid = self.scope['url_route']['kwargs'].get('session_uuid')
+
+    async def action_ask(self, data: dict):
+        page_uuid = data.get('page_uuid')
+        user_message = data.get('message')
+        mode = str(data.get('mode', 'separate'))
+
+        if not page_uuid or not user_message:
+            return await self.send_error('page_uuid and message are required.')
+
+        session = await self.get_session(self.session_uuid, self.user.id)
+        if not session:
+            return await self.send_error('Session not found or access denied.')
+
+        if not session.flow:
+            return await self.send_error('Onboarding flow not found.')
+
+        page = self._get_page(session.flow, str(page_uuid))
+        if not isinstance(page, dict):
+            return await self.send_error('Page not found in this flow.')
+
+        page_title = str(page.get('title') or 'Onboarding Page')
+        page_body = str(page.get('body') or '')
+        role_name = session.role.name
+        role_uuid = str(session.role.uuid)
+
+        config = await self.get_config_by_type(role_uuid, 'knowledge')
+
+        updated_page = False
+        revised_body = None
+        assistant_message = ''
+
+        if mode == 'update_page':
+            await self.send_log(LogType.STATUS, 'Revising page content...')
+            revised_body = await self._call_llm(
+                config,
+                OnboardingPrompts.ka_page_revision_prompt(role_name, page_title, page_body, str(user_message)),
+                max_tokens=3000,
+                stop=['\n[END]', '[END]'],
+            )
+            if revised_body:
+                await self.save_page_override(session, str(page_uuid), revised_body)
+                updated_page = True
+                assistant_message = (
+                    'Updated this page by integrating your clarification request into the core content. '
+                    'Please review the revised page text above.'
+                )
+
+        if not assistant_message:
+            await self.send_log(LogType.STATUS, 'Thinking...')
+            if config:
+                assistant_message = await self._call_llm(
+                    config,
+                    OnboardingPrompts.ka_help_prompt(role_name, page_title, page_body, str(user_message)),
+                    max_tokens=1024,
+                ) or OnboardingPrompts.KA_HELP_FALLBACK
+            else:
+                assistant_message = OnboardingPrompts.KA_HELP_FALLBACK
+
+        await self.save_page_help(session, str(page_uuid), str(user_message), assistant_message)
+        await self.log_interaction(session, str(user_message), assistant_message, str(page_uuid), mode, updated_page)
+
+        await self.send_log(LogType.COMPLETED, assistant_message, {
+            'updated_page': updated_page,
+            'revised_page_body': revised_body if mode == 'update_page' else None,
+        })
+
+    async def _call_llm(
+        self,
+        config,
+        prompt: str,
+        *,
+        max_tokens: int = 1024,
+        stop: list[str] | None = None,
+    ) -> str | None:
+        if not config:
+            return None
+        system_prompt = config.system_prompt or OnboardingPrompts.FALLBACK_SYSTEM_PROMPT
+        llm_config = config.llm_config if isinstance(config.llm_config, dict) else {}
+        payload: dict = {
+            'model': llm_config.get('model_id', 'meta-llama-3.1-8b'),
+            'messages': [
+                {'role': 'system', 'content': system_prompt},
+                {'role': 'user', 'content': prompt},
+            ],
+            'max_tokens': max_tokens,
+            'stream': True,
+        }
+        if stop:
+            payload['stop'] = stop
+        try:
+            chunks: list[str] = []
+            async with httpx.AsyncClient(timeout=120.0) as client:
+                async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        if not line.startswith('data: '):
+                            continue
+                        data = line[6:].strip()
+                        if data == '[DONE]':
+                            break
+                        try:
+                            chunk_obj = json.loads(data)
+                            choice = chunk_obj['choices'][0]
+                            delta = choice.get('delta', {}).get('content', '')
+                            if delta:
+                                chunks.append(delta)
+                                await self.send_log(LogType.STREAM_CHUNK, delta)
+                            if choice.get('finish_reason') == 'length':
+                                self.logger.warning('Knowledge LLM response truncated (finish_reason=length)')
+                        except Exception:
+                            continue
+            result = ''.join(chunks).strip()
+            result = re.sub(r'\n?\[END\]\s*$', '', result).strip()
+            return result or None
+        except Exception as e:
+            self.logger.exception('Knowledge LLM call failed: %s', e)
+            return None
+
+    def _get_page(self, flow, page_uuid: str) -> dict | None:
+        pages = flow.structure if isinstance(flow.structure, list) else []
+        return next(
+            (p for p in pages if isinstance(p, dict) and str(p.get('uuid')) == page_uuid),
+            None,
+        )
+
+    @database_sync_to_async
+    def get_session(self, session_uuid: str, user_id: int):
+        return (
+            OnboardingSession.objects
+            .select_related('flow', 'role')
+            .filter(uuid=session_uuid, user_id=user_id)
+            .first()
+        )
+
+    @database_sync_to_async
+    def save_page_help(self, session, page_uuid: str, user_message: str, assistant_message: str):
+        state = session.state or {}
+        page_help = state.get('page_help', {})
+        if not isinstance(page_help, dict):
+            page_help = {}
+
+        thread = page_help.get(page_uuid, [])
+        if not isinstance(thread, list):
+            thread = []
+
+        thread.append({
+            'question': user_message,
+            'answer': assistant_message,
+            'timestamp': timezone.now().isoformat(),
+        })
+
+        page_help[page_uuid] = thread[-20:]
+        state['page_help'] = page_help
+        session.state = state
+        session.save(update_fields=['state', 'updated_at'])
+
+    @database_sync_to_async
+    def save_page_override(self, session, page_uuid: str, new_body: str):
+        state = session.state if isinstance(session.state, dict) else {}
+        overrides = state.get('page_overrides', {})
+        if not isinstance(overrides, dict):
+            overrides = {}
+        overrides[page_uuid] = new_body
+        state['page_overrides'] = overrides
+        session.state = state
+        session.save(update_fields=['state', 'updated_at'])
+
+    @database_sync_to_async
+    def log_interaction(
+        self, session, user_message: str, assistant_message: str,
+        page_uuid: str, mode: str, updated_page: bool,
+    ):
+        AgentInteractionLog.objects.create(
+            session=session,
+            sender_type='user',
+            content=user_message,
+            tool_call_metadata={'action': 'ask_ka', 'page_uuid': page_uuid, 'mode': mode},
+        )
+        AgentInteractionLog.objects.create(
+            session=session,
+            sender_type='ai',
+            content=assistant_message,
+            tool_call_metadata={
+                'action': 'ask_ka_response',
+                'page_uuid': page_uuid,
+                'mode': mode,
+                'updated_page': updated_page,
+            },
+        )
--- a/apps/onboarding/consumers/progress.py
+++ b/apps/onboarding/consumers/progress.py
@ -47,12 +47,10 @@ class OnboardingProgressConsumer(BaseOnboardingConsumer):
            flow_uuid=self.flow_uuid,
        )

-        feedback = await self.orchestrate(
-            OnboardingPrompts.progress_monitoring_prompt(progress_context),
+        feedback = await self.stream_llm(
            monitor_config,
-            minimum_turns=1,
+            OnboardingPrompts.progress_monitoring_prompt(progress_context),
            max_tokens=640,
-            raise_on_error=True
        )

        await self.send_log(LogType.COMPLETED, "Progress analysis complete.", {
--- a/apps/onboarding/consumers/prompts.py
+++ b/apps/onboarding/consumers/prompts.py
@ -1,6 +1,6 @@
 import json

-__all__ = ["OnboardingPrompts"]
+__all__ = ['OnboardingPrompts']

 class OnboardingPrompts:

@ -69,3 +69,52 @@ class OnboardingPrompts:
            "Keep it short and practical.\n\n"
            f"Progress context JSON:\n{json.dumps(progress_context)}"
        )
+
+    FALLBACK_SYSTEM_PROMPT = 'You are a helpful onboarding assistant.'
+
+    KA_HELP_FALLBACK = (
+        "I couldn't reach the knowledge model right now. "
+        "Please try again, or clarify which part of this module is confusing and I can provide a shorter explanation."
+    )
+
+    @staticmethod
+    def grading_prompt(ai_fields, page_responses):
+        return (
+            'You are grading a completed onboarding final quiz. '
+            'Evaluate each learner answer for correctness using the question prompt and validation hints. '
+            'Do NOT grade multiple-choice select questions here; they are graded separately. '
+            'Grade only the provided non-select questions (for example short-answer/textarea). '
+            'For short-answer questions, use validation.accepted_answers semantically and allow equivalent phrasing. '
+            'For incorrect answers, provide a brief coaching reason that explains what is missing or incorrect, '
+            'but DO NOT reveal the correct answer, exact option text, or accepted-answer phrases. '
+            'Keep each reason to one short sentence. '
+            'Return ONLY JSON object with keys: correct_count (int), gradable_count (int), per_question (array of '
+            '{key, correct, reason}). Do not include markdown.'
+            f"\n\nQuiz fields JSON:\n{json.dumps(ai_fields, ensure_ascii=False)}"
+            f"\n\nLearner answers JSON:\n{json.dumps(page_responses, ensure_ascii=False)}"
+        )
+
+    @staticmethod
+    def ka_help_prompt(role_name, page_title, page_body, user_message):
+        return (
+            "Help the learner understand this onboarding page. Keep the explanation concise and practical. "
+            "Use markdown with bullets when useful.\n\n"
+            f"Role: {role_name}\n"
+            f"Page Title: {page_title}\n"
+            f"Page Body (excerpt): {str(page_body)[:2000]}\n"
+            f"Learner question: {user_message}"
+        )
+
+    @staticmethod
+    def ka_page_revision_prompt(role_name, page_title, page_body, user_message):
+        return (
+            "Revise the onboarding page content by integrating the learner's clarification request directly into the main page text. "
+            "Use the current page as the source of truth, preserve useful structure, and improve clarity and examples where needed. "
+            "Do not append a separate 'Clarification' section. "
+            "Return ONLY the fully revised markdown page body. "
+            "When you have finished the revision, write [END] on its own line and stop.\n\n"
+            f"Role: {role_name}\n"
+            f"Page Title: {page_title}\n"
+            f"Learner clarification request: {user_message}\n\n"
+            f"Current page markdown:\n{str(page_body)[:12000]}"
+        )
--- a/apps/onboarding/mixins.py
+++ b/apps/onboarding/mixins.py
@ -0,0 +1,10 @@
+__all__ = ['RequestParamMixin']
+
+class RequestParamMixin:
+    """Resolve a named parameter from the query string, falling back to the request body."""
+
+    def _get_param(self, name: str) -> str | None:
+        value = self.request.query_params.get(name)
+        if not value:
+            value = self.request.data.get(name)
+        return value or None
--- a/apps/onboarding/routing.py
+++ b/apps/onboarding/routing.py
@ -1,9 +1,15 @@
 from django.urls import path

-from apps.onboarding.consumers import OnboardingChatConsumer, OnboardingGenerateConsumer, OnboardingProgressConsumer
+from apps.onboarding.consumers import (
+    OnboardingChatConsumer,
+    OnboardingGenerateConsumer,
+    OnboardingKnowledgeConsumer,
+    OnboardingProgressConsumer,
+)

 websocket_urlpatterns = [
    path("ws/onboarding/chat/<uuid:config_uuid>/", OnboardingChatConsumer.as_asgi()),
    path("ws/onboarding/generate/<uuid:role_uuid>/", OnboardingGenerateConsumer.as_asgi()),
+    path("ws/onboarding/knowledge/<uuid:session_uuid>/", OnboardingKnowledgeConsumer.as_asgi()),
    path("ws/onboarding/progress/<uuid:role_uuid>/<uuid:flow_uuid>/<uuid:user_uuid>/", OnboardingProgressConsumer.as_asgi()),
 ]
--- a/apps/onboarding/viewsets.py
+++ b/apps/onboarding/viewsets.py
@ -1,6 +1,7 @@
 import httpx
 import json
 import re
+
 from django.conf import settings
 from django.db import transaction
 from django.db.models import Q
@ -9,16 +10,28 @@ from rest_framework.decorators import action
 from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
-from rest_framework.status import HTTP_200_OK, HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_403_FORBIDDEN
+from rest_framework.status import (
+    HTTP_200_OK,
+    HTTP_201_CREATED,
+    HTTP_400_BAD_REQUEST,
+    HTTP_403_FORBIDDEN,
+)
 from rest_framework.viewsets import ModelViewSet, ReadOnlyModelViewSet

-from apps.accounts.models import Organization, Role, User
+from apps.accounts.models import Organization, Role
 from apps.accounts.permissions import CanManageOrganization, can_manage_organization
+from apps.onboarding.consumers.prompts import OnboardingPrompts
+from apps.onboarding.mixins import RequestParamMixin
 from apps.onboarding.models import AgentConfig, AgentInteractionLog, OnboardingFlow, OnboardingSession
-from apps.onboarding.serializers import AgentConfigSerializer, AgentInteractionLogSerializer, OnboardingFlowSerializer, OnboardingSessionSerializer
+from apps.onboarding.serializers import (
+    AgentConfigSerializer,
+    AgentInteractionLogSerializer,
+    OnboardingFlowSerializer,
+    OnboardingSessionSerializer,
+)


-class OnboardingFlowViewSet(ModelViewSet):
+class OnboardingFlowViewSet(RequestParamMixin, ModelViewSet):
    queryset = OnboardingFlow.objects.all()
    serializer_class = OnboardingFlowSerializer
    permission_classes = [IsAuthenticated]
@ -37,16 +50,10 @@ class OnboardingFlowViewSet(ModelViewSet):
            Q(role__organization__members=user)
        ).distinct().order_by('-created_at')

-        organization_uuid = self.request.query_params.get('organization_uuid')
-        if organization_uuid in (None, ''):
-            organization_uuid = self.request.data.get('organization_uuid')
-        if organization_uuid:
+        if organization_uuid := self._get_param('organization_uuid'):
            queryset = queryset.filter(role__organization__uuid=organization_uuid)

-        role_uuid = self.request.query_params.get('role_uuid')
-        if role_uuid in (None, ''):
-            role_uuid = self.request.data.get('role_uuid')
-        if role_uuid:
+        if role_uuid := self._get_param('role_uuid'):
            queryset = queryset.filter(role__uuid=role_uuid)

        return queryset
@ -150,44 +157,27 @@ class OnboardingFlowViewSet(ModelViewSet):
            )

        session = OnboardingSession.objects.filter(user=request.user, role=flow.role, flow=flow).first()
-        created = False

        if not session:
-            # Backward compatibility for legacy sessions before flow FK existed.
-            legacy_session = OnboardingSession.objects.filter(
+            session = OnboardingSession.objects.create(
                user=request.user,
                role=flow.role,
-                flow__isnull=True,
-            ).order_by('-updated_at').first()
-
-            if legacy_session:
-                session = legacy_session
-            else:
-                session = OnboardingSession.objects.create(
-                    user=request.user,
-                    role=flow.role,
-                    flow=flow,
-                    status='active',
-                    state={
-                        'progress': 0,
-                        'current_step': 'intro',
-                        'flow_uuid': str(flow.uuid),
-                    },
-                    active_configs={},
-                )
-                created = True
-
-        if not created:
-            state = session.state if isinstance(session.state, dict) else {}
-            state['flow_uuid'] = str(flow.uuid)
-            session.flow = flow
-            session.state = state
-            session.save(update_fields=['flow', 'state', 'updated_at'])
+                flow=flow,
+                status='active',
+                state={
+                    'progress': 0,
+                    'current_step': 'intro',
+                },
+                active_configs={},
+            )
+            serializer = OnboardingSessionSerializer(session)
+            return Response(serializer.data, status=HTTP_201_CREATED)

        serializer = OnboardingSessionSerializer(session)
-        return Response(serializer.data, status=HTTP_201_CREATED if created else HTTP_200_OK)
+        return Response(serializer.data, status=HTTP_200_OK)

-class AgentConfigViewSet(ModelViewSet):
+
+class AgentConfigViewSet(RequestParamMixin, ModelViewSet):
    queryset = AgentConfig.objects.all()
    serializer_class = AgentConfigSerializer
    permission_classes = [IsAuthenticated]
@ -204,24 +194,16 @@ class AgentConfigViewSet(ModelViewSet):
            Q(organization__owner=self.request.user) | Q(organization__members=self.request.user)
        ).distinct().order_by('-updated_at')

-        organization_uuid = self.request.query_params.get('organization_uuid')
-        if organization_uuid in (None, ''):
-            organization_uuid = self.request.data.get('organization_uuid')
-        if organization_uuid:
+        if organization_uuid := self._get_param('organization_uuid'):
            queryset = queryset.filter(organization__uuid=organization_uuid)

-        role_uuid = self.request.query_params.get('role_uuid')
-        if role_uuid in (None, ''):
-            role_uuid = self.request.data.get('role_uuid')
-        if role_uuid:
+        if role_uuid := self._get_param('role_uuid'):
            queryset = queryset.filter(role__uuid=role_uuid)

        return queryset

    def create(self, request, *args, **kwargs):
-        organization_uuid = request.query_params.get('organization_uuid')
-        if organization_uuid in (None, ''):
-            organization_uuid = request.data.get('organization_uuid')
+        organization_uuid = self._get_param('organization_uuid')
        if not organization_uuid:
            raise ValidationError({'organization_uuid': 'organization_uuid is required.'})

@ -261,43 +243,21 @@ class AgentConfigViewSet(ModelViewSet):
        return Response(serializer.data, status=HTTP_201_CREATED)

    def update(self, request, *args, **kwargs):
-        config = self.get_object()
-
-        updatable_fields = {
-            'name': request.data.get('name'),
-            'agent_type': request.data.get('agent_type'),
-            'system_prompt': request.data.get('system_prompt'),
-            'llm_config': request.data.get('llm_config'),
-        }
-
-        for field, value in updatable_fields.items():
-            if value is not None:
-                setattr(config, field, value)
-
-        config.save(update_fields=['name', 'agent_type', 'system_prompt', 'llm_config', 'updated_at'])
-        serializer = self.get_serializer(config)
-        return Response(serializer.data, status=HTTP_200_OK)
+        return self.partial_update(request, *args, **kwargs)

    def partial_update(self, request, *args, **kwargs):
        config = self.get_object()
+        fields = ['name', 'agent_type', 'system_prompt', 'llm_config']
+        for field in fields:
+            if field in request.data and request.data.get(field) is not None:
+                setattr(config, field, request.data[field])

-        if 'name' in request.data:
-            config.name = request.data.get('name')
-        if 'agent_type' in request.data:
-            config.agent_type = request.data.get('agent_type')
-        if 'system_prompt' in request.data:
-            config.system_prompt = request.data.get('system_prompt')
-        if 'llm_config' in request.data:
-            config.llm_config = request.data.get('llm_config')
-
-        config.save(update_fields=['name', 'agent_type', 'system_prompt', 'llm_config', 'updated_at'])
+        config.save(update_fields=fields + ['updated_at'])
        serializer = self.get_serializer(config)
        return Response(serializer.data, status=HTTP_200_OK)

-    def destroy(self, request, *args, **kwargs):
-        return super().destroy(request, *args, **kwargs)

-class OnboardingSessionViewSet(ModelViewSet):
+class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
    queryset = OnboardingSession.objects.all()
    serializer_class = OnboardingSessionSerializer
    permission_classes = [IsAuthenticated]
@ -310,34 +270,21 @@ class OnboardingSessionViewSet(ModelViewSet):
        else:
            queryset = OnboardingSession.objects.filter(user=user)

-        organization_uuid = self.request.query_params.get('organization_uuid')
-        if organization_uuid in (None, ''):
-            organization_uuid = self.request.data.get('organization_uuid')
-        if organization_uuid:
+        if organization_uuid := self._get_param('organization_uuid'):
            queryset = queryset.filter(role__organization__uuid=organization_uuid)

-        role_uuid = self.request.query_params.get('role_uuid')
-        if role_uuid in (None, ''):
-            role_uuid = self.request.data.get('role_uuid')
-        if role_uuid:
+        if role_uuid := self._get_param('role_uuid'):
            queryset = queryset.filter(role__uuid=role_uuid)

-        user_uuid = self.request.query_params.get('user_uuid')
-        if user_uuid in (None, ''):
-            user_uuid = self.request.data.get('user_uuid')
-        if user_uuid:
+        if user_uuid := self._get_param('user_uuid'):
            if not user.is_manager and str(user.uuid) != str(user_uuid):
                raise PermissionDenied('You can only view your own progress sessions.')
            queryset = queryset.filter(user__uuid=user_uuid)

-        flow_uuid = self.request.query_params.get('flow_uuid')
-        if flow_uuid in (None, ''):
-            flow_uuid = self.request.data.get('flow_uuid')
-        if flow_uuid:
-            queryset = queryset.filter(Q(flow__uuid=flow_uuid) | Q(flow__isnull=True, state__flow_uuid=str(flow_uuid)))
+        if flow_uuid := self._get_param('flow_uuid'):
+            queryset = queryset.filter(flow__uuid=flow_uuid)

-        status_value = self.request.query_params.get('status')
-        if status_value:
+        if status_value := self.request.query_params.get('status'):
            queryset = queryset.filter(status=status_value)

        return queryset.order_by('-created_at')
@ -381,12 +328,10 @@ class OnboardingSessionViewSet(ModelViewSet):

            latest_by_user_flow = {}
            for session in role_sessions:
-                state = session.state if isinstance(session.state, dict) else {}
-                session_flow_uuid = str(session.flow.uuid) if session.flow_id else str(state.get('flow_uuid') or '')
-                if not session_flow_uuid:
+                if not session.flow_id:
                    continue

-                key = (session.user_id, session_flow_uuid)
+                key = (session.user_id, str(session.flow.uuid))
                if key not in latest_by_user_flow:
                    latest_by_user_flow[key] = session

@ -443,24 +388,11 @@ class OnboardingSessionViewSet(ModelViewSet):
        if isinstance(value, str):
            return bool(value.strip())
        if isinstance(value, (list, dict, tuple, set)):
-            return len(value) > 0
+            return bool(value)
        return True

    def _get_flow_for_session(self, session):
-        if session.flow_id:
-            return session.flow
-
-        state = session.state or {}
-        flow_uuid = state.get('flow_uuid')
-
-        flow = None
-        if flow_uuid:
-            flow = OnboardingFlow.objects.filter(uuid=flow_uuid, role=session.role).first()
-
-        if not flow:
-            flow = OnboardingFlow.objects.filter(role=session.role, is_active=True).order_by('-updated_at').first()
-
-        return flow
+        return session.flow

    def _get_page_from_flow(self, flow, page_uuid):
        pages = flow.structure if isinstance(flow.structure, list) else []
@ -521,15 +453,12 @@ class OnboardingSessionViewSet(ModelViewSet):
        session.save(update_fields=['state', 'updated_at'])

    def _build_system_prompt(self, config):
-        if not config:
-            return "You are a helpful onboarding assistant."
-        base_prompt = config.system_prompt or "You are a helpful onboarding assistant."
-        return base_prompt
+        return (config and config.system_prompt) or OnboardingPrompts.FALLBACK_SYSTEM_PROMPT

-    def _get_knowledge_agent_config(self, session):
+    def _get_agent_config(self, session, agent_type):
        role_specific = AgentConfig.objects.filter(
            role=session.role,
-            agent_type='knowledge',
+            agent_type=agent_type,
        ).order_by('-updated_at').first()
        if role_specific:
            return role_specific
@ -537,21 +466,7 @@ class OnboardingSessionViewSet(ModelViewSet):
        return AgentConfig.objects.filter(
            organization=session.role.organization,
            role__isnull=True,
-            agent_type='knowledge',
-        ).order_by('-updated_at').first()
-
-    def _get_assessment_agent_config(self, session):
-        role_specific = AgentConfig.objects.filter(
-            role=session.role,
-            agent_type='assessment',
-        ).order_by('-updated_at').first()
-        if role_specific:
-            return role_specific
-
-        return AgentConfig.objects.filter(
-            organization=session.role.organization,
-            role__isnull=True,
-            agent_type='assessment',
+            agent_type=agent_type,
        ).order_by('-updated_at').first()

    def _extract_json_object(self, text):
@ -627,24 +542,11 @@ class OnboardingSessionViewSet(ModelViewSet):
        ai_per_question = []

        if ai_fields:
-            config = self._get_assessment_agent_config(session) or self._get_knowledge_agent_config(session)
+            config = self._get_agent_config(session, 'assessment') or self._get_agent_config(session, 'knowledge')
            if not config:
                return None, {'error': 'No assessment/knowledge agent configured for grading.'}

-            prompt = (
-                'You are grading a completed onboarding final quiz. '
-                'Evaluate each learner answer for correctness using the question prompt and validation hints. '
-                'Do NOT grade multiple-choice select questions here; they are graded separately. '
-                'Grade only the provided non-select questions (for example short-answer/textarea). '
-                'For short-answer questions, use validation.accepted_answers semantically and allow equivalent phrasing. '
-                'For incorrect answers, provide a brief coaching reason that explains what is missing or incorrect, '
-                'but DO NOT reveal the correct answer, exact option text, or accepted-answer phrases. '
-                'Keep each reason to one short sentence. '
-                'Return ONLY JSON object with keys: correct_count (int), gradable_count (int), per_question (array of '
-                '{key, correct, reason}). Do not include markdown.'
-                f"\n\nQuiz fields JSON:\n{json.dumps(ai_fields, ensure_ascii=False)}"
-                f"\n\nLearner answers JSON:\n{json.dumps(page_responses, ensure_ascii=False)}"
-            )
+            prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)

            try:
                with httpx.Client(timeout=60.0) as client:
@ -751,118 +653,6 @@ class OnboardingSessionViewSet(ModelViewSet):

        return sanitized

-    def _run_ka_help(self, session, page_title, page_body, user_message):
-        config = self._get_knowledge_agent_config(session)
-        fallback = (
-            "I couldn't reach the knowledge model right now. "
-            "Please try again, or clarify which part of this module is confusing and I can provide a shorter explanation."
-        )
-
-        if not config:
-            return fallback
-
-        prompt = (
-            "Help the learner understand this onboarding page. Keep the explanation concise and practical. "
-            "Use markdown with bullets when useful.\n\n"
-            f"Role: {session.role.name}\n"
-            f"Page Title: {page_title}\n"
-            f"Page Body (excerpt): {str(page_body)[:2000]}\n"
-            f"Learner question: {user_message}"
-        )
-
-        try:
-            with httpx.Client(timeout=60.0) as client:
-                response = client.post(
-                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
-                    json={
-                        "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
-                        "messages": [
-                            {"role": "system", "content": self._build_system_prompt(config)},
-                            {"role": "user", "content": prompt},
-                        ],
-                    },
-                )
-                response.raise_for_status()
-                res_json = response.json()
-                content = res_json.get('choices', [{}])[0].get('message', {}).get('content')
-                if isinstance(content, str) and content.strip():
-                    return content.strip()
-        except Exception:
-            pass
-
-        return fallback
-
-    def _run_ka_page_revision(self, session, page_title, page_body, user_message):
-        config = self._get_knowledge_agent_config(session)
-        if not config:
-            return None
-
-        prompt = (
-            "Revise the onboarding page content by integrating the learner's clarification request directly into the main page text. "
-            "Use the current page as the source of truth, preserve useful structure, and improve clarity and examples where needed. "
-            "Do not append a separate 'Clarification' section. Return ONLY the fully revised markdown page body.\n\n"
-            f"Role: {session.role.name}\n"
-            f"Page Title: {page_title}\n"
-            f"Learner clarification request: {user_message}\n\n"
-            f"Current page markdown:\n{str(page_body)[:12000]}"
-        )
-
-        try:
-            with httpx.Client(timeout=60.0) as client:
-                response = client.post(
-                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
-                    json={
-                        "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"),
-                        "messages": [
-                            {"role": "system", "content": self._build_system_prompt(config)},
-                            {"role": "user", "content": prompt},
-                        ],
-                    },
-                )
-                response.raise_for_status()
-                res_json = response.json()
-                content = res_json.get('choices', [{}])[0].get('message', {}).get('content')
-                revised = str(content or '').strip()
-                if revised:
-                    return revised
-        except Exception:
-            pass
-
-        return None
-
-    def _append_page_help(self, session, page_uuid, user_message, assistant_message):
-        state = session.state or {}
-        page_help = state.get('page_help', {})
-        if not isinstance(page_help, dict):
-            page_help = {}
-
-        thread = page_help.get(str(page_uuid), [])
-        if not isinstance(thread, list):
-            thread = []
-
-        thread.append({
-            'question': str(user_message),
-            'answer': str(assistant_message),
-            'timestamp': timezone.now().isoformat(),
-        })
-
-        page_help[str(page_uuid)] = thread[-20:]
-        state['page_help'] = page_help
-        session.state = state
-        session.save(update_fields=['state', 'updated_at'])
-
-    def _save_session_page_override(self, session, page_uuid, new_body):
-        state = session.state if isinstance(session.state, dict) else {}
-        overrides = state.get('page_overrides', {})
-        if not isinstance(overrides, dict):
-            overrides = {}
-
-        overrides[str(page_uuid)] = str(new_body)
-        state['page_overrides'] = overrides
-        session.state = state
-        session.save(update_fields=['state', 'updated_at'])
-        return True
-
    def _evaluate_final_quiz(self, session):
        flow = self._get_flow_for_session(session)
        if not flow:
@ -1005,81 +795,11 @@ class OnboardingSessionViewSet(ModelViewSet):
                tool_call_metadata={'page_uuid': page_uuid, 'has_responses': isinstance(responses, dict)}
            )

-                             
-        
        return Response({
            'status': 'received',
            'session_state': session.state,
        })

-    @action(detail=True, methods=['post'], url_path='ask-ka')
-    def ask_ka(self, request, uuid=None):
-        session = self.get_object()
-        page_uuid = request.data.get('page_uuid')
-        user_message = request.data.get('message')
-        mode = request.data.get('mode', 'separate')
-
-        if not page_uuid or not user_message:
-            return Response({'error': 'page_uuid and message are required.'}, status=HTTP_400_BAD_REQUEST)
-
-        flow = self._get_flow_for_session(session)
-        if not flow:
-            return Response({'error': 'Onboarding flow not found.'}, status=HTTP_400_BAD_REQUEST)
-
-        page, _ = self._get_page_from_flow(flow, page_uuid)
-        if not isinstance(page, dict):
-            return Response({'error': 'Page not found for this flow.'}, status=HTTP_400_BAD_REQUEST)
-
-        page_title = str(page.get('title') or 'Onboarding Page')
-        page_body = str(page.get('body') or '')
-        updated_page = False
-        assistant_message = ''
-        revised_body = None
-        if str(mode) == 'update_page':
-            revised_body = self._run_ka_page_revision(session, page_title, page_body, str(user_message))
-            if revised_body:
-                updated_page = self._save_session_page_override(session, page_uuid, revised_body)
-                if updated_page:
-                    assistant_message = (
-                        "Updated this page by integrating your clarification request into the core content. "
-                        "Please review the revised page text above."
-                    )
-
-        if not assistant_message:
-            assistant_message = self._run_ka_help(session, page_title, page_body, str(user_message))
-
-        self._append_page_help(session, page_uuid, user_message, assistant_message)
-
-        AgentInteractionLog.objects.create(
-            session=session,
-            sender_type='user',
-            content=str(user_message),
-            tool_call_metadata={
-                'action': 'ask_ka',
-                'page_uuid': str(page_uuid),
-                'mode': str(mode),
-            },
-        )
-        AgentInteractionLog.objects.create(
-            session=session,
-            sender_type='ai',
-            content=str(assistant_message),
-            tool_call_metadata={
-                'action': 'ask_ka_response',
-                'page_uuid': str(page_uuid),
-                'mode': str(mode),
-                'updated_page': updated_page,
-            },
-        )
-
-        return Response({
-            'status': 'ok',
-            'answer': assistant_message,
-            'updated_page': updated_page,
-            'revised_page_body': revised_body if str(mode) == 'update_page' else None,
-            'session_state': session.state,
-        }, status=HTTP_200_OK)
-
    @action(detail=True, methods=['get'], url_path='history')
    def history(self, request, uuid=None):
        session = self.get_object()
@ -1118,7 +838,8 @@ class OnboardingSessionViewSet(ModelViewSet):
        session.save(update_fields=['status', 'completed_at', 'state', 'updated_at'])
        return Response({'message': 'Session marked as completed', 'quiz_result': quiz_result})

-class AgentInteractionLogViewSet(ReadOnlyModelViewSet):
+
+class AgentInteractionLogViewSet(RequestParamMixin, ReadOnlyModelViewSet):
    queryset = AgentInteractionLog.objects.all()
    serializer_class = AgentInteractionLogSerializer
    permission_classes = [IsAuthenticated]
@ -1136,22 +857,13 @@ class AgentInteractionLogViewSet(ReadOnlyModelViewSet):
            manager_scope
        ).distinct()

-        session_uuid = self.request.query_params.get('session_uuid')
-        if session_uuid in (None, ''):
-            session_uuid = self.request.data.get('session_uuid')
-        if session_uuid:
+        if session_uuid := self._get_param('session_uuid'):
            queryset = queryset.filter(session__uuid=session_uuid)

-        role_uuid = self.request.query_params.get('role_uuid')
-        if role_uuid in (None, ''):
-            role_uuid = self.request.data.get('role_uuid')
-        if role_uuid:
+        if role_uuid := self._get_param('role_uuid'):
            queryset = queryset.filter(session__role__uuid=role_uuid)

-        organization_uuid = self.request.query_params.get('organization_uuid')
-        if organization_uuid in (None, ''):
-            organization_uuid = self.request.data.get('organization_uuid')
-        if organization_uuid:
+        if organization_uuid := self._get_param('organization_uuid'):
            queryset = queryset.filter(session__role__organization__uuid=organization_uuid)

        return queryset.order_by('created_at')
--- a/site/src/router/api.ts
+++ b/site/src/router/api.ts
@ -147,7 +147,6 @@ export const API = {
            progressOverview: () => 'onboarding-session/progress-overview/',
            byId: (uuid: string) => `onboarding-session/${uuid}/`,
            interact: (uuid: string) => `onboarding-session/${uuid}/interact/`,
-            askKa: (uuid: string) => `onboarding-session/${uuid}/ask-ka/`,
            history: (uuid: string) => `onboarding-session/${uuid}/history/`,
            complete: (uuid: string) => `onboarding-session/${uuid}/complete/`,
        },
--- a/site/src/stores/agentStore.ts
+++ b/site/src/stores/agentStore.ts
@ -14,6 +14,7 @@ export const useAgentStore = defineStore('agent', () => {
    const eventLog = ref<AgentEvent[]>([])
    const lastExecutionId = ref<string | null>(null)
    const socket = ref<WebSocket | null>(null)
+    const streamBuffer = ref('')

    let currentUrl = ''
    let reconnectAttempts = 0
@ -68,7 +69,10 @@ export const useAgentStore = defineStore('agent', () => {
                    lastExecutionId.value = String(payload.execution_id)
                }

-                if (type === 'status' || type === 'thought' || type === 'tool_start') {
+                if (type === 'stream_chunk') {
+                    executionStatus.value = 'running'
+                    streamBuffer.value += payload.message || ''
+                } else if (type === 'status' || type === 'thought' || type === 'tool_start') {
                    executionStatus.value = 'running'
                    pushEvent({
                        type,
@ -87,6 +91,7 @@ export const useAgentStore = defineStore('agent', () => {
                    })
                } else if (type === 'completed') {
                    executionStatus.value = 'completed'
+                    streamBuffer.value = ''
                    pushEvent({
                        type: 'completed',
                        message: 'Generation loop finished successfully',
@ -117,6 +122,7 @@ export const useAgentStore = defineStore('agent', () => {
        intentionalClose = false
        clearReconnectTimer()
        reconnectAttempts = 0
+        streamBuffer.value = ''

        if (socket.value) {
            socket.value.close()
@ -172,6 +178,7 @@ export const useAgentStore = defineStore('agent', () => {
        executionStatus,
        eventLog,
        socket,
+        streamBuffer,
        connect,
        disconnect,
        startAgent,
--- a/site/src/stores/kaStore.ts
+++ b/site/src/stores/kaStore.ts
@ -0,0 +1,135 @@
+import { defineStore } from 'pinia'
+import { ref } from 'vue'
+import { BACKOFF_BASE_MS, BACKOFF_MAX_MS, BACKOFF_MAX_ATTEMPTS } from './agentBackoff'
+
+export type KaResponse = {
+    answer: string
+    updatedPage: boolean
+    revisedPageBody: string | null
+}
+
+type PendingRequest = {
+    resolve: (value: KaResponse) => void
+    reject: (reason: string) => void
+}
+
+export const useKaStore = defineStore('ka', () => {
+    const isConnected = ref(false)
+    const isAsking = ref(false)
+    const streamBuffer = ref('')
+    const socket = ref<WebSocket | null>(null)
+
+    let currentUrl = ''
+    let reconnectAttempts = 0
+    let reconnectTimer: ReturnType<typeof setTimeout> | null = null
+    let intentionalClose = false
+    let pending: PendingRequest | null = null
+
+    const clearReconnectTimer = () => {
+        if (reconnectTimer !== null) {
+            clearTimeout(reconnectTimer)
+            reconnectTimer = null
+        }
+    }
+
+    const scheduleReconnect = () => {
+        if (reconnectAttempts >= BACKOFF_MAX_ATTEMPTS) return
+        const delay = Math.min(BACKOFF_BASE_MS * 2 ** reconnectAttempts, BACKOFF_MAX_MS)
+        reconnectAttempts++
+        reconnectTimer = setTimeout(() => {
+            if (!intentionalClose) openSocket(currentUrl)
+        }, delay)
+    }
+
+    const openSocket = (url: string) => {
+        socket.value = new WebSocket(url)
+
+        socket.value.onopen = () => {
+            reconnectAttempts = 0
+            isConnected.value = true
+        }
+
+        socket.value.onmessage = (event) => {
+            try {
+                const payload = JSON.parse(event.data)
+                if (payload.type === 'stream_chunk') {
+                    streamBuffer.value += payload.message || ''
+                } else if (payload.type === 'completed') {
+                    isAsking.value = false
+                    streamBuffer.value = ''
+                    const content = payload.content || {}
+                    pending?.resolve({
+                        answer: payload.message || '',
+                        updatedPage: Boolean(content.updated_page),
+                        revisedPageBody: content.revised_page_body ?? null,
+                    })
+                    pending = null
+                } else if (payload.type === 'error') {
+                    isAsking.value = false
+                    streamBuffer.value = ''
+                    pending?.reject(payload.message || 'KA error')
+                    pending = null
+                }
+            } catch (e) {
+                console.error('KA store message error', e)
+            }
+        }
+
+        socket.value.onclose = (event) => {
+            isConnected.value = false
+            if (!intentionalClose && event.code !== 1000) {
+                scheduleReconnect()
+            }
+        }
+    }
+
+    const connect = (sessionUuid: string) => {
+        intentionalClose = false
+        clearReconnectTimer()
+        reconnectAttempts = 0
+
+        if (socket.value) {
+            socket.value.close()
+            socket.value = null
+        }
+
+        const wsProtocol = window.location.protocol === 'https:' ? 'wss' : 'ws'
+        currentUrl = `${wsProtocol}://${window.location.host}/ws/onboarding/knowledge/${sessionUuid}/`
+        openSocket(currentUrl)
+    }
+
+    const disconnect = () => {
+        intentionalClose = true
+        clearReconnectTimer()
+        if (pending) {
+            pending.reject('Disconnected')
+            pending = null
+        }
+        isAsking.value = false
+        if (socket.value) {
+            socket.value.close()
+            socket.value = null
+        }
+        isConnected.value = false
+    }
+
+    const ask = (
+        pageUuid: string,
+        userMessage: string,
+        mode: 'separate' | 'update_page',
+    ): Promise<KaResponse> => {
+        return new Promise((resolve, reject) => {
+            if (!socket.value || socket.value.readyState !== WebSocket.OPEN) {
+                return reject('Not connected')
+            }
+            isAsking.value = true
+            streamBuffer.value = ''
+            pending = { resolve, reject }
+            socket.value.send(
+                JSON.stringify({ action: 'ask', page_uuid: pageUuid, message: userMessage, mode }),
+            )
+        })
+    }
+
+    return { isConnected, isAsking, streamBuffer, connect, disconnect, ask }
+})
--- a/site/src/stores/onboardingAgentStore.ts
+++ b/site/src/stores/onboardingAgentStore.ts
@ -11,6 +11,7 @@ import { BACKOFF_BASE_MS, BACKOFF_MAX_MS, BACKOFF_MAX_ATTEMPTS } from './agentBa
 export const useOnboardingAgentStore = defineStore('onboarding-agent', () => {
    const isConnected = ref(false)
    const executionStatus = ref<AgentExecutionStatus>('idle')
+    const currentPhase = ref<'curriculum' | 'knowledge' | 'assessment' | null>(null)
    const eventLog = ref<AgentEvent[]>([])
    const lastExecutionId = ref<string | null>(null)
    const socket = ref<WebSocket | null>(null)
@ -70,6 +71,10 @@ export const useOnboardingAgentStore = defineStore('onboarding-agent', () => {

                if (type === 'status' || type === 'thought' || type === 'tool_start') {
                    executionStatus.value = 'running'
+                    if (type === 'status' && typeof payload.content === 'string' &&
+                        ['curriculum', 'knowledge', 'assessment'].includes(payload.content)) {
+                        currentPhase.value = payload.content as 'curriculum' | 'knowledge' | 'assessment'
+                    }
                    pushEvent({
                        type,
                        message: payload.message || payload.thought,
@ -87,6 +92,7 @@ export const useOnboardingAgentStore = defineStore('onboarding-agent', () => {
                    })
                } else if (type === 'completed') {
                    executionStatus.value = 'completed'
+                    currentPhase.value = null
                    pushEvent({
                        type: 'completed',
                        message: 'Generation loop finished successfully',
@ -95,6 +101,7 @@ export const useOnboardingAgentStore = defineStore('onboarding-agent', () => {
                    })
                } else if (type === 'error') {
                    executionStatus.value = 'failed'
+                    currentPhase.value = null
                    pushEvent({ type: 'error', message: payload.message })
                }
            } catch (e) {
@ -137,6 +144,7 @@ export const useOnboardingAgentStore = defineStore('onboarding-agent', () => {
        }
        isConnected.value = false
        executionStatus.value = 'idle'
+        currentPhase.value = null
    }

    const startAgent = (data: AgentStartPayload) => {
@ -170,6 +178,7 @@ export const useOnboardingAgentStore = defineStore('onboarding-agent', () => {
    return {
        isConnected,
        executionStatus,
+        currentPhase,
        eventLog,
        socket,
        connect,
--- a/site/src/views/AgentDetailView.vue
+++ b/site/src/views/AgentDetailView.vue
@ -13,12 +13,12 @@ import {
    Tag,
    InputNumber,
    Select,
+    Tooltip,
 } from 'ant-design-vue'
-import { marked } from 'marked'
-import DOMPurify from 'dompurify'
 import { useAgentStore } from '../stores/agentStore'
 import { apiClient, isAxiosError, API } from '../router/api'
 import type { AgentConfig, AgentRunResult } from '../types/agent'
+import { InfoCircleOutlined } from '@ant-design/icons-vue'

 const route = useRoute()
 const agentStore = useAgentStore()
@ -142,13 +142,7 @@ const saveConfig = async () => {
    }
 }

-const renderedAgentResponse = computed(() => {
-    const rawMarkdown = agentResponse.value
-    if (!rawMarkdown) return ''
-
-    const html = marked.parse(rawMarkdown) as string
-    return DOMPurify.sanitize(html)
-})
+const displayedResponse = computed(() => agentResponse.value || agentStore.streamBuffer || '')

 const startAgent = () => {
    if (!agentStore.isConnected) {
@ -260,7 +254,12 @@ onUnmounted(() => {
                    </div>

                    <div>
-                        <Typography.Text>Max Tokens:</Typography.Text>
+                        <Space :size="4" align="center" style="margin-bottom: 4px">
+                            <Typography.Text>Max Tokens</Typography.Text>
+                            <Tooltip title="The maximum number of tokens the agent can generate in a single response. One token ≈ 4 characters. If the response cuts off mid-sentence, increase this value.">
+                                <InfoCircleOutlined style="color: #9ca3af; cursor: help" />
+                            </Tooltip>
+                        </Space>
                        <InputNumber
                            v-model:value="maxTokens"
                            :min="1"
@ -283,13 +282,12 @@ onUnmounted(() => {
                </Space>
            </div>

-            <div v-if="agentResponse" class="response-section">
-                <Typography.Title :level="4" class="section-title">Final Response</Typography.Title>
+            <div v-if="agentResponse || agentStore.streamBuffer" class="response-section">
+                <Typography.Title :level="4" class="section-title">
+                    {{ agentStore.streamBuffer && !agentResponse ? 'Response' : 'Final Response' }}
+                </Typography.Title>
                <Card class="response-card response-final" :bordered="false">
-                    <div
-                        class="response-content markdown-body"
-                        v-html="renderedAgentResponse"
-                    ></div>
+                    <Typography.Text>{{ displayedResponse }}</Typography.Text>
                </Card>
            </div>
            <Typography.Title :level="4" class="section-title">Execution Log</Typography.Title>
--- a/site/src/views/OnboardingView.vue
+++ b/site/src/views/OnboardingView.vue
@ -20,6 +20,7 @@ import {
 import { CheckCircleOutlined, CloseCircleOutlined } from '@ant-design/icons-vue'
 import { apiClient, API, isAxiosError } from '../router/api'
 import { useOnboardingAgentStore } from '../stores/onboardingAgentStore'
+import { useKaStore } from '../stores/kaStore'
 import { useUserStore } from '../stores/userStore'
 import type {
    OnboardingFlow,
@ -35,6 +36,7 @@ const marked = new Marked()
 const route = useRoute()
 const router = useRouter()
 const agentStore = useOnboardingAgentStore()
+const kaStore = useKaStore()
 const userStore = useUserStore()

 const roleId = computed(() => route.params.roleId as string)
@ -63,7 +65,6 @@ type QuizResult = {

 const quizResult = ref<QuizResult | null>(null)
 const kaQuestion = ref('')
-const kaLoading = ref(false)
 const kaMode = ref<'separate' | 'update_page'>('separate')
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 const formState = reactive<Record<string, any>>({})
@ -112,7 +113,8 @@ const currentPageBody = computed(() => {

 const renderedBody = computed(() => {
    if (!currentPageBody.value) return ''
-    return DOMPurify.sanitize(marked.parse(currentPageBody.value) as string)
+    const body = currentPageBody.value.replace(/^#{1,6}\s+.+\n?/, '')
+    return DOMPurify.sanitize(marked.parse(body) as string)
 })

 const isAnswerCorrect = (value: unknown) => {
@ -263,6 +265,7 @@ const resetCurrentFlow = async () => {
        isAutoGenerating.value = false
        agentStore.disconnect()
        agentStore.clearLog()
+        kaStore.disconnect()

        message.success('Onboarding flow deleted. Generating a fresh flow...')
        await initOnboarding()
@ -353,6 +356,7 @@ watch(
        Object.keys(formState).forEach((k) => delete formState[k])
        agentStore.disconnect()
        agentStore.clearLog()
+        kaStore.disconnect()
        await initOnboarding()
    },
 )
@ -370,6 +374,7 @@ const loadFlow = async (flowUuid: string) => {
        return
    }

+    kaStore.connect(session.value.uuid)
    restorePageProgressFromSession()
    syncVisitedPages()
    hydrateFormState()
@ -554,36 +559,47 @@ const onSubmitPage = async () => {
 const askKnowledgeAgent = async () => {
    if (!session.value || !currentPage.value || !kaQuestion.value.trim()) return

-    kaLoading.value = true
-    try {
-        const response = await apiClient.post<{
-            status: string
-            answer: string
-            updated_page: boolean
-            revised_page_body?: string | null
-            session_state?: Record<string, unknown>
-        }>(API.onboarding.sessions.askKa(session.value.uuid), {
-            page_uuid: currentPage.value.uuid,
-            message: kaQuestion.value,
-            mode: kaMode.value,
-        })
+    const pageUuid = currentPage.value.uuid
+    const question = kaQuestion.value.trim()
+    kaQuestion.value = ''

-        const apiSessionState = response.data?.session_state
-        if (apiSessionState && session.value) {
-            ;(session.value as unknown as { state?: Record<string, unknown> }).state = apiSessionState
+    try {
+        const result = await kaStore.ask(pageUuid, question, kaMode.value)
+
+        const sessionObj = session.value as unknown as { state?: Record<string, unknown> }
+        const state: Record<string, unknown> = sessionObj.state ?? {}
+
+        const pageHelp: Record<string, unknown[]> =
+            state.page_help && typeof state.page_help === 'object'
+                ? { ...(state.page_help as Record<string, unknown[]>) }
+                : {}
+        const thread = Array.isArray(pageHelp[pageUuid]) ? [...pageHelp[pageUuid]] : []
+        thread.push({ question, answer: result.answer, timestamp: new Date().toISOString() })
+        pageHelp[pageUuid] = thread.slice(-20)
+        state.page_help = pageHelp
+
+        if (result.updatedPage && result.revisedPageBody) {
+            const overrides: Record<string, unknown> =
+                state.page_overrides && typeof state.page_overrides === 'object'
+                    ? { ...(state.page_overrides as Record<string, unknown>) }
+                    : {}
+            overrides[pageUuid] = result.revisedPageBody
+            state.page_overrides = overrides
        }

+        sessionObj.state = state
        syncVisitedPages()
-        kaQuestion.value = ''
    } catch {
        message.error('Could not retrieve clarification right now')
-    } finally {
-        kaLoading.value = false
+        kaQuestion.value = question
    }
 }

 onMounted(() => initOnboarding())
-onUnmounted(() => agentStore.disconnect())
+onUnmounted(() => {
+    agentStore.disconnect()
+    kaStore.disconnect()
+})

 watch(
    () => currentPageIndex.value,
@ -632,7 +648,7 @@ watch(
                </div>

                <div class="pipeline-status">
-                    <Steps size="small" :current="agentStore.executionStatus === 'running' ? 1 : 2">
+                    <Steps size="small" :current="({ curriculum: 0, knowledge: 1, assessment: 2 } as Record<string, number>)[agentStore.currentPhase ?? ''] ?? (agentStore.executionStatus === 'completed' ? 3 : -1)">
                        <Steps.Step title="Curriculum" />
                        <Steps.Step title="Knowledge" />
                        <Steps.Step title="Assessment" />
@ -724,7 +740,21 @@ watch(
                            <Typography.Title :level="4" class="white-text">
                                {{ currentPage.title }}
                            </Typography.Title>
-                            <div class="markdown-body" v-html="renderedBody"></div>
+                            <div style="position: relative">
+                                <div
+                                    v-if="kaStore.isAsking && !kaStore.streamBuffer"
+                                    style="display: flex; align-items: center; gap: 8px; margin-bottom: 12px; opacity: 0.75"
+                                >
+                                    <Spin size="small" />
+                                    <Typography.Text class="white-text" style="font-size: 13px">Generating...</Typography.Text>
+                                </div>
+                                <div
+                                    class="markdown-body"
+                                    v-html="kaStore.isAsking && kaStore.streamBuffer
+                                        ? DOMPurify.sanitize(marked.parse(kaStore.streamBuffer) as string)
+                                        : renderedBody"
+                                ></div>
+                            </div>
                            <Divider dashed style="border-color: #dbe3ec" />

                            <Form layout="vertical" :model="formState" @finish="onSubmitPage">
@ -868,7 +898,7 @@ watch(
                                            />
                                            <Button
                                                type="default"
-                                                :loading="kaLoading"
+                                                :loading="kaStore.isAsking"
                                                :disabled="!kaQuestion.trim()"
                                                @click="askKnowledgeAgent"
                                            >
--- a/site/src/views/ProgressDetailView.vue
+++ b/site/src/views/ProgressDetailView.vue
@ -123,16 +123,26 @@ const runProgressMonitor = async () => {
                ws.send(JSON.stringify({action: 'progress_monitor'}),)
            }

+            let streamingFeedback = false
+
            ws.onmessage = (event) => {
                try {
                    const payload = JSON.parse(event.data)
-                    if (payload.message) {
+                    if (payload.message && payload.type !== 'stream_chunk') {
                        monitorLogs.value.push(String(payload.message))
                    }

+                    if (payload.type === 'stream_chunk') {
+                        if (!streamingFeedback) {
+                            feedback.value = ''
+                            streamingFeedback = true
+                        }
+                        feedback.value += payload.message || ''
+                    }
+
                    if (payload.type === 'completed') {
                        feedback.value = String(
-                            payload.content?.feedback || payload.message || 'No feedback returned.',
+                            payload.content?.feedback || feedback.value || 'No feedback returned.',
                        )
                        if (monitorTimeout.value !== null) {
                            window.clearTimeout(monitorTimeout.value)