"""REST API views for onboarding flows, pages, fields and learner sessions."""
import json
import logging
import re
import html
from typing import Any

from django.core.exceptions import ValidationError
from django.db import transaction
from django.utils import timezone
from rest_framework import status
from rest_framework.exceptions import PermissionDenied
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.viewsets import ModelViewSet
from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer

from apps.mlstore.models import AgentEvent, AgentRun
from apps.mlstore import services as ml_services

from .models import OnboardingFlow, OnboardingPage, OnboardingField, OnboardingSession
from .serializers import (
    OnboardingFlowSerializer,
    OnboardingFlowDetailSerializer,
    OnboardingPageSerializer,
    OnboardingFieldSerializer,
    OnboardingSessionSerializer,
    OnboardingSubmissionSerializer,
    OnboardingFeedbackSerializer,
)

logger = logging.getLogger(__name__)

# Field types the generator is allowed to emit; anything else becomes 'text'.
_ALLOWED_FIELD_TYPES = frozenset({"text", "textarea", "number", "boolean", "date"})

# A ```json ... ``` fenced block wrapping a JSON object.
_FENCED_JSON_RE = re.compile(r"```json\s*(\{[\s\S]*?\})\s*```", re.IGNORECASE)

# Errors json decoding can raise on hostile/garbled text (JSONDecodeError is a
# ValueError; very deep nesting can hit the recursion limit).
_JSON_ERRORS = (ValueError, RecursionError)


def _extract_json(text: str) -> dict[str, Any]:
    """Best-effort extraction of a JSON object from model output.

    Tries, in order: the whole text, a ```json fenced block, then the first
    position of a ``{`` from which a complete JSON object can be decoded.

    Returns:
        The decoded object, or ``{}`` when no JSON *object* can be found.
        The result is always a ``dict``; valid JSON of another type (list,
        string, number, ...) yields ``{}``.
    """
    if not text:
        return {}

    try:
        parsed = json.loads(text)
    except _JSON_ERRORS:
        pass
    else:
        # The whole text is valid JSON: only an object is acceptable.
        return parsed if isinstance(parsed, dict) else {}

    # Prefer fenced json blocks
    fenced = _FENCED_JSON_RE.search(text)
    if fenced:
        try:
            parsed = json.loads(fenced.group(1))
        except _JSON_ERRORS:
            pass
        else:
            if isinstance(parsed, dict):
                return parsed

    # Fallback: decode an object starting at each '{' in turn. raw_decode
    # understands string literals, so braces inside values are handled.
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(text, start)
        except _JSON_ERRORS:
            start = text.find('{', start + 1)
            continue
        if isinstance(parsed, dict):
            return parsed
        start = text.find('{', start + 1)
    return {}


def _strip_html(text: str) -> str:
    """Replace tags with spaces, unescape HTML entities and collapse whitespace."""
    if not text:
        return ""
    cleaned = re.sub(r"<[^>]+>", " ", text)
    cleaned = html.unescape(cleaned)
    return re.sub(r"\s+", " ", cleaned).strip()


def _send_agent_progress_event(agent_run: AgentRun, content: dict):
    """Record a 'progress' AgentEvent and broadcast it to the agent's channel group.

    Best-effort: any failure (database or channel layer) is logged as a
    warning and swallowed so the calling request is not affected.
    """
    try:
        AgentEvent.objects.create(
            execution=agent_run,
            event_type='progress',
            content=content,
        )
        room_group_name = f"mlstore_agent_{agent_run.agent.uuid}"
        async_to_sync(get_channel_layer().group_send)(
            room_group_name,
            {
                "type": "mlstore_event",
                "event_type": "progress",
                "content": content,
                "timestamp": timezone.now().isoformat(),
            },
        )
    except Exception as e:
        logger.warning("Failed to send progress event: %s", e)


def _is_manager(user) -> bool:
    """Return True when *user* is authenticated and has a truthy ``is_manager``."""
    return bool(user and user.is_authenticated and getattr(user, 'is_manager', False))


def _agent_model_path(flow):
    """Return the flow's agent model path, or None if agent/model/path is missing."""
    agent = flow.agent
    model = agent.model if agent else None
    path = model.path if model else None
    return path or None


def _inference_text(result) -> str:
    """Pull the generated text out of an inference result.

    Reads ``response`` (then ``result``) from a dict result; any other
    result type yields an empty string.
    """
    if not isinstance(result, dict):
        return ''
    return str(result.get('response') or result.get('result') or '')


def _create_agent_run(flow, user):
    """Create the AgentRun tracking an onboarding session, or None if the flow has no agent."""
    if not flow.agent:
        return None
    return AgentRun.objects.create(
        agent=flow.agent,
        user=user,
        input_data={
            "type": "onboarding_session",
            "flow_uuid": str(flow.uuid),
            "role_uuid": str(flow.role.uuid),
        },
    )


def _build_generation_prompt(flow, instructions: str, rag_context: str) -> str:
    """Assemble the content-generation prompt for *flow*."""
    return (
        "You are creating onboarding content as JSON. "
        "Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
        "Do not include explanations or examples.\n"
        "Do not include HTML tags. Use plain text only.\n"
        "Each page body must be 3-6 paragraphs, at least 320 words total, and include 1 short list of 3-5 bullets.\n"
        "Before writing the body, create a brief outline of the key points to cover and include it in meta.outline.\n"
        "The outline should be a short list of 3-6 bullets, not chain-of-thought.\n"
        "Do NOT ask about the learner's personal experience. Ask about what someone in the role may encounter.\n"
        "Do NOT use any select or multiselect fields. Use only text, textarea, number, boolean, or date.\n"
        "Use the provided context for accurate, role-specific content.\n"
        "If context is insufficient, make reasonable assumptions without inventing tools or policies.\n"
        "JSON shape:\n"
        "{\n"
        " \"title\": string,\n"
        " \"description\": string,\n"
        " \"pages\": [\n"
        " {\n"
        " \"title\": string,\n"
        " \"body\": string,\n"
        " \"meta\": { \"outline\": [string] },\n"
        " \"fields\": [\n"
        " {\n"
        " \"key\": string,\n"
        " \"label\": string,\n"
        " \"type\": one of [text, textarea, number, boolean, date],\n"
        " \"required\": boolean,\n"
        " \"help_text\": string,\n"
        " \"placeholder\": string,\n"
        " \"options\": []\n"
        " }\n"
        " ]\n"
        " }\n"
        " ]\n"
        "}\n"
        f"Role: {flow.role.name}\n"
        f"Role description: {flow.role.description}\n"
        f"Flow title: {flow.title}\n"
        f"Flow description: {flow.description}\n"
        f"Extra instructions: {instructions}\n"
        f"Context:\n{rag_context}\n"
    )


def _replace_flow_pages(flow, pages: list[dict]) -> None:
    """Delete the flow's pages and recreate them (with fields) from *pages*.

    *pages* must already be filtered down to dict entries. Callers are
    expected to wrap this in a transaction so a failure restores the old pages.
    """
    OnboardingPage.objects.filter(flow=flow).delete()
    for page_index, page in enumerate(pages):
        page_obj = OnboardingPage.objects.create(
            flow=flow,
            order=page_index,
            title=page.get('title') or f"Page {page_index + 1}",
            body=_strip_html(page.get('body') or ''),
            meta=page.get('meta') or {},
        )
        raw_fields = page.get('fields')
        if not isinstance(raw_fields, list):
            raw_fields = []
        # Ignore malformed (non-object) field entries instead of crashing.
        fields = [entry for entry in raw_fields if isinstance(entry, dict)]
        for field_index, field in enumerate(fields):
            field_type = field.get('type') or 'text'
            if field_type not in _ALLOWED_FIELD_TYPES:
                field_type = 'text'
            OnboardingField.objects.create(
                page=page_obj,
                order=field_index,
                key=field.get('key') or f"field_{field_index + 1}",
                label=field.get('label') or f"Field {field_index + 1}",
                field_type=field_type,
                required=bool(field.get('required')),
                help_text=field.get('help_text') or '',
                placeholder=field.get('placeholder') or '',
                # The prompt forbids select/multiselect, so options are never stored.
                options=[],
                default_value=field.get('default_value'),
                validation=field.get('validation') or {},
            )


class OnboardingFlowViewSet(ModelViewSet):
    """CRUD for onboarding flows plus page listing, generation and publishing."""

    queryset = OnboardingFlow.objects.select_related('role', 'agent').all()
    serializer_class = OnboardingFlowSerializer
    lookup_field = 'uuid'

    def get_queryset(self):
        """Apply the optional ``role`` (uuid) and ``status`` query-string filters."""
        qs = super().get_queryset()
        role_uuid = self.request.query_params.get('role')
        status_filter = self.request.query_params.get('status')
        if role_uuid:
            try:
                qs = qs.filter(role__uuid=role_uuid)
            except (ValidationError, ValueError):
                # NOTE(review): presumably role.uuid is a UUIDField, which
                # rejects malformed values at filter time. Nothing can match
                # such a value, so return no rows rather than a 500.
                return qs.none()
        if status_filter:
            qs = qs.filter(status=status_filter)
        return qs

    def get_serializer_class(self):
        """Use the detail serializer for ``retrieve`` and ``pages``."""
        if self.action in ('retrieve', 'pages'):
            return OnboardingFlowDetailSerializer
        return super().get_serializer_class()

    @action(detail=True, methods=['get'])
    def pages(self, request, pk=None, uuid=None):
        """Return the flow serialized with the detail serializer."""
        flow = self.get_object()
        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)

    @action(detail=True, methods=['post'])
    def generate(self, request, pk=None, uuid=None):
        """Generate the flow's pages and fields with the flow's agent model.

        Manager-only. On success the flow is reset to ``draft`` and its pages
        are replaced by the generated ones. Responds 403 for non-managers,
        400 when the flow has no usable agent model or the model output
        contains no usable pages, and 500 when inference fails. Existing
        pages are left untouched on every error path.
        """
        flow = self.get_object()
        if not _is_manager(request.user):
            return Response({"error": "permission_denied"}, status=status.HTTP_403_FORBIDDEN)

        model_path = _agent_model_path(flow)
        if not model_path:
            return Response(
                {"error": "flow_agent_model_required"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        instructions = request.data.get('instructions') or ''

        # RAG context is optional: generation proceeds without it on failure.
        rag_context = ""
        try:
            rag_context = ml_services.get_context_for_query(
                query=f"Create onboarding content for role {flow.role.name}",
                role_uuid=str(flow.role.uuid),
                top_k=6,
                similarity_threshold=0.35,
            )
        except Exception as e:
            logger.warning("Onboarding generation RAG lookup failed: %s", e)

        prompt = _build_generation_prompt(flow, instructions, rag_context)

        try:
            result = ml_services.infer_with_model(model_path, prompt, {
                "max_tokens": 1800,
                "temperature": 0.2,
            })
        except Exception as e:
            logger.error("Onboarding generate inference failed: %s", e, exc_info=True)
            return Response({"error": "generation_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        response_text = _inference_text(result)
        payload = _extract_json(response_text)

        # Validate before touching the database: a missing, non-list or empty
        # "pages" must not wipe the pages the flow already has.
        raw_pages = payload.get('pages')
        pages = (
            [entry for entry in raw_pages if isinstance(entry, dict)]
            if isinstance(raw_pages, list)
            else []
        )
        if not pages:
            return Response(
                {"error": "invalid_generation_output", "raw": response_text},
                status=status.HTTP_400_BAD_REQUEST,
            )

        with transaction.atomic():
            flow.title = payload.get('title') or flow.title
            # Keep existing description on regenerate unless explicitly empty
            if not flow.description:
                flow.description = payload.get('description') or flow.description
            flow.status = 'draft'
            flow.save(update_fields=['title', 'description', 'status'])
            _replace_flow_pages(flow, pages)

        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)

    @action(detail=True, methods=['post'])
    def publish(self, request, pk=None, uuid=None):
        """Mark the flow as ``published`` (manager-only; no-op if already published)."""
        flow = self.get_object()
        if not _is_manager(request.user):
            return Response({"error": "permission_denied"}, status=status.HTTP_403_FORBIDDEN)
        if flow.status != 'published':
            flow.status = 'published'
            flow.save(update_fields=['status'])
        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)


class OnboardingPageViewSet(ModelViewSet):
    """CRUD for onboarding pages, looked up by uuid."""

    queryset = OnboardingPage.objects.select_related('flow').prefetch_related('fields').all()
    serializer_class = OnboardingPageSerializer
    lookup_field = 'uuid'


class OnboardingFieldViewSet(ModelViewSet):
    """CRUD for onboarding fields, looked up by uuid."""

    queryset = OnboardingField.objects.select_related('page').all()
    serializer_class = OnboardingFieldSerializer
    lookup_field = 'uuid'


class OnboardingSessionViewSet(ModelViewSet):
    """CRUD for onboarding sessions plus get-or-create, submit and feedback actions."""

    queryset = OnboardingSession.objects.select_related('flow', 'user', 'agent_run', 'flow__agent').all()
    serializer_class = OnboardingSessionSerializer
    lookup_field = 'uuid'

    def get_queryset(self):
        """Scope sessions to the caller.

        Managers see every session, other authenticated users see only their
        own, and unauthenticated requests see none.
        """
        qs = super().get_queryset()
        user = self.request.user
        if not user or not user.is_authenticated:
            # Never expose other users' sessions to anonymous callers.
            return qs.none()
        if not getattr(user, 'is_manager', False):
            qs = qs.filter(user=user)
        return qs

    def perform_create(self, serializer):
        """Save a session owned by the requester, with an AgentRun if the flow has an agent.

        Raises:
            PermissionDenied: when the request is unauthenticated.
        """
        if not self.request.user or not self.request.user.is_authenticated:
            raise PermissionDenied("Authentication required")
        flow = serializer.validated_data.get('flow')
        agent_run = _create_agent_run(flow, self.request.user) if flow else None
        serializer.save(user=self.request.user, agent_run=agent_run)

    @action(detail=False, methods=['post'])
    def get_or_create(self, request):
        """Return the caller's most recently updated non-completed session for a flow.

        Creates a new session (and AgentRun, if the flow has an agent) when
        none exists. Responds 400 when ``flow`` is missing and 404 when it
        does not identify a flow.

        Raises:
            PermissionDenied: when the request is unauthenticated.
        """
        if not request.user or not request.user.is_authenticated:
            raise PermissionDenied("Authentication required")
        flow_uuid = request.data.get('flow')
        if not flow_uuid:
            return Response({"error": "flow_required"}, status=status.HTTP_400_BAD_REQUEST)
        try:
            flow = OnboardingFlow.objects.get(uuid=flow_uuid)
        except (OnboardingFlow.DoesNotExist, ValidationError, ValueError, TypeError):
            # A malformed identifier cannot match any flow; treat it like a miss.
            return Response({"error": "flow_not_found"}, status=status.HTTP_404_NOT_FOUND)

        session = (
            OnboardingSession.objects
            .filter(flow=flow, user=request.user)
            .exclude(status='completed')
            .order_by('-updated_at')
            .first()
        )
        if not session:
            # Create run + session together so a failure leaves no orphan AgentRun.
            with transaction.atomic():
                agent_run = _create_agent_run(flow, request.user)
                session = OnboardingSession.objects.create(
                    flow=flow,
                    user=request.user,
                    agent_run=agent_run,
                )
        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    @action(detail=True, methods=['post'])
    def submit(self, request, pk=None, uuid=None):
        """Store the responses for one page and advance the session.

        The session is marked completed when ``mark_complete`` is set or the
        page's order is at or beyond the last page index of the flow. If the
        session has an AgentRun, a progress event is sent and the run's
        ``output_data['onboarding']`` is updated with all responses.
        Responds 404 when the page does not belong to the session's flow.
        """
        session = self.get_object()
        serializer = OnboardingSubmissionSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        mark_complete = serializer.validated_data.get('mark_complete')

        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)

        # Responses are keyed by page uuid; resubmitting a page overwrites it.
        responses_payload = dict(session.responses or {})
        responses_payload[str(page.uuid)] = responses
        session.responses = responses_payload
        session.current_page_order = page.order
        if mark_complete or page.order >= session.flow.pages.count() - 1:
            session.status = 'completed'
            session.completed_at = timezone.now()
        session.save(update_fields=['responses', 'current_page_order', 'status', 'completed_at'])

        if session.agent_run:
            progress_payload = {
                "flow_uuid": str(session.flow.uuid),
                "session_uuid": str(session.uuid),
                "page_uuid": str(page.uuid),
                "page_order": page.order,
                "status": session.status,
                "responses": responses,
            }
            _send_agent_progress_event(session.agent_run, progress_payload)
            session.agent_run.output_data = {
                **(session.agent_run.output_data or {}),
                "onboarding": session.responses,
            }
            session.agent_run.save(update_fields=['output_data'])

        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    @action(detail=True, methods=['post'])
    def feedback(self, request, pk=None, uuid=None):
        """Ask the flow's agent model for feedback on one page's responses.

        The feedback is stored in the session's responses under
        ``__feedback__`` keyed by page uuid, and returned with the serialized
        session. When the model output contains no JSON object, the raw text
        is used as the summary. Responds 404 for an unknown page, 400 when
        the flow has no usable agent model and 500 when inference fails.
        """
        session = self.get_object()
        serializer = OnboardingFeedbackSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        question = serializer.validated_data.get('question') or ''

        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)

        model_path = _agent_model_path(session.flow)
        if not model_path:
            return Response({"error": "flow_agent_model_required"}, status=status.HTTP_400_BAD_REQUEST)

        prompt = (
            "You are an onboarding assessor. Provide concise feedback addressed directly to the learner using second-person \"You\" statements.\n"
            "Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
            "JSON shape:\n"
            "{\n"
            " \"summary\": string\n"
            "}\n\n"
            f"Page title: {page.title}\n"
            f"Page body: {page.body}\n"
            f"Responses: {json.dumps(responses)}\n"
        )
        if question:
            prompt += f"Learner question: {question}\n"

        try:
            result = ml_services.infer_with_model(model_path, prompt, {
                "max_tokens": 900,
                "temperature": 0.2,
            })
        except Exception as e:
            logger.error("Onboarding feedback inference failed: %s", e, exc_info=True)
            return Response({"error": "feedback_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        feedback_text = _inference_text(result).strip()
        feedback_payload = _extract_json(feedback_text)
        if not feedback_payload:
            feedback_payload = {
                "summary": feedback_text or "Feedback generated.",
            }

        responses_payload = dict(session.responses or {})
        feedback_store = dict(responses_payload.get("__feedback__") or {})
        feedback_store[str(page.uuid)] = {
            "feedback": feedback_payload,
            "question": question,
            "updated_at": timezone.now().isoformat(),
        }
        responses_payload["__feedback__"] = feedback_store
        session.responses = responses_payload
        session.save(update_fields=['responses'])

        return Response({
            "feedback": feedback_payload,
            "session": OnboardingSessionSerializer(session, context={'request': request}).data,
        })