Added CA gen prompt and a few more tools; updated the assessment task for agent prompts

2026-03-18 23:01:20 +00:00 · 2026-03-18 23:01:20 +00:00 · fc9ba3396a
commit fc9ba3396a
parent a594f93154
4 changed files with 137 additions and 48 deletions
--- a/apps/knowledge/tasks.py
+++ b/apps/knowledge/tasks.py
@ -122,9 +122,9 @@ def ingest_training_file_task(self, file_uuid):
@shared_task(name="apps.knowledge.tasks.update_agent_prompts_from_file_task", bind=True, soft_time_limit=120, time_limit=180)
 def update_agent_prompts_from_file_task(self, role_uuid: str):
    """
-    After a training file is ingested (or deleted), refine the curriculum AgentConfig
+    After a training file is ingested (or deleted), refine the curriculum and assessment
-    system prompt using document content. Resets to the canonical base prompt when no
+    AgentConfig system prompts using document content. Resets to canonical base prompts
-    files remain.
+    when no files remain.
    """
    from apps.accounts.models import Role
    from apps.onboarding.consumers.prompts import OnboardingPrompts
@ -136,10 +136,10 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
        logger.warning("update_agent_prompts_from_file_task: role %s not found", role_uuid)
        return
-    curriculum_config = AgentConfig.objects.filter(role=role, agent_type='curriculum').first()
+    configs = {
-    if not curriculum_config:
+        cfg.agent_type: cfg
-        logger.warning("update_agent_prompts_from_file_task: no curriculum config for role %s", role_uuid)
+        for cfg in AgentConfig.objects.filter(role=role, agent_type__in=['curriculum', 'assessment'])
-        return
+    }
    chunk_texts = list(
        RoleRagDocument.objects.filter(role=role, is_active=True)
@ -147,39 +147,53 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
        .values_list('content', flat=True)[:30]
    )
-    # No files left... so we should reset
+    # No files left — reset both to their canonical base prompts
    if not chunk_texts:
-        curriculum_config.system_prompt = OnboardingPrompts.default_curriculum_prompt(role.name)
+        to_update = []
-        curriculum_config.save(update_fields=['system_prompt', 'updated_at'])
+        if 'curriculum' in configs:
-        logger.info("update_agent_prompts_from_file_task: reset to base prompt for role %s", role_uuid)
+            configs['curriculum'].system_prompt = OnboardingPrompts.default_curriculum_prompt(role.name)
            to_update.append(configs['curriculum'])
        if 'assessment' in configs:
            configs['assessment'].system_prompt = OnboardingPrompts.default_assessment_prompt(role.name)
            to_update.append(configs['assessment'])
        for cfg in to_update:
            cfg.save(update_fields=['system_prompt', 'updated_at'])
        logger.info("update_agent_prompts_from_file_task: reset to base prompts for role %s", role_uuid)
        return
    combined_text = '\n\n'.join(chunk_texts)[:6000]
-    base_prompt = OnboardingPrompts.default_curriculum_prompt(role.name)
+
    refine_calls = [
        (
            'curriculum',
            OnboardingPrompts.refine_curriculum_prompt(
                role.name, OnboardingPrompts.default_curriculum_prompt(role.name), combined_text
            ),
        ),
        (
            'assessment',
            OnboardingPrompts.refine_assessment_prompt(
                role.name, OnboardingPrompts.default_assessment_prompt(role.name), combined_text
            ),
        ),
    ]
    try:
        with Client(timeout=Timeout(60.0)) as client:
            for agent_type, user_prompt in refine_calls:
                if agent_type not in configs:
                    continue
                response = client.post(
                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                    json={
                        "model": "meta-llama-3.1-8b-instruct",
-                    "messages": [
+                        "messages": [{"role": "user", "content": user_prompt}],
                        {
                            "role": "user",
                            "content": OnboardingPrompts.refine_curriculum_prompt(
                                role.name, base_prompt, combined_text
                            ),
                        },
                    ],
                        "max_tokens": 600,
                    },
                )
                response.raise_for_status()
-            refined_prompt = response.json()["choices"][0]["message"]["content"].strip()
+                configs[agent_type].system_prompt = response.json()["choices"][0]["message"]["content"].strip()
                configs[agent_type].save(update_fields=['system_prompt', 'updated_at'])
                logger.info("update_agent_prompts_from_file_task: refined %s prompt for role %s", agent_type, role_uuid)
    except Exception as e:
        logger.exception("update_agent_prompts_from_file_task: LLM call failed for role %s: %s", role_uuid, e)
        return
    curriculum_config.system_prompt = refined_prompt
    curriculum_config.save(update_fields=['system_prompt', 'updated_at'])
    logger.info("update_agent_prompts_from_file_task: refined curriculum prompt for role %s", role_uuid)
--- a/apps/onboarding/consumers/generate.py
+++ b/apps/onboarding/consumers/generate.py
@ -1,4 +1,5 @@
 import json
 import random
 import re
 from uuid import uuid4
@ -39,8 +40,13 @@ class OnboardingGenerateConsumer(BaseOnboardingConsumer):
        ca_config = await self.get_config_by_type(role.uuid, 'curriculum')
        if not ca_config:
            return await self.send_error("Missing curriculum AgentConfig for this role")
-        ca_response = await self.orchestrate(OnboardingPrompts.curriculum_generation_prompt(), ca_config, minimum_turns=1, max_tokens=384)
+        initial_hits = await self.fetch_knowledge_context(role.uuid, f"{role.name} role responsibilities and key training areas")
-        topics = self._extract_json_list(ca_response)
+        initial_context = self.format_knowledge_context(initial_hits)
        ca_response = await self.orchestrate(
            OnboardingPrompts.curriculum_generation_prompt(str(role.uuid), role.name, initial_context),
            ca_config, minimum_turns=1, max_tokens=384,
        )
        topics = self._extract_json_list(ca_response)[:15]
        if not topics:
            return await self.send_log(LogType.ERROR, "Curriculum generation returned no topics", f"Curriculum generation produced no topics for role={role.name} (uuid={role.uuid})")
        full_structure = []
@ -75,13 +81,7 @@ class OnboardingGenerateConsumer(BaseOnboardingConsumer):
        aa_config = await self.get_config_by_type(role.uuid, 'assessment')
        if not aa_config:
            return await self.send_error("Missing assessment AgentConfig for this role")
-        question_count = 8
+        question_count = random.randint(6, 10)
        try:
            random_result = await self.router.handle_tool_call("random_int", {"min": 6, "max": 10})
            if isinstance(random_result, dict) and isinstance(random_result.get("value"), int):
                question_count = int(random_result["value"])
        except Exception:
            question_count = 8
        quiz_response = await self.orchestrate(
            OnboardingPrompts.quiz_generation_prompt(question_count, module_briefs),
            aa_config,
--- a/apps/onboarding/consumers/prompts.py
+++ b/apps/onboarding/consumers/prompts.py
@ -16,10 +16,17 @@ class OnboardingPrompts:
        return "Double check your reasoning and provide the final improved answer."
    @staticmethod
-    def curriculum_generation_prompt():
+    def curriculum_generation_prompt(role_uuid: str, role_name: str, initial_context: str = '') -> str:
        context_section = f"\nInitial knowledge base search results:\n{initial_context}\n" if initial_context else ''
        return (
-            "Based on available documentation, create an onboarding curriculum for this role. "
+            f"Create an onboarding curriculum for the '{role_name}' role (role_uuid: {role_uuid}).\n"
-            "Output ONLY a valid JSON array of 3-5 strings representing module titles. "
+            "Use the available tools to gather context before deciding on modules:\n"
            "- Call get_role_context to read the role description\n"
            "- Call list_training_files to see what training materials exist\n"
            "- Call search_knowledge with a relevant query to find specific content\n"
            f"{context_section}\n"
            "Based on what you find, decide how many modules are appropriate for this role's complexity — up to 15. "
            "Output ONLY a valid JSON array of strings representing module titles. "
            "Example: [\"Introduction\", \"Safety\", \"Operations\"]"
        )
@ -121,6 +128,19 @@ class OnboardingPrompts:
            f"Training document content:\n{document_text}"
        )
    @staticmethod
    def refine_assessment_prompt(role_name: str, base_prompt: str, document_text: str) -> str:
        return (
            f"You are refining an assessment agent's system prompt for the '{role_name}' role. "
            "Training documents have been uploaded. Rewrite the system prompt below so it targets "
            "the core competency areas and standards described in those documents. "
            "Focus on what should be assessed — key responsibilities, decision points, and quality criteria — "
            "not on topic lists. Preserve all original instructions. "
            "Return ONLY the refined system prompt text — no commentary, no labels.\n\n"
            f"Original system prompt:\n{base_prompt}\n\n"
            f"Training document content:\n{document_text}"
        )
    FALLBACK_SYSTEM_PROMPT = 'You are a helpful onboarding assistant.'
    KA_HELP_FALLBACK = (
--- a/apps/onboarding/mcp.py
+++ b/apps/onboarding/mcp.py
@ -8,7 +8,7 @@ from django.db.models import Q
 from pgvector.django import CosineDistance
 from apps.accounts.models import Role
-from apps.knowledge.models import RoleRagDocument
+from apps.knowledge.models import RoleRagDocument, TrainingFile
 from apps.onboarding.models import OnboardingSession
 logger = logging.getLogger(__name__)
@ -61,7 +61,7 @@ class MCPRouter:
    async def _get_embedding(self, text):
        logger.info('MCP embedding request started')
-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                settings.INFERENCE_EMBEDDINGS_ENDPOINT,
                json={'input': text},
@ -154,6 +154,61 @@ class MCPRouter:
        logger.info('MCP update_progress completed: session_uuid=%s', session_uuid)
        return {'status': 'success', 'new_state': state}
    @mcp_tool(
        name='get_role_context',
        description='Get the name, description, and organization for a role. Call this first to understand what the role involves before generating content.',
        input_schema={
            'type': 'object',
            'properties': {
                'role_uuid': {'type': 'string', 'description': 'The UUID of the role'},
            },
            'required': ['role_uuid'],
        },
    )
    @database_sync_to_async
    def _get_role_context(self, args):
        """Return basic descriptive information about a role.

        Args:
            args: Tool-call arguments; ``args['role_uuid']`` identifies the role.

        Returns:
            A dict with ``name``, ``description`` (empty string when unset),
            ``organization`` (the organization's name) and ``member_count``.
            Returns ``{'error': 'Role not found'}`` when no role matches or
            when ``role_uuid`` is not a value the ORM can look up.
        """
        # Imported locally so this block does not depend on the file's import header.
        from django.core.exceptions import ValidationError

        role_uuid = args.get('role_uuid')
        try:
            role = Role.objects.select_related('organization').filter(uuid=role_uuid).first()
        except (ValidationError, ValueError):
            # The argument comes from a tool call and may be malformed; report it
            # through the tool's normal error shape instead of raising.
            logger.warning('MCP get_role_context invalid role_uuid: role_uuid=%r', role_uuid)
            return {'error': 'Role not found'}
        if role is None:
            logger.warning('MCP get_role_context role not found: role_uuid=%s', role_uuid)
            return {'error': 'Role not found'}
        logger.info('MCP get_role_context completed: role_uuid=%s', role_uuid)
        return {
            'name': role.name,
            'description': role.description or '',
            'organization': role.organization.name,
            'member_count': role.members.count(),
        }
    @mcp_tool(
        name='list_training_files',
        description='List processed training files available for a role. Use this to understand what source materials exist before generating curriculum or content.',
        input_schema={
            'type': 'object',
            'properties': {
                'role_uuid': {'type': 'string', 'description': 'The UUID of the role'},
            },
            'required': ['role_uuid'],
        },
    )
    @database_sync_to_async
    def _list_training_files(self, args):
        """List processed training files visible to a role.

        A file is included when it belongs to the role's organization, is
        marked processed, and is either attached to this role or attached to
        no role at all.

        Args:
            args: Tool-call arguments; ``args['role_uuid']`` identifies the role.

        Returns:
            ``{'files': [...], 'count': n}`` where each entry carries
            ``file_name``, ``description`` and ``file_type``. At most 20 files
            are returned, so ``count`` is the number returned rather than the
            total available. Returns ``{'error': 'Role not found'}`` when no
            role matches or when ``role_uuid`` is not a value the ORM can look up.
        """
        # Imported locally so this block does not depend on the file's import header.
        from django.core.exceptions import ValidationError

        role_uuid = args.get('role_uuid')
        try:
            role = Role.objects.select_related('organization').filter(uuid=role_uuid).first()
        except (ValidationError, ValueError):
            # The argument comes from a tool call and may be malformed; report it
            # through the tool's normal error shape instead of raising.
            logger.warning('MCP list_training_files invalid role_uuid: role_uuid=%r', role_uuid)
            return {'error': 'Role not found'}
        if role is None:
            logger.warning('MCP list_training_files role not found: role_uuid=%s', role_uuid)
            return {'error': 'Role not found'}
        files = list(
            TrainingFile.objects.filter(
                organization=role.organization,
                is_processed=True,
            ).filter(
                # Role-specific files plus files not attached to any role.
                Q(role__uuid=role_uuid) | Q(role__isnull=True)
            ).values('file_name', 'description', 'file_type')[:20]
        )
        logger.info('MCP list_training_files completed: role_uuid=%s count=%s', role_uuid, len(files))
        return {'files': files, 'count': len(files)}
    @mcp_tool(
        name='random_int',
        description='Generate a random integer in an inclusive range.',