From fc9ba3396a7bf80550fe7b59a53cce6a1aa66842 Mon Sep 17 00:00:00 2001
From: Viswamedha Nalabotu <vxn217@student.bham.ac.uk>
Date: Wed, 18 Mar 2026 23:01:20 +0000
Subject: [PATCH] Add curriculum generation prompt and MCP role tools; refine
 assessment agent prompt from training files

---
 apps/knowledge/tasks.py               | 82 ++++++++++++++++-----------
 apps/onboarding/consumers/generate.py | 18 +++---
 apps/onboarding/consumers/prompts.py  | 26 ++++++++-
 apps/onboarding/mcp.py                | 59 ++++++++++++++++++-
 4 files changed, 137 insertions(+), 48 deletions(-)

diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py
index e4faa02..0729c74 100644
--- a/apps/knowledge/tasks.py
+++ b/apps/knowledge/tasks.py
@@ -122,9 +122,9 @@ def ingest_training_file_task(self, file_uuid):
 @shared_task(name="apps.knowledge.tasks.update_agent_prompts_from_file_task", bind=True, soft_time_limit=120, time_limit=180)
 def update_agent_prompts_from_file_task(self, role_uuid: str):
     """
-    After a training file is ingested (or deleted), refine the curriculum AgentConfig
-    system prompt using document content. Resets to the canonical base prompt when no
-    files remain.
+    After a training file is ingested (or deleted), refine the curriculum and assessment
+    AgentConfig system prompts using document content. Resets to canonical base prompts
+    when no files remain.
     """
     from apps.accounts.models import Role
     from apps.onboarding.consumers.prompts import OnboardingPrompts
@@ -136,10 +136,10 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
         logger.warning("update_agent_prompts_from_file_task: role %s not found", role_uuid)
         return
 
-    curriculum_config = AgentConfig.objects.filter(role=role, agent_type='curriculum').first()
-    if not curriculum_config:
-        logger.warning("update_agent_prompts_from_file_task: no curriculum config for role %s", role_uuid)
-        return
+    configs = {
+        cfg.agent_type: cfg
+        for cfg in AgentConfig.objects.filter(role=role, agent_type__in=['curriculum', 'assessment'])
+    }
 
     chunk_texts = list(
         RoleRagDocument.objects.filter(role=role, is_active=True)
@@ -147,39 +147,53 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
         .values_list('content', flat=True)[:30]
     )
 
-    # No files left... so we should reset
+    # No files left — reset both to their canonical base prompts
     if not chunk_texts:
-        curriculum_config.system_prompt = OnboardingPrompts.default_curriculum_prompt(role.name)
-        curriculum_config.save(update_fields=['system_prompt', 'updated_at'])
-        logger.info("update_agent_prompts_from_file_task: reset to base prompt for role %s", role_uuid)
+        to_update = []
+        if 'curriculum' in configs:
+            configs['curriculum'].system_prompt = OnboardingPrompts.default_curriculum_prompt(role.name)
+            to_update.append(configs['curriculum'])
+        if 'assessment' in configs:
+            configs['assessment'].system_prompt = OnboardingPrompts.default_assessment_prompt(role.name)
+            to_update.append(configs['assessment'])
+        for cfg in to_update:
+            cfg.save(update_fields=['system_prompt', 'updated_at'])
+        logger.info("update_agent_prompts_from_file_task: reset to base prompts for role %s", role_uuid)
         return
 
     combined_text = '\n\n'.join(chunk_texts)[:6000]
-    base_prompt = OnboardingPrompts.default_curriculum_prompt(role.name)
+
+    refine_calls = [
+        (
+            'curriculum',
+            OnboardingPrompts.refine_curriculum_prompt(
+                role.name, OnboardingPrompts.default_curriculum_prompt(role.name), combined_text
+            ),
+        ),
+        (
+            'assessment',
+            OnboardingPrompts.refine_assessment_prompt(
+                role.name, OnboardingPrompts.default_assessment_prompt(role.name), combined_text
+            ),
+        ),
+    ]
 
     try:
         with Client(timeout=Timeout(60.0)) as client:
-            response = client.post(
-                settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
-                json={
-                    "model": "meta-llama-3.1-8b-instruct",
-                    "messages": [
-                        {
-                            "role": "user",
-                            "content": OnboardingPrompts.refine_curriculum_prompt(
-                                role.name, base_prompt, combined_text
-                            ),
-                        },
-                    ],
-                    "max_tokens": 600,
-                },
-            )
-            response.raise_for_status()
-            refined_prompt = response.json()["choices"][0]["message"]["content"].strip()
+            for agent_type, user_prompt in refine_calls:
+                if agent_type not in configs:
+                    continue
+                response = client.post(
+                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
+                    json={
+                        "model": "meta-llama-3.1-8b-instruct",
+                        "messages": [{"role": "user", "content": user_prompt}],
+                        "max_tokens": 600,
+                    },
+                )
+                response.raise_for_status()
+                configs[agent_type].system_prompt = response.json()["choices"][0]["message"]["content"].strip()
+                configs[agent_type].save(update_fields=['system_prompt', 'updated_at'])
+                logger.info("update_agent_prompts_from_file_task: refined %s prompt for role %s", agent_type, role_uuid)
     except Exception as e:
         logger.exception("update_agent_prompts_from_file_task: LLM call failed for role %s: %s", role_uuid, e)
-        return
-
-    curriculum_config.system_prompt = refined_prompt
-    curriculum_config.save(update_fields=['system_prompt', 'updated_at'])
-    logger.info("update_agent_prompts_from_file_task: refined curriculum prompt for role %s", role_uuid)
diff --git a/apps/onboarding/consumers/generate.py b/apps/onboarding/consumers/generate.py
index 2ebdaf2..62c243f 100644
--- a/apps/onboarding/consumers/generate.py
+++ b/apps/onboarding/consumers/generate.py
@@ -1,4 +1,5 @@
 import json
+import random
 import re
 from uuid import uuid4
 
@@ -39,8 +40,13 @@ class OnboardingGenerateConsumer(BaseOnboardingConsumer):
         ca_config = await self.get_config_by_type(role.uuid, 'curriculum')
         if not ca_config:
             return await self.send_error("Missing curriculum AgentConfig for this role")
-        ca_response = await self.orchestrate(OnboardingPrompts.curriculum_generation_prompt(), ca_config, minimum_turns=1, max_tokens=384)
-        topics = self._extract_json_list(ca_response)
+        initial_hits = await self.fetch_knowledge_context(role.uuid, f"{role.name} role responsibilities and key training areas")  # first knowledge lookup, used to seed the prompt
+        initial_context = self.format_knowledge_context(initial_hits)
+        ca_response = await self.orchestrate(
+            OnboardingPrompts.curriculum_generation_prompt(str(role.uuid), role.name, initial_context),
+            ca_config, minimum_turns=1, max_tokens=384,
+        )
+        topics = (self._extract_json_list(ca_response) or [])[:15]  # `or []` keeps the no-topics check below reachable if the helper yields None instead of a list (TODO confirm its failure value); 15 matches the cap stated in the prompt
         if not topics:
             return await self.send_log(LogType.ERROR, "Curriculum generation returned no topics", f"Curriculum generation produced no topics for role={role.name} (uuid={role.uuid})")
         full_structure = []
@@ -75,13 +81,7 @@ class OnboardingGenerateConsumer(BaseOnboardingConsumer):
         aa_config = await self.get_config_by_type(role.uuid, 'assessment')
         if not aa_config:
             return await self.send_error("Missing assessment AgentConfig for this role")
-        question_count = 8
-        try:
-            random_result = await self.router.handle_tool_call("random_int", {"min": 6, "max": 10})
-            if isinstance(random_result, dict) and isinstance(random_result.get("value"), int):
-                question_count = int(random_result["value"])
-        except Exception:
-            question_count = 8
+        question_count = random.randint(6, 10)
         quiz_response = await self.orchestrate(
             OnboardingPrompts.quiz_generation_prompt(question_count, module_briefs),
             aa_config,
diff --git a/apps/onboarding/consumers/prompts.py b/apps/onboarding/consumers/prompts.py
index ce84fc5..45f088d 100644
--- a/apps/onboarding/consumers/prompts.py
+++ b/apps/onboarding/consumers/prompts.py
@@ -16,10 +16,17 @@ class OnboardingPrompts:
         return "Double check your reasoning and provide the final improved answer."
 
     @staticmethod
-    def curriculum_generation_prompt():
+    def curriculum_generation_prompt(role_uuid: str, role_name: str, initial_context: str = '') -> str:  # builds the prompt asking for a JSON array of module titles for the given role
+        context_section = f"\nInitial knowledge base search results:\n{initial_context}\n" if initial_context else ''  # empty string when no initial context is supplied
         return (
-            "Based on available documentation, create an onboarding curriculum for this role. "
-            "Output ONLY a valid JSON array of 3-5 strings representing module titles. "
+            f"Create an onboarding curriculum for the '{role_name}' role (role_uuid: {role_uuid}).\n"  # role_uuid is included so the model can pass it to the tools named below
+            "Use the available tools to gather context before deciding on modules:\n"
+            "- Call get_role_context to read the role description\n"
+            "- Call list_training_files to see what training materials exist\n"
+            "- Call search_knowledge with a relevant query to find specific content\n"
+            f"{context_section}\n"  # reduces to a single newline when context_section is empty
+            "Based on what you find, decide how many modules are appropriate for this role's complexity — up to 15. "  # the caller in generate.py also truncates the parsed list to 15 entries
+            "Output ONLY a valid JSON array of strings representing module titles. "
             "Example: [\"Introduction\", \"Safety\", \"Operations\"]"
         )
 
@@ -121,6 +128,19 @@ class OnboardingPrompts:
             f"Training document content:\n{document_text}"
         )
 
+    @staticmethod
+    def refine_assessment_prompt(role_name: str, base_prompt: str, document_text: str) -> str:  # builds the user message asking the LLM to rewrite the assessment agent's system prompt
+        return (
+            f"You are refining an assessment agent's system prompt for the '{role_name}' role. "
+            "Training documents have been uploaded. Rewrite the system prompt below so it targets "
+            "the core competency areas and standards described in those documents. "
+            "Focus on what should be assessed — key responsibilities, decision points, and quality criteria — "
+            "not on topic lists. Preserve all original instructions. "
+            "Return ONLY the refined system prompt text — no commentary, no labels.\n\n"  # the task in tasks.py stores the reply (stripped) directly as system_prompt
+            f"Original system prompt:\n{base_prompt}\n\n"  # tasks.py passes default_assessment_prompt(role.name) here
+            f"Training document content:\n{document_text}"  # tasks.py passes joined chunk text truncated to 6000 characters
+        )
+
     FALLBACK_SYSTEM_PROMPT = 'You are a helpful onboarding assistant.'
 
     KA_HELP_FALLBACK = (
diff --git a/apps/onboarding/mcp.py b/apps/onboarding/mcp.py
index 2a7ef00..8438fa0 100644
--- a/apps/onboarding/mcp.py
+++ b/apps/onboarding/mcp.py
@@ -8,7 +8,7 @@ from django.db.models import Q
 from pgvector.django import CosineDistance
 
 from apps.accounts.models import Role
-from apps.knowledge.models import RoleRagDocument
+from apps.knowledge.models import RoleRagDocument, TrainingFile
 from apps.onboarding.models import OnboardingSession
 
 logger = logging.getLogger(__name__)
@@ -61,7 +61,7 @@ class MCPRouter:
 
     async def _get_embedding(self, text):
         logger.info('MCP embedding request started')
-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(timeout=60.0) as client:
             response = await client.post(
                 settings.INFERENCE_EMBEDDINGS_ENDPOINT,
                 json={'input': text},
@@ -154,6 +154,61 @@ class MCPRouter:
         logger.info('MCP update_progress completed: session_uuid=%s', session_uuid)
         return {'status': 'success', 'new_state': state}
 
+    @mcp_tool(
+        name='get_role_context',
+        description='Get the name, description, and organization for a role. Call this first to understand what the role involves before generating content.',
+        input_schema={
+            'type': 'object',
+            'properties': {
+                'role_uuid': {'type': 'string', 'description': 'The UUID of the role'},
+            },
+            'required': ['role_uuid'],
+        },
+    )
+    @database_sync_to_async  # presumably Channels' wrapper so the blocking ORM calls below can be awaited — TODO confirm import
+    def _get_role_context(self, args):  # MCP tool handler: returns basic facts about the role identified by args['role_uuid']
+        role_uuid = args.get('role_uuid')
+        role = Role.objects.select_related('organization').filter(uuid=role_uuid).first()  # organization is joined because role.organization.name is read below
+        if role is None:
+            logger.warning('MCP get_role_context role not found: role_uuid=%s', role_uuid)
+            return {'error': 'Role not found'}  # a missing role is reported in the payload rather than raised
+        logger.info('MCP get_role_context completed: role_uuid=%s', role_uuid)
+        return {
+            'name': role.name,
+            'description': role.description or '',  # falsy descriptions are normalised to an empty string
+            'organization': role.organization.name,
+            'member_count': role.members.count(),  # NOTE(review): not mentioned in the tool description above — confirm it is intended
+        }
+
+    @mcp_tool(
+        name='list_training_files',
+        description='List processed training files available for a role. Use this to understand what source materials exist before generating curriculum or content.',
+        input_schema={
+            'type': 'object',
+            'properties': {
+                'role_uuid': {'type': 'string', 'description': 'The UUID of the role'},
+            },
+            'required': ['role_uuid'],
+        },
+    )
+    @database_sync_to_async  # presumably Channels' wrapper so the blocking ORM calls below can be awaited — TODO confirm import
+    def _list_training_files(self, args):  # MCP tool handler: lists processed training files visible to the role in args['role_uuid']
+        role_uuid = args.get('role_uuid')
+        role = Role.objects.select_related('organization').filter(uuid=role_uuid).first()  # organization is joined because it is used in the file filter below
+        if role is None:
+            logger.warning('MCP list_training_files role not found: role_uuid=%s', role_uuid)
+            return {'error': 'Role not found'}  # a missing role is reported in the payload rather than raised
+        files = list(
+            TrainingFile.objects.filter(
+                organization=role.organization,
+                is_processed=True,  # unprocessed files are excluded
+            ).filter(
+                Q(role__uuid=role_uuid) | Q(role__isnull=True)  # files attached to this role, plus files with no role set
+            ).values('file_name', 'description', 'file_type')[:20]  # at most 20 rows; no explicit ordering is applied in this query
+        )
+        logger.info('MCP list_training_files completed: role_uuid=%s count=%s', role_uuid, len(files))
+        return {'files': files, 'count': len(files)}  # count is the length of the capped list, not a total
+
     @mcp_tool(
         name='random_int',
         description='Generate a random integer in an inclusive range.',