From fc9ba3396a7bf80550fe7b59a53cce6a1aa66842 Mon Sep 17 00:00:00 2001 From: Viswamedha Nalabotu Date: Wed, 18 Mar 2026 23:01:20 +0000 Subject: [PATCH] Added ca gen prompt, few more tools, updated assessment task for agent prompts --- apps/knowledge/tasks.py | 82 ++++++++++++++++----------- apps/onboarding/consumers/generate.py | 18 +++--- apps/onboarding/consumers/prompts.py | 26 ++++++++- apps/onboarding/mcp.py | 59 ++++++++++++++++++- 4 files changed, 137 insertions(+), 48 deletions(-) diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py index e4faa02..0729c74 100644 --- a/apps/knowledge/tasks.py +++ b/apps/knowledge/tasks.py @@ -122,9 +122,9 @@ def ingest_training_file_task(self, file_uuid): @shared_task(name="apps.knowledge.tasks.update_agent_prompts_from_file_task", bind=True, soft_time_limit=120, time_limit=180) def update_agent_prompts_from_file_task(self, role_uuid: str): """ - After a training file is ingested (or deleted), refine the curriculum AgentConfig - system prompt using document content. Resets to the canonical base prompt when no - files remain. + After a training file is ingested (or deleted), refine the curriculum and assessment + AgentConfig system prompts using document content. Resets to canonical base prompts + when no files remain. """ from apps.accounts.models import Role from apps.onboarding.consumers.prompts import OnboardingPrompts @@ -136,10 +136,10 @@ def update_agent_prompts_from_file_task(self, role_uuid: str): logger.warning("update_agent_prompts_from_file_task: role %s not found", role_uuid) return - curriculum_config = AgentConfig.objects.filter(role=role, agent_type='curriculum').first() - if not curriculum_config: - logger.warning("update_agent_prompts_from_file_task: no curriculum config for role %s", role_uuid) - return + configs = { + cfg.agent_type: cfg + for cfg in AgentConfig.objects.filter(role=role, agent_type__in=['curriculum', 'assessment']) + } chunk_texts = list( RoleRagDocument.objects.filter(role=role, is_active=True) @@ -147,39 +147,53 @@ def update_agent_prompts_from_file_task(self, role_uuid: str): .values_list('content', flat=True)[:30] ) - # No files left... so we should reset + # No files left — reset both to their canonical base prompts if not chunk_texts: - curriculum_config.system_prompt = OnboardingPrompts.default_curriculum_prompt(role.name) - curriculum_config.save(update_fields=['system_prompt', 'updated_at']) - logger.info("update_agent_prompts_from_file_task: reset to base prompt for role %s", role_uuid) + to_update = [] + if 'curriculum' in configs: + configs['curriculum'].system_prompt = OnboardingPrompts.default_curriculum_prompt(role.name) + to_update.append(configs['curriculum']) + if 'assessment' in configs: + configs['assessment'].system_prompt = OnboardingPrompts.default_assessment_prompt(role.name) + to_update.append(configs['assessment']) + for cfg in to_update: + cfg.save(update_fields=['system_prompt', 'updated_at']) + logger.info("update_agent_prompts_from_file_task: reset to base prompts for role %s", role_uuid) return combined_text = '\n\n'.join(chunk_texts)[:6000] - base_prompt = OnboardingPrompts.default_curriculum_prompt(role.name) + + refine_calls = [ + ( + 'curriculum', + OnboardingPrompts.refine_curriculum_prompt( + role.name, OnboardingPrompts.default_curriculum_prompt(role.name), combined_text + ), + ), + ( + 'assessment', + OnboardingPrompts.refine_assessment_prompt( + role.name, OnboardingPrompts.default_assessment_prompt(role.name), combined_text + ), + ), + ] try: with Client(timeout=Timeout(60.0)) as client: - response = client.post( - settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, - json={ - "model": "meta-llama-3.1-8b-instruct", - "messages": [ - { - "role": "user", - "content": OnboardingPrompts.refine_curriculum_prompt( - role.name, base_prompt, combined_text - ), - }, - ], - "max_tokens": 600, - }, - ) - response.raise_for_status() - refined_prompt = response.json()["choices"][0]["message"]["content"].strip() + for agent_type, user_prompt in refine_calls: + if agent_type not in configs: + continue + response = client.post( + settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, + json={ + "model": "meta-llama-3.1-8b-instruct", + "messages": [{"role": "user", "content": user_prompt}], + "max_tokens": 600, + }, + ) + response.raise_for_status() + configs[agent_type].system_prompt = response.json()["choices"][0]["message"]["content"].strip() + configs[agent_type].save(update_fields=['system_prompt', 'updated_at']) + logger.info("update_agent_prompts_from_file_task: refined %s prompt for role %s", agent_type, role_uuid) except Exception as e: logger.exception("update_agent_prompts_from_file_task: LLM call failed for role %s: %s", role_uuid, e) - return - - curriculum_config.system_prompt = refined_prompt - curriculum_config.save(update_fields=['system_prompt', 'updated_at']) - logger.info("update_agent_prompts_from_file_task: refined curriculum prompt for role %s", role_uuid) diff --git a/apps/onboarding/consumers/generate.py b/apps/onboarding/consumers/generate.py index 2ebdaf2..62c243f 100644 --- a/apps/onboarding/consumers/generate.py +++ b/apps/onboarding/consumers/generate.py @@ -1,4 +1,5 @@ import json +import random import re from uuid import uuid4 @@ -39,8 +40,13 @@ class OnboardingGenerateConsumer(BaseOnboardingConsumer): ca_config = await self.get_config_by_type(role.uuid, 'curriculum') if not ca_config: return await self.send_error("Missing curriculum AgentConfig for this role") - ca_response = await self.orchestrate(OnboardingPrompts.curriculum_generation_prompt(), ca_config, minimum_turns=1, max_tokens=384) - topics = self._extract_json_list(ca_response) + initial_hits = await self.fetch_knowledge_context(role.uuid, f"{role.name} role responsibilities and key training areas") + initial_context = self.format_knowledge_context(initial_hits) + ca_response = await self.orchestrate( + OnboardingPrompts.curriculum_generation_prompt(str(role.uuid), role.name, initial_context), + ca_config, minimum_turns=1, max_tokens=384, + ) + topics = self._extract_json_list(ca_response)[:15] if not topics: return await self.send_log(LogType.ERROR, "Curriculum generation returned no topics", f"Curriculum generation produced no topics for role={role.name} (uuid={role.uuid})") full_structure = [] @@ -75,13 +81,7 @@ class OnboardingGenerateConsumer(BaseOnboardingConsumer): aa_config = await self.get_config_by_type(role.uuid, 'assessment') if not aa_config: return await self.send_error("Missing assessment AgentConfig for this role") - question_count = 8 - try: - random_result = await self.router.handle_tool_call("random_int", {"min": 6, "max": 10}) - if isinstance(random_result, dict) and isinstance(random_result.get("value"), int): - question_count = int(random_result["value"]) - except Exception: - question_count = 8 + question_count = random.randint(6, 10) quiz_response = await self.orchestrate( OnboardingPrompts.quiz_generation_prompt(question_count, module_briefs), aa_config, diff --git a/apps/onboarding/consumers/prompts.py b/apps/onboarding/consumers/prompts.py index ce84fc5..45f088d 100644 --- a/apps/onboarding/consumers/prompts.py +++ b/apps/onboarding/consumers/prompts.py @@ -16,10 +16,17 @@ class OnboardingPrompts: return "Double check your reasoning and provide the final improved answer." @staticmethod - def curriculum_generation_prompt(): + def curriculum_generation_prompt(role_uuid: str, role_name: str, initial_context: str = '') -> str: + context_section = f"\nInitial knowledge base search results:\n{initial_context}\n" if initial_context else '' return ( - "Based on available documentation, create an onboarding curriculum for this role. " - "Output ONLY a valid JSON array of 3-5 strings representing module titles. " + f"Create an onboarding curriculum for the '{role_name}' role (role_uuid: {role_uuid}).\n" + "Use the available tools to gather context before deciding on modules:\n" + "- Call get_role_context to read the role description\n" + "- Call list_training_files to see what training materials exist\n" + "- Call search_knowledge with a relevant query to find specific content\n" + f"{context_section}\n" + "Based on what you find, decide how many modules are appropriate for this role's complexity — up to 15. " + "Output ONLY a valid JSON array of strings representing module titles. " "Example: [\"Introduction\", \"Safety\", \"Operations\"]" ) @@ -121,6 +128,19 @@ class OnboardingPrompts: f"Training document content:\n{document_text}" ) + @staticmethod + def refine_assessment_prompt(role_name: str, base_prompt: str, document_text: str) -> str: + return ( + f"You are refining an assessment agent's system prompt for the '{role_name}' role. " + "Training documents have been uploaded. Rewrite the system prompt below so it targets " + "the core competency areas and standards described in those documents. " + "Focus on what should be assessed — key responsibilities, decision points, and quality criteria — " + "not on topic lists. Preserve all original instructions. " + "Return ONLY the refined system prompt text — no commentary, no labels.\n\n" + f"Original system prompt:\n{base_prompt}\n\n" + f"Training document content:\n{document_text}" + ) + FALLBACK_SYSTEM_PROMPT = 'You are a helpful onboarding assistant.' KA_HELP_FALLBACK = ( diff --git a/apps/onboarding/mcp.py b/apps/onboarding/mcp.py index 2a7ef00..8438fa0 100644 --- a/apps/onboarding/mcp.py +++ b/apps/onboarding/mcp.py @@ -8,7 +8,7 @@ from django.db.models import Q from pgvector.django import CosineDistance from apps.accounts.models import Role -from apps.knowledge.models import RoleRagDocument +from apps.knowledge.models import RoleRagDocument, TrainingFile from apps.onboarding.models import OnboardingSession logger = logging.getLogger(__name__) @@ -61,7 +61,7 @@ class MCPRouter: async def _get_embedding(self, text): logger.info('MCP embedding request started') - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=60.0) as client: response = await client.post( settings.INFERENCE_EMBEDDINGS_ENDPOINT, json={'input': text}, @@ -154,6 +154,61 @@ class MCPRouter: logger.info('MCP update_progress completed: session_uuid=%s', session_uuid) return {'status': 'success', 'new_state': state} + @mcp_tool( + name='get_role_context', + description='Get the name, description, and organization for a role. Call this first to understand what the role involves before generating content.', + input_schema={ + 'type': 'object', + 'properties': { + 'role_uuid': {'type': 'string', 'description': 'The UUID of the role'}, + }, + 'required': ['role_uuid'], + }, + ) + @database_sync_to_async + def _get_role_context(self, args): + role_uuid = args.get('role_uuid') + role = Role.objects.select_related('organization').filter(uuid=role_uuid).first() + if role is None: + logger.warning('MCP get_role_context role not found: role_uuid=%s', role_uuid) + return {'error': 'Role not found'} + logger.info('MCP get_role_context completed: role_uuid=%s', role_uuid) + return { + 'name': role.name, + 'description': role.description or '', + 'organization': role.organization.name, + 'member_count': role.members.count(), + } + + @mcp_tool( + name='list_training_files', + description='List processed training files available for a role. Use this to understand what source materials exist before generating curriculum or content.', + input_schema={ + 'type': 'object', + 'properties': { + 'role_uuid': {'type': 'string', 'description': 'The UUID of the role'}, + }, + 'required': ['role_uuid'], + }, + ) + @database_sync_to_async + def _list_training_files(self, args): + role_uuid = args.get('role_uuid') + role = Role.objects.select_related('organization').filter(uuid=role_uuid).first() + if role is None: + logger.warning('MCP list_training_files role not found: role_uuid=%s', role_uuid) + return {'error': 'Role not found'} + files = list( + TrainingFile.objects.filter( + organization=role.organization, + is_processed=True, + ).filter( + Q(role__uuid=role_uuid) | Q(role__isnull=True) + ).values('file_name', 'description', 'file_type')[:20] + ) + logger.info('MCP list_training_files completed: role_uuid=%s count=%s', role_uuid, len(files)) + return {'files': files, 'count': len(files)} + @mcp_tool( name='random_int', description='Generate a random integer in an inclusive range.',