"""Models and signal handlers for uploaded training files and their RAG chunks."""

import os  # noqa: F401  (no longer used below; retained so no file-level import is dropped)

from django.conf import settings
from django.db import transaction
from django.db.models import (
    CASCADE,
    SET_NULL,
    BooleanField,
    CharField,
    FileField,
    ForeignKey,
    IntegerField,
    JSONField,
    Model,
    TextField,
)
from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from pgvector.django import VectorField

from apps.accounts.mixins import IdentifierMixin, TimeStampMixin
from apps.accounts.models import Organization, Role, User


class TrainingFile(IdentifierMixin, TimeStampMixin, Model):
    """An uploaded file belonging to an organization and, optionally, one role.

    A row without a role is rendered as "Organization-wide" by ``__str__``.
    Creating a row enqueues an ingestion task once the surrounding transaction
    commits (see ``trigger_ingestion``), and deleting a row removes the stored
    file (see ``delete_physical_file``).
    """

    STATUS_CHOICES = [
        ('ingesting', 'Ingesting'),
        ('chunked', 'Chunked'),
        ('embedded', 'Embedded'),
        ('failed', 'Failed'),
    ]

    organization = ForeignKey(Organization, on_delete=CASCADE, related_name="training_files")
    # Optional: when unset, the file is treated as organization-wide.
    role = ForeignKey(Role, on_delete=CASCADE, related_name="training_files", null=True, blank=True)
    uploaded_by = ForeignKey(User, on_delete=CASCADE, related_name="uploaded_training_files")

    file = FileField(upload_to='training_files/%Y/%m/%d/')
    file_name = CharField(max_length=255)
    file_size = IntegerField()  # presumably a size in bytes; confirm against the upload code
    file_type = CharField(max_length=50)
    description = TextField(blank=True, default='')

    status = CharField(max_length=20, choices=STATUS_CHOICES, default='ingesting')
    is_processed = BooleanField(default=False)

    class Meta:
        verbose_name = _("Training File")
        verbose_name_plural = _("Training Files")
        # `created_at` is not declared here; presumably provided by TimeStampMixin.
        ordering = ['-created_at']

    def __str__(self) -> str:
        """Return the file name followed by its role name or organization scope."""
        # Test `role_id` rather than `role` so the check itself does not hit the DB.
        if self.role_id:
            return f"{self.file_name} ({self.role.name})"
        return f"{self.file_name} ({self.organization.name} - Organization-wide)"


class RoleRagDocument(IdentifierMixin, TimeStampMixin, Model):
    """A single content chunk, with an optional embedding vector.

    Each chunk belongs to an organization and may additionally reference a
    role and the ``TrainingFile`` it came from (reverse accessor ``chunks``).
    """

    organization = ForeignKey(Organization, on_delete=CASCADE, related_name='rag_documents')
    # SET_NULL: deleting the role keeps the chunk, which then has no role.
    role = ForeignKey(Role, on_delete=SET_NULL, related_name='rag_documents', null=True, blank=True)
    training_file = ForeignKey(
        TrainingFile,
        on_delete=CASCADE,
        related_name='chunks',
        null=True,
        blank=True,
    )

    content = TextField()
    content_hash = CharField(max_length=64, db_index=True)
    # Vector width comes from settings; nullable, so a chunk may exist without an embedding.
    embedding = VectorField(dimensions=settings.EMBEDDING_DIMENSIONS, null=True, blank=True)
    metadata = JSONField(default=dict, blank=True)
    chunk_index = IntegerField(default=0)
    is_active = BooleanField(default=True)

    class Meta:
        verbose_name = _("Role RAG Document")
        verbose_name_plural = _("Role RAG Documents")

    def __str__(self) -> str:
        """Return the role name (or organization scope) followed by the chunk index."""
        if self.role_id:
            return f"{self.role.name} - Chunk {self.chunk_index}"
        return f"{self.organization.name} (Organization-wide) - Chunk {self.chunk_index}"


@receiver(post_delete, sender=TrainingFile)
def delete_physical_file(sender, instance, **kwargs):
    """Remove the stored file after its ``TrainingFile`` row has been deleted.

    Deletion goes through the field's storage backend rather than
    ``os.remove(instance.file.path)``: ``.path`` raises ``NotImplementedError``
    on storages that have no local filesystem path, and the storage call also
    avoids the check-then-remove race of ``os.path.isfile`` + ``os.remove``.

    Args:
        sender: The model class that sent the signal (``TrainingFile``).
        instance: The ``TrainingFile`` instance that was deleted.
        **kwargs: Remaining signal arguments (unused).
    """
    stored_file = instance.file
    # A FieldFile with no name is falsy: nothing was ever stored for this row.
    if not stored_file:
        return
    # NOTE(review): this runs immediately, so if the enclosing transaction is
    # later rolled back the row survives but its file is already gone.
    stored_file.storage.delete(stored_file.name)


@receiver(post_save, sender=TrainingFile)
def trigger_ingestion(sender, instance, created, **kwargs):
    """Enqueue the ingestion task for a newly created ``TrainingFile``.

    Saves of existing rows are ignored. The task is scheduled with
    ``transaction.on_commit`` so it is only enqueued once the transaction that
    created the row has committed.

    Args:
        sender: The model class that sent the signal (``TrainingFile``).
        instance: The ``TrainingFile`` instance that was saved.
        created: True when the save inserted a new row.
        **kwargs: Remaining signal arguments (unused).
    """
    if not created:
        return

    def _enqueue():
        # Imported lazily to avoid a circular import with the tasks module.
        from apps.knowledge.tasks import ingest_training_file_task

        ingest_training_file_task.delay(str(instance.uuid))

    transaction.on_commit(_enqueue)