74 lines
No EOL
2.7 KiB
Python
74 lines
No EOL
2.7 KiB
Python
import os
|
|
|
|
from django.db import transaction
|
|
from django.db.models import CASCADE, BooleanField, CharField, FileField, ForeignKey, IntegerField, JSONField, Model, TextField
|
|
from django.db.models.signals import post_delete, post_save
|
|
from django.dispatch import receiver
|
|
from django.utils.translation import gettext_lazy as _
|
|
from pgvector.django import VectorField
|
|
|
|
from apps.accounts.mixins import IdentifierMixin, TimeStampMixin
|
|
from apps.accounts.models import Role, User
|
|
|
|
class TrainingFile(IdentifierMixin, TimeStampMixin, Model):
    """An uploaded file tied to a ``Role`` and to the ``User`` who uploaded it.

    Besides the file itself, the row stores descriptive details (name, size,
    type, free-text description) and two progress markers: ``status`` and
    ``is_processed``.
    """

    # Allowed values for ``status`` as (stored value, display label) pairs.
    STATUS_CHOICES = [
        ("ingesting", "Ingesting"),
        ("chunked", "Chunked"),
        ("embedded", "Embedded"),
        ("failed", "Failed"),
    ]

    # Ownership: deleting the role or the uploading user cascades to this row.
    role = ForeignKey(
        Role,
        on_delete=CASCADE,
        related_name="training_files",
    )
    uploaded_by = ForeignKey(
        User,
        on_delete=CASCADE,
        related_name="uploaded_training_files",
    )

    # The upload itself; the storage path is bucketed by upload date.
    file = FileField(upload_to="training_files/%Y/%m/%d/")
    file_name = CharField(max_length=255)
    # NOTE(review): unit is presumably bytes -- confirm against the uploader.
    file_size = IntegerField()
    file_type = CharField(max_length=50)

    # Optional free text; stored as an empty string when omitted.
    description = TextField(blank=True, default="")
    # New rows start out as "ingesting".
    status = CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default="ingesting",
    )
    is_processed = BooleanField(default=False)

    class Meta:
        verbose_name = _("Training File")
        verbose_name_plural = _("Training Files")
        # Descending by ``created_at``, which is not declared in this class
        # (presumably contributed by TimeStampMixin -- confirm).
        ordering = ["-created_at"]

    def __str__(self) -> str:
        """Return the file name followed by the owning role's name in parentheses."""
        label = self.file_name
        role_name = self.role.name
        return f"{label} ({role_name})"
|
|
|
|
class RoleRagDocument(IdentifierMixin, TimeStampMixin, Model):
    """A piece of text stored for a ``Role``, with an optional embedding vector.

    A row may point back to the ``TrainingFile`` it belongs to; that link is
    nullable, so rows without a source file are allowed.
    """

    # Deleting the role (or the linked training file) cascades to this row.
    role = ForeignKey(
        Role,
        on_delete=CASCADE,
        related_name="rag_documents",
    )
    # Reverse accessor on TrainingFile is ``chunks``.
    training_file = ForeignKey(
        TrainingFile,
        on_delete=CASCADE,
        related_name="chunks",
        null=True,
        blank=True,
    )

    content = TextField()
    # Indexed, 64 characters wide.
    # NOTE(review): looks sized for a hex SHA-256 digest of ``content`` -- confirm.
    content_hash = CharField(max_length=64, db_index=True)

    # 1536-dimensional pgvector column; nullable, so a row can exist without
    # an embedding.
    embedding = VectorField(dimensions=1536, null=True, blank=True)

    # Free-form JSON; each new row gets its own empty dict by default.
    metadata = JSONField(default=dict, blank=True)
    chunk_index = IntegerField(default=0)
    is_active = BooleanField(default=True)

    class Meta:
        verbose_name = _("Role RAG Document")
        verbose_name_plural = _("Role RAG Documents")

    def __str__(self) -> str:
        """Return the role name and this row's chunk index."""
        role_name = self.role.name
        position = self.chunk_index
        return f"{role_name} - Chunk {position}"
|
|
|
|
@receiver(post_delete, sender=TrainingFile)
def delete_physical_file(sender, instance, **kwargs):
    """Delete the stored upload after its ``TrainingFile`` row is removed.

    The deletion goes through the field's storage backend rather than
    ``os.remove(instance.file.path)``. That matters for two reasons:

    * ``FieldFile.path`` is only available on storages backed by the local
      filesystem; other backends raise ``NotImplementedError``.
    * Checking ``os.path.isfile`` and then calling ``os.remove`` leaves a
      window in which the file can disappear and the handler crashes. Django's
      ``FileSystemStorage.delete`` already tolerates a missing file.

    Args:
        sender: The model class that sent the signal (``TrainingFile``).
        instance: The ``TrainingFile`` that was just deleted.
        **kwargs: Remaining ``post_delete`` signal arguments (unused).
    """
    stored_file = instance.file
    # A FieldFile with no name is falsy: nothing was ever stored for this row.
    if not stored_file:
        return
    stored_file.storage.delete(stored_file.name)
|
|
|
|
@receiver(post_save, sender=TrainingFile)
def trigger_ingestion(sender, instance, created, **kwargs):
    """Queue the ingestion task for a newly created ``TrainingFile``.

    Saves of existing rows are ignored. For new rows the task is dispatched
    from an on-commit callback, so it is only sent once the surrounding
    transaction has committed.

    Args:
        sender: The model class that sent the signal (``TrainingFile``).
        instance: The ``TrainingFile`` that was saved.
        created: ``True`` only when the save inserted a new row.
        **kwargs: Remaining ``post_save`` signal arguments (unused).
    """
    if not created:
        return

    def _dispatch_ingestion():
        # Imported here rather than at module level because a top-level import
        # would be circular.
        from apps.knowledge.tasks import ingest_training_file_task

        file_id = str(instance.uuid)
        ingest_training_file_task.delay(file_id)

    transaction.on_commit(_dispatch_ingestion)