Files
ds_zagres_ai/app/models/document.py
T
2025-05-09 15:41:16 +01:00

60 lines
2.1 KiB
Python

"""
Document models for the application.
"""
from datetime import datetime
import json
from app.database.db import db
class Document(db.Model):
    """A document stored in the library.

    Each row holds the document's title, an optional description and file
    path, its content type, a processing status, the id of the uploading
    user, and creation / last-update timestamps.  The chunks derived from
    the document are reachable through ``chunks``.
    """

    __tablename__ = 'documents'

    # Descriptive columns.
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(255), nullable=False)
    description = db.Column(db.Text, nullable=True)
    file_path = db.Column(db.String(255), nullable=True)
    content_type = db.Column(db.String(50), nullable=False)

    # Processing state: pending, processing, completed, error.
    status = db.Column(db.String(20), default='pending')

    # Timestamps come from datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Foreign key to the uploading user (users.id); required.
    uploaded_by = db.Column(
        db.Integer,
        db.ForeignKey('users.id'),
        nullable=False,
    )

    # Child chunks.  Each DocumentChunk gets a ``document`` back-reference,
    # and the cascade setting removes chunks together with their document.
    chunks = db.relationship(
        'DocumentChunk',
        backref='document',
        lazy='dynamic',
        cascade='all, delete-orphan',
    )

    def __repr__(self):
        """Return a short debug representation with the id and title."""
        return f'<Document {self.id}: {self.title}>'
class DocumentChunk(db.Model):
    """A chunk of a document, stored for embedding.

    Each row holds the chunk text, its position within the parent document,
    an optional external embedding id, and optional metadata serialized as
    a JSON string in ``meta_data``.  Use ``set_metadata`` / ``get_metadata``
    rather than touching ``meta_data`` directly.
    """

    __tablename__ = 'document_chunks'

    id = db.Column(db.Integer, primary_key=True)
    content = db.Column(db.Text, nullable=False)
    chunk_index = db.Column(db.Integer, nullable=False)
    embedding_id = db.Column(db.String(100), nullable=True)  # ID in Pinecone
    meta_data = db.Column(db.Text, nullable=True)  # JSON string of metadata
    # Naive UTC timestamp (datetime.utcnow).
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Foreign keys
    document_id = db.Column(
        db.Integer,
        db.ForeignKey('documents.id'),
        nullable=False,
    )

    def set_metadata(self, metadata_dict):
        """Store ``metadata_dict`` in ``meta_data`` as a JSON string.

        Args:
            metadata_dict: JSON-serializable mapping, or ``None`` to clear
                the stored metadata.

        Raises:
            TypeError: If the value is not JSON-serializable.
        """
        if metadata_dict is None:
            # Store NULL instead of the JSON text "null": that text is a
            # truthy string and would make get_metadata() return None.
            self.meta_data = None
            return
        self.meta_data = json.dumps(metadata_dict)

    def get_metadata(self):
        """Return the stored metadata as a dictionary.

        Returns:
            The decoded metadata, or an empty dict when nothing is stored
            (``meta_data`` is NULL/empty or holds the JSON literal ``null``).

        Raises:
            json.JSONDecodeError: If ``meta_data`` is not valid JSON.
        """
        if not self.meta_data:
            return {}
        decoded = json.loads(self.meta_data)
        # Rows written before set_metadata() handled None may hold "null".
        return {} if decoded is None else decoded

    def __repr__(self):
        """Return a short debug representation with id, document id and index."""
        return f'<DocumentChunk {self.id}: doc_id={self.document_id}, index={self.chunk_index}>'