60 lines
2.1 KiB
Python
60 lines
2.1 KiB
Python
|
|
"""
Document models for the application.
"""
|
||
|
|
|
||
|
|
from datetime import datetime
|
||
|
|
import json
|
||
|
|
from app.database.db import db
|
||
|
|
|
||
|
|
class Document(db.Model):
    """ORM model for a single document stored in the library.

    Rows live in the ``documents`` table. Each row references the uploading
    user through ``uploaded_by`` and owns its ``DocumentChunk`` rows through
    the ``chunks`` relationship.
    """

    __tablename__ = 'documents'

    # --- Columns ---------------------------------------------------------
    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(255), nullable=False)
    description = db.Column(db.Text, nullable=True)
    file_path = db.Column(db.String(255), nullable=True)
    content_type = db.Column(db.String(50), nullable=False)
    # Expected values: pending, processing, completed, error
    status = db.Column(db.String(20), default='pending')
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    # Same default as created_at; onupdate also points at datetime.utcnow.
    updated_at = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # --- Foreign keys ----------------------------------------------------
    uploaded_by = db.Column(
        db.Integer,
        db.ForeignKey('users.id'),
        nullable=False,
    )

    # --- Relationships ---------------------------------------------------
    # Declares a ``document`` backref on DocumentChunk; loaded with
    # lazy='dynamic' and configured with the 'all, delete-orphan' cascade.
    chunks = db.relationship(
        'DocumentChunk',
        backref='document',
        lazy='dynamic',
        cascade='all, delete-orphan',
    )

    def __repr__(self):
        """Return a short debug string showing the document id and title."""
        return f'<Document {self.id}: {self.title}>'
|
||
|
|
|
||
|
|
|
||
|
|
class DocumentChunk(db.Model):
    """Model representing a chunk of a document for embedding.

    Rows live in the ``document_chunks`` table and point back to their
    parent row in ``documents`` through ``document_id``. Metadata is kept
    as a JSON string in ``meta_data``; use ``set_metadata`` and
    ``get_metadata`` rather than touching that column directly.
    """

    __tablename__ = 'document_chunks'

    id = db.Column(db.Integer, primary_key=True)
    content = db.Column(db.Text, nullable=False)
    chunk_index = db.Column(db.Integer, nullable=False)
    embedding_id = db.Column(db.String(100), nullable=True)  # ID in Pinecone
    meta_data = db.Column(db.Text, nullable=True)  # JSON string of metadata
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Foreign keys
    document_id = db.Column(
        db.Integer,
        db.ForeignKey('documents.id'),
        nullable=False,
    )

    def set_metadata(self, metadata_dict):
        """Serialize ``metadata_dict`` to JSON and store it in ``meta_data``.

        Passing ``None`` clears the column instead of storing the JSON text
        ``"null"``, so that ``get_metadata`` keeps returning a dictionary.

        Args:
            metadata_dict: JSON-serializable mapping, or ``None`` to clear.

        Raises:
            TypeError: If ``json.dumps`` cannot serialize the value.
        """
        if metadata_dict is None:
            self.meta_data = None
        else:
            self.meta_data = json.dumps(metadata_dict)

    def get_metadata(self):
        """Return the stored metadata decoded from JSON.

        Returns:
            The decoded value, or an empty dict when ``meta_data`` is empty,
            unset, or holds the JSON literal ``null``.

        Raises:
            json.JSONDecodeError: If ``meta_data`` is not valid JSON.
        """
        if not self.meta_data:
            return {}
        decoded = json.loads(self.meta_data)
        # A stored "null" is truthy as a string but decodes to None;
        # normalise it so callers always get a dictionary for "no metadata".
        return {} if decoded is None else decoded

    def __repr__(self):
        """Return a short debug string with id, parent document and index."""
        return f'<DocumentChunk {self.id}: doc_id={self.document_id}, index={self.chunk_index}>'
|