All checks were successful
continuous-integration/drone/push Build is passing
* Delete legacy from bot * Clear old models * Единый http клиент * РАГ полечен
149 lines
6.2 KiB
Python
149 lines
6.2 KiB
Python
"""
|
||
Use cases for working with documents
|
||
"""
|
||
import logging
from typing import BinaryIO, Optional
from uuid import UUID

from src.application.services.document_parser_service import DocumentParserService
from src.application.services.rag_service import RAGService
from src.domain.entities.document import Document
from src.domain.repositories.collection_access_repository import ICollectionAccessRepository
from src.domain.repositories.collection_repository import ICollectionRepository
from src.domain.repositories.document_repository import IDocumentRepository
from src.shared.exceptions import NotFoundError, ForbiddenError
|
||
|
||
|
||
class DocumentUseCases:
    """Use cases for documents.

    Coordinates the document, collection and collection-access repositories
    with the parser service and an optional RAG service.
    """

    def __init__(
        self,
        document_repository: IDocumentRepository,
        collection_repository: ICollectionRepository,
        access_repository: ICollectionAccessRepository,
        parser_service: DocumentParserService,
        rag_service: Optional[RAGService] = None
    ):
        """Store the collaborators used by the use cases.

        Args:
            document_repository: Repository used to create, read, update,
                delete and list documents.
            collection_repository: Repository used to look up collections.
            access_repository: Repository used to look up a user's access
                record for a collection.
            parser_service: Service used to parse uploaded files.
            rag_service: Optional service used to index uploaded documents;
                when it is falsy, indexing is skipped.
        """
        self.document_repository = document_repository
        self.collection_repository = collection_repository
        self.access_repository = access_repository
        self.parser_service = parser_service
        self.rag_service = rag_service

    async def _check_collection_access(self, user_id: UUID, collection) -> bool:
        """Check whether a user has access to a collection.

        Access is granted when the user owns the collection, when the
        collection is public, or when the access repository returns a record
        for this user and collection.

        NOTE(review): a public collection passes this check for any user, and
        the same check gates upload and update - confirm that write access to
        public collections is intended.

        Args:
            user_id: ID of the user requesting access.
            collection: Collection object; its ``owner_id``, ``is_public``
                and ``collection_id`` attributes are read.

        Returns:
            True if access is granted, False otherwise.
        """
        if collection.owner_id == user_id:
            return True

        if collection.is_public:
            return True

        # The repository is only queried when the cheap checks above fail.
        access = await self.access_repository.get_by_user_and_collection(user_id, collection.collection_id)
        return access is not None

    async def create_document(
        self,
        collection_id: UUID,
        title: str,
        content: str,
        metadata: dict | None = None
    ) -> Document:
        """Create a document in an existing collection.

        No user access check is performed by this method.

        Args:
            collection_id: ID of the collection that will hold the document.
            title: Document title.
            content: Document text.
            metadata: Optional metadata; a falsy value is stored as ``{}``.

        Returns:
            The result of the document repository's ``create`` call.

        Raises:
            NotFoundError: If the collection does not exist.
        """
        collection = await self.collection_repository.get_by_id(collection_id)
        if not collection:
            raise NotFoundError(f"Коллекция {collection_id} не найдена")

        document = Document(
            collection_id=collection_id,
            title=title,
            content=content,
            metadata=metadata or {}
        )
        return await self.document_repository.create(document)

    async def upload_and_parse_document(
        self,
        collection_id: UUID,
        file: BinaryIO,
        filename: str,
        user_id: UUID
    ) -> Document:
        """Upload and parse a file, store it as a document, then index it.

        The file is always passed to the parser service's ``parse_pdf``.
        Indexing is best-effort: if the RAG service raises, the error is
        logged and the already stored document is still returned.

        Args:
            collection_id: ID of the target collection.
            file: Binary file object handed to the parser.
            filename: Original file name; passed to the parser and stored in
                the document metadata under ``"filename"``.
            user_id: ID of the uploading user, used for the access check.

        Returns:
            The document as returned by the document repository's ``create``.

        Raises:
            NotFoundError: If the collection does not exist.
            ForbiddenError: If the user has no access to the collection.
        """
        collection = await self.collection_repository.get_by_id(collection_id)
        if not collection:
            raise NotFoundError(f"Коллекция {collection_id} не найдена")

        has_access = await self._check_collection_access(user_id, collection)
        if not has_access:
            raise ForbiddenError("У вас нет доступа к этой коллекции")

        title, content = await self.parser_service.parse_pdf(file, filename)

        document = Document(
            collection_id=collection_id,
            title=title,
            content=content,
            metadata={"filename": filename}
        )
        document = await self.document_repository.create(document)

        if self.rag_service:
            try:
                await self.rag_service.index_document(document)
            except Exception:
                # Deliberately best-effort: an indexing failure must not fail
                # the upload. Log with traceback instead of printing so the
                # failure reaches the configured log handlers.
                logging.getLogger(__name__).exception(
                    "Ошибка при автоматической индексации документа %s",
                    document.document_id,
                )

        return document

    async def get_document(self, document_id: UUID) -> Document:
        """Get a document by ID.

        Args:
            document_id: ID of the document to fetch.

        Returns:
            The document returned by the document repository.

        Raises:
            NotFoundError: If the repository returns a falsy result.
        """
        document = await self.document_repository.get_by_id(document_id)
        if not document:
            raise NotFoundError(f"Документ {document_id} не найден")
        return document

    async def update_document(
        self,
        document_id: UUID,
        user_id: UUID,
        title: str | None = None,
        content: str | None = None,
        metadata: dict | None = None
    ) -> Document:
        """Update a document's title, content and/or metadata.

        Only arguments that are not ``None`` overwrite the stored values.
        The RAG service is not invoked by this method.

        Args:
            document_id: ID of the document to update.
            user_id: ID of the acting user, used for the access check.
            title: New title, or ``None`` to keep the current one.
            content: New content, or ``None`` to keep the current one.
            metadata: New metadata, or ``None`` to keep the current one.

        Returns:
            The result of the document repository's ``update`` call.

        Raises:
            NotFoundError: If the document or its collection does not exist.
            ForbiddenError: If the user has no access to the collection.
        """
        document = await self.get_document(document_id)

        collection = await self.collection_repository.get_by_id(document.collection_id)
        if not collection:
            raise NotFoundError(f"Коллекция {document.collection_id} не найдена")
        has_access = await self._check_collection_access(user_id, collection)
        if not has_access:
            raise ForbiddenError("У вас нет доступа к этой коллекции")

        if title is not None:
            document.title = title
        if content is not None:
            document.content = content
        if metadata is not None:
            document.metadata = metadata

        return await self.document_repository.update(document)

    async def delete_document(self, document_id: UUID, user_id: UUID) -> bool:
        """Delete a document; only the collection owner may do so.

        Args:
            document_id: ID of the document to delete.
            user_id: ID of the acting user; must equal the collection's
                ``owner_id``.

        Returns:
            The result of the document repository's ``delete`` call.

        Raises:
            NotFoundError: If the document does not exist.
            ForbiddenError: If the collection is missing or the user is not
                its owner (a missing collection is reported as forbidden,
                not as not found).
        """
        document = await self.get_document(document_id)

        collection = await self.collection_repository.get_by_id(document.collection_id)
        if not collection or collection.owner_id != user_id:
            raise ForbiddenError("Только владелец коллекции может удалять документы")

        return await self.document_repository.delete(document_id)

    async def list_collection_documents(self, collection_id: UUID, skip: int = 0, limit: int = 100) -> list[Document]:
        """List the documents of a collection.

        No user access check is performed by this method.

        Args:
            collection_id: ID of the collection.
            skip: Forwarded to the repository as ``skip``.
            limit: Forwarded to the repository as ``limit``.

        Returns:
            The result of the repository's ``list_by_collection`` call.

        Raises:
            NotFoundError: If the collection does not exist.
        """
        collection = await self.collection_repository.get_by_id(collection_id)
        if not collection:
            raise NotFoundError(f"Коллекция {collection_id} не найдена")

        return await self.document_repository.list_by_collection(collection_id, skip=skip, limit=limit)
|
||
|