"""
Сервис парсинга документов
"""
from typing import BinaryIO
from src.infrastructure.external.yandex_ocr import YandexOCRService, YandexOCRError


class DocumentParserService:
    """Turn uploaded files into a ``(title, content)`` pair using OCR.

    Text recognition is delegated to the injected OCR service. This class
    derives the title from the file name and substitutes a human-readable
    placeholder when no text was recognized.
    """

    def __init__(self, ocr_service: YandexOCRService):
        self.ocr_service = ocr_service

    @staticmethod
    def _title_from_filename(filename: str) -> str:
        """Return *filename* with its last dot-separated suffix removed."""
        # rsplit yields the whole name unchanged when it contains no dot.
        return filename.rsplit(".", 1)[0]

    async def parse_pdf(self, file: BinaryIO, filename: str) -> tuple[str, str]:
        """
        Parse a PDF file.

        This method is best-effort: any failure of the OCR call is caught
        and reported through the returned content string rather than raised.

        Args:
            file: File object passed to the OCR service.
            filename: File name; used for the title and in placeholder text.

        Returns:
            Tuple (title, content). ``content`` is the recognized text, a
            placeholder when nothing was recognized (empty, ``None`` or
            whitespace-only result), or an error description when
            recognition failed.
        """
        title = self._title_from_filename(filename)
        try:
            content = await self.ocr_service.parse_pdf(file)

            # Content starting with "Ошибка распознавания:" is non-blank, so
            # it is returned to the caller unchanged, as before.
            if not content or not content.strip():
                content = f"Документ {filename} загружен, но текст не был распознан."

            return title, content
        except YandexOCRError as e:
            return title, f" Ошибка распознавания документа: {e}"
        except Exception as e:
            # Deliberate catch-all: the caller always gets a (title, content)
            # pair, with the failure described in the content.
            return title, f" Ошибка при парсинге документа: {e}"

    async def parse_image(self, file: BinaryIO, filename: str) -> tuple[str, str]:
        """
        Parse an image.

        Args:
            file: Image file object passed to the OCR service.
            filename: File name; used for the title and in placeholder text.

        Returns:
            Tuple (title, content). ``content`` is the recognized text, or a
            placeholder when nothing was recognized.

        Raises:
            YandexOCRError: If the OCR service raises it, or wrapping any
                other exception raised while parsing.
        """
        try:
            content = await self.ocr_service.parse_image(file)

            if not content or not content.strip():
                content = f"Изображение {filename} загружено, но текст не был распознан."

            return self._title_from_filename(filename), content
        except YandexOCRError:
            # Already the exception type callers expect; propagate untouched.
            raise
        except Exception as e:
            raise YandexOCRError(f"Ошибка при парсинге изображения: {e}") from e