# BetterCallPraskovia/tg_bot/application/services/rag_service.py

import logging

import aiohttp

from tg_bot.config.settings import settings
from tg_bot.infrastructure.external.deepseek_client import DeepSeekClient

# Base URL of the backend REST API; every request in this module (user lookup,
# collection listing, per-collection document search) is built on top of it.
# NOTE(review): the value is hard-coded to localhost while `settings` is imported
# but unused in the visible code — consider reading the URL from configuration.
BACKEND_URL = "http://localhost:8001/api/v1"


logger = logging.getLogger(__name__)


class RAGService:
    """Answer user questions from documents found in the user's collections.

    The service looks documents up through the backend REST API
    (``BACKEND_URL``), assembles a textual context from the first few hits and
    asks the DeepSeek chat model to answer the question using that context.
    """

    # Maximum number of documents returned by a search across all collections.
    MAX_SEARCH_RESULTS = 20
    # Number of found documents that are put into the LLM prompt.
    MAX_CONTEXT_DOCUMENTS = 5
    # Number of characters of each document's content that go into the prompt.
    MAX_CONTENT_CHARS = 500
    # Number of sources reported when answer generation fails.
    MAX_FALLBACK_SOURCES = 3

    def __init__(self):
        self.deepseek_client = DeepSeekClient()

    async def search_documents_in_collections(
            self,
            user_telegram_id: str,
            query: str,
            limit_per_collection: int = 5
    ) -> list[dict]:
        """Search every collection visible to the user for ``query``.

        Args:
            user_telegram_id: Telegram id of the user; used for the user lookup
                and sent as the ``X-Telegram-ID`` header.
            query: Search string passed to the backend as ``search``.
            limit_per_collection: Value of the ``limit`` query parameter sent
                with each per-collection search.

        Returns:
            At most ``MAX_SEARCH_RESULTS`` document dicts, each extended with a
            ``collection_name`` key. An empty list is returned when the user is
            unknown, the collection listing fails, or any unexpected error
            occurs (the search is best-effort and never raises).
        """
        telegram_id = str(user_telegram_id)
        try:
            # One session is shared by all requests so connections are pooled.
            async with aiohttp.ClientSession() as session:
                if not await self._user_exists(session, telegram_id):
                    return []

                collections = await self._fetch_collections(session, telegram_id)

                found: list[dict] = []
                for collection in collections:
                    if len(found) >= self.MAX_SEARCH_RESULTS:
                        # The result is truncated anyway; skip useless requests.
                        break
                    if not isinstance(collection, dict):
                        continue
                    collection_id = collection.get("collection_id")
                    if not collection_id:
                        continue

                    try:
                        found.extend(
                            await self._search_collection(
                                session,
                                collection,
                                telegram_id,
                                query,
                                limit_per_collection,
                            )
                        )
                    except Exception:
                        # A failing collection must not abort the whole search.
                        logger.exception(
                            "Error searching collection %s", collection_id
                        )

                return found[:self.MAX_SEARCH_RESULTS]

        except Exception:
            logger.exception("Error searching documents")
            return []

    async def generate_answer_with_rag(
            self,
            question: str,
            user_telegram_id: str
    ) -> dict:
        """Generate an answer to ``question`` grounded in the user's documents.

        Args:
            question: The user's question; also used as the search query.
            user_telegram_id: Telegram id of the asking user.

        Returns:
            A dict with the keys ``answer`` (str), ``sources`` (list of dicts
            with ``title``, ``collection`` and ``document_id``) and ``usage``
            (dict taken from the model response, empty on failure). If the
            model call fails, a fallback answer is returned instead of raising.
        """
        documents = await self.search_documents_in_collections(
            user_telegram_id,
            question
        )
        context, sources = self._build_context(documents)

        system_prompt = """Ты - помощник-юрист, который отвечает на вопросы на основе предоставленных документов.
Используй информацию из документов для формирования точного и полезного ответа.
Если в документах нет информации для ответа, честно скажи об этом."""

        user_prompt = f"""Контекст из документов:
{context}

Вопрос пользователя: {question}

Ответь на вопрос, используя информацию из предоставленных документов. Если информации недостаточно, укажи это."""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        try:
            response = await self.deepseek_client.chat_completion(
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )
            return {
                "answer": response.get("content", "Failed to generate answer"),
                "sources": sources,
                "usage": response.get("usage", {})
            }
        except Exception:
            logger.exception("Error generating answer")

        if documents:
            return {
                "answer": f"Found {len(documents)} documents but failed to generate answer",
                "sources": sources[:self.MAX_FALLBACK_SOURCES],
                "usage": {}
            }
        return {
            "answer": "No relevant documents found",
            "sources": [],
            "usage": {}
        }

    async def _user_exists(self, session, telegram_id: str) -> bool:
        """Return True if the backend knows a user with this Telegram id."""
        async with session.get(
                f"{BACKEND_URL}/users/telegram/{telegram_id}"
        ) as response:
            if response.status != 200:
                return False
            payload = await response.json()

        user_id = payload.get("user_id") if isinstance(payload, dict) else None
        # Check the raw value: str(None) is the truthy string "None".
        return user_id is not None and str(user_id) != ""

    async def _fetch_collections(self, session, telegram_id: str) -> list:
        """Return the collection list for the user, or [] if it is unavailable."""
        async with session.get(
                f"{BACKEND_URL}/collections/",
                headers={"X-Telegram-ID": telegram_id}
        ) as response:
            if response.status != 200:
                return []
            payload = await response.json()

        return payload if isinstance(payload, list) else []

    async def _search_collection(
            self,
            session,
            collection: dict,
            telegram_id: str,
            query: str,
            limit: int
    ) -> list[dict]:
        """Return documents of one collection matching ``query``, tagged with its name."""
        async with session.get(
                f"{BACKEND_URL}/documents/collection/{collection.get('collection_id')}",
                params={"search": query, "limit": limit},
                headers={"X-Telegram-ID": telegram_id}
        ) as response:
            if response.status != 200:
                return []
            payload = await response.json()

        if not isinstance(payload, list):
            return []

        collection_name = collection.get("name") or "Unknown"
        documents = [doc for doc in payload if isinstance(doc, dict)]
        for doc in documents:
            doc["collection_name"] = collection_name
        return documents

    def _build_context(self, documents: list[dict]) -> tuple[str, list[dict]]:
        """Build the prompt context and the source list from the first documents."""
        context_parts = []
        sources = []

        for doc in documents[:self.MAX_CONTEXT_DOCUMENTS]:
            # `or` also covers keys that are present but null in the payload.
            title = doc.get("title") or "Без названия"
            collection_name = doc.get("collection_name") or "Unknown"
            content = doc.get("content") or ""
            if not isinstance(content, str):
                content = str(content)
            excerpt = content[:self.MAX_CONTENT_CHARS]

            context_parts.append(
                f"Документ: {title}\nКоллекция: {collection_name}\nСодержание: {excerpt}..."
            )
            sources.append({
                "title": title,
                "collection": collection_name,
                "document_id": doc.get("document_id")
            })

        context = "\n\n".join(context_parts) if context_parts else "Релевантные документы не найдены."
        return context, sources