140 lines
5.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import aiohttp
from tg_bot.infrastructure.external.deepseek_client import DeepSeekClient
from tg_bot.config.settings import settings
BACKEND_URL = "http://localhost:8001/api/v1"
class RAGService:
def __init__(self):
self.deepseek_client = DeepSeekClient()
async def search_documents_in_collections(
self,
user_telegram_id: str,
query: str,
limit_per_collection: int = 5
) -> list[dict]:
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"{BACKEND_URL}/users/telegram/{user_telegram_id}"
) as user_response:
if user_response.status != 200:
return []
user_data = await user_response.json()
user_uuid = str(user_data.get("user_id"))
if not user_uuid:
return []
async with session.get(
f"{BACKEND_URL}/collections/",
headers={"X-Telegram-ID": user_telegram_id}
) as collections_response:
if collections_response.status != 200:
return []
collections = await collections_response.json()
all_documents = []
for collection in collections:
collection_id = collection.get("collection_id")
if not collection_id:
continue
try:
async with aiohttp.ClientSession() as search_session:
async with search_session.get(
f"{BACKEND_URL}/documents/collection/{collection_id}",
params={"search": query, "limit": limit_per_collection},
headers={"X-Telegram-ID": user_telegram_id}
) as search_response:
if search_response.status == 200:
documents = await search_response.json()
for doc in documents:
doc["collection_name"] = collection.get("name", "Unknown")
all_documents.append(doc)
except Exception as e:
print(f"Error searching collection {collection_id}: {e}")
continue
return all_documents[:20]
except Exception as e:
print(f"Error searching documents: {e}")
return []
async def generate_answer_with_rag(
self,
question: str,
user_telegram_id: str
) -> dict:
documents = await self.search_documents_in_collections(
user_telegram_id,
question
)
context_parts = []
sources = []
for doc in documents[:5]:
title = doc.get("title", "Без названия")
content = doc.get("content", "")[:1000]
collection_name = doc.get("collection_name", "Unknown")
context_parts.append(f"Документ: {title}\nКоллекция: {collection_name}\nСодержание: {content[:500]}...")
sources.append({
"title": title,
"collection": collection_name,
"document_id": doc.get("document_id")
})
context = "\n\n".join(context_parts) if context_parts else "Релевантные документы не найдены."
system_prompt = """Ты - помощник-юрист, который отвечает на вопросы на основе предоставленных документов.
Используй информацию из документов для формирования точного и полезного ответа.
Если в документах нет информации для ответа, честно скажи об этом."""
user_prompt = f"""Контекст из документов:
{context}
Вопрос пользователя: {question}
Ответь на вопрос, используя информацию из предоставленных документов. Если информации недостаточно, укажи это."""
try:
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
response = await self.deepseek_client.chat_completion(
messages=messages,
temperature=0.7,
max_tokens=2000
)
return {
"answer": response.get("content", "Failed to generate answer"),
"sources": sources,
"usage": response.get("usage", {})
}
except Exception as e:
print(f"Error generating answer: {e}")
if documents:
return {
"answer": f"Found {len(documents)} documents but failed to generate answer",
"sources": sources[:3],
"usage": {}
}
else:
return {
"answer": "No relevant documents found",
"sources": [],
"usage": {}
}