140 lines
5.4 KiB
Python
140 lines
5.4 KiB
Python
import logging

import aiohttp

from tg_bot.config.settings import settings
from tg_bot.infrastructure.external.deepseek_client import DeepSeekClient
|
||
|
||
# Base URL prefixed to every backend REST endpoint requested by RAGService.
# NOTE(review): hard-coded to localhost even though `settings` is imported in
# this module — confirm whether this should come from configuration instead.
BACKEND_URL = "http://localhost:8001/api/v1"
|
||
|
||
|
||
class RAGService:
    """Answer user questions from the documents in the user's collections.

    Documents are looked up through the backend REST API rooted at
    ``BACKEND_URL``; the answer text is produced by a ``DeepSeekClient``
    chat completion that receives the found documents as context.
    """

    _log = logging.getLogger(__name__)

    # Upper bound on the number of documents a search returns.
    _MAX_DOCUMENTS = 20
    # Number of found documents quoted in the prompt and listed as sources.
    _CONTEXT_DOCUMENTS = 5
    # Characters of each document's content quoted in the prompt.
    _SNIPPET_CHARS = 500
    # Number of sources still reported when answer generation fails.
    _FALLBACK_SOURCES = 3

    _SYSTEM_PROMPT = (
        "Ты - помощник-юрист, который отвечает на вопросы на основе предоставленных документов.\n"
        "Используй информацию из документов для формирования точного и полезного ответа.\n"
        "Если в документах нет информации для ответа, честно скажи об этом."
    )

    def __init__(self):
        """Create the service together with its DeepSeek chat client."""
        self.deepseek_client = DeepSeekClient()

    async def search_documents_in_collections(
        self,
        user_telegram_id: str,
        query: str,
        limit_per_collection: int = 5,
    ) -> list[dict]:
        """Search every collection of a user for documents matching *query*.

        Args:
            user_telegram_id: Telegram id used in the user lookup URL and sent
                as the ``X-Telegram-ID`` header on the other requests.
            query: Text passed to the backend as the ``search`` parameter.
            limit_per_collection: Value of the ``limit`` parameter sent with
                each per-collection search.

        Returns:
            At most 20 document dicts as returned by the backend, each with a
            ``"collection_name"`` key added. An empty list is returned when
            the user or collections request does not answer 200, when the
            user payload carries no ``user_id``, or when any exception occurs.
            A failure while searching one collection only skips that
            collection.
        """
        # Header values must be strings; coerce in case a numeric id is passed.
        headers = {"X-Telegram-ID": str(user_telegram_id)}

        try:
            # One session serves all requests instead of one per collection.
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    f"{BACKEND_URL}/users/telegram/{user_telegram_id}"
                ) as user_response:
                    if user_response.status != 200:
                        return []
                    user_data = await user_response.json()

                # Test the raw value: str(None) is the truthy string "None",
                # which would let a payload without a user id slip through.
                if user_data.get("user_id") in (None, ""):
                    self._log.warning(
                        "Backend returned no user_id for telegram id %s",
                        user_telegram_id,
                    )
                    return []

                async with session.get(
                    f"{BACKEND_URL}/collections/",
                    headers=headers,
                ) as collections_response:
                    if collections_response.status != 200:
                        return []
                    collections = await collections_response.json()

                all_documents: list[dict] = []
                for collection in collections:
                    # Further requests cannot change the truncated result.
                    if len(all_documents) >= self._MAX_DOCUMENTS:
                        break

                    collection_id = collection.get("collection_id")
                    if not collection_id:
                        continue

                    try:
                        async with session.get(
                            f"{BACKEND_URL}/documents/collection/{collection_id}",
                            params={"search": query, "limit": limit_per_collection},
                            headers=headers,
                        ) as search_response:
                            if search_response.status != 200:
                                continue
                            documents = await search_response.json()

                        collection_name = collection.get("name", "Unknown")
                        for doc in documents:
                            doc["collection_name"] = collection_name
                            all_documents.append(doc)
                    except Exception:
                        # Best effort: one failing collection must not hide
                        # the results of the others.
                        self._log.exception(
                            "Error searching collection %s", collection_id
                        )

                return all_documents[: self._MAX_DOCUMENTS]

        except Exception:
            self._log.exception("Error searching documents")
            return []

    async def generate_answer_with_rag(
        self,
        question: str,
        user_telegram_id: str,
    ) -> dict:
        """Answer *question* using the user's documents as LLM context.

        Args:
            question: The user's question; also used as the search query.
            user_telegram_id: Telegram id forwarded to the document search.

        Returns:
            A dict with the keys ``"answer"`` (str), ``"sources"`` (list of
            dicts with ``title``, ``collection`` and ``document_id``) and
            ``"usage"`` (dict). If the chat completion raises, a fallback
            message is returned with ``usage`` empty and at most three
            sources (none when no documents were found).
        """
        documents = await self.search_documents_in_collections(
            user_telegram_id,
            question,
        )

        context_parts = []
        sources = []
        for doc in documents[: self._CONTEXT_DOCUMENTS]:
            title = doc.get("title", "Без названия")
            # "content" may be present but null; treat that as empty text
            # instead of failing on the slice.
            content = (doc.get("content") or "")[: self._SNIPPET_CHARS]
            collection_name = doc.get("collection_name", "Unknown")

            context_parts.append(
                f"Документ: {title}\nКоллекция: {collection_name}\nСодержание: {content}..."
            )
            sources.append(
                {
                    "title": title,
                    "collection": collection_name,
                    "document_id": doc.get("document_id"),
                }
            )

        context = (
            "\n\n".join(context_parts)
            if context_parts
            else "Релевантные документы не найдены."
        )

        user_prompt = (
            "Контекст из документов:\n"
            f"{context}\n"
            "\n"
            f"Вопрос пользователя: {question}\n"
            "\n"
            "Ответь на вопрос, используя информацию из предоставленных документов. "
            "Если информации недостаточно, укажи это."
        )

        try:
            messages = [
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ]

            response = await self.deepseek_client.chat_completion(
                messages=messages,
                temperature=0.7,
                max_tokens=2000,
            )

            return {
                "answer": response.get("content", "Failed to generate answer"),
                "sources": sources,
                "usage": response.get("usage", {}),
            }

        except Exception:
            self._log.exception("Error generating answer")
            if documents:
                return {
                    "answer": f"Found {len(documents)} documents but failed to generate answer",
                    "sources": sources[: self._FALLBACK_SOURCES],
                    "usage": {},
                }
            return {
                "answer": "No relevant documents found",
                "sources": [],
                "usage": {},
            }