forked from HSE_team/BetterCallPraskovia
Compare commits
23 Commits
andrewbokh
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b0bbc739f3 | |||
| 42fcc0eb16 | |||
| 683f779c31 | |||
| ef71c67683 | |||
| 570f0b7ea7 | |||
| 1b550e6503 | |||
| 66392765b9 | |||
| 8cbc318c33 | |||
| 908e8fc435 | |||
| 5a46194d41 | |||
| 0b04ffefd2 | |||
| 5264b3b64c | |||
| 5809ac5688 | |||
| 8bdacb4f7a | |||
| 1ce1c23d10 | |||
| 5da6c32722 | |||
| 6b768261e2 | |||
| 6934220b52 | |||
| 79980eb313 | |||
| 9f111ad2c2 | |||
| 7b7165a44b | |||
| 193deb7a8c | |||
| 49c3d1b0fd |
@ -13,6 +13,7 @@ trigger:
|
||||
steps:
|
||||
- name: deploy-backend
|
||||
image: appleboy/drone-ssh
|
||||
timeout: 30m
|
||||
settings:
|
||||
host:
|
||||
from_secret: server_host
|
||||
@ -21,10 +22,13 @@ steps:
|
||||
password:
|
||||
from_secret: server_password
|
||||
port: 22
|
||||
command_timeout: 30m
|
||||
script:
|
||||
- cd BETTERCALLPRASKOVIA
|
||||
- cd BetterCallPraskovia
|
||||
- git pull origin main
|
||||
- docker-compose stop backend tg_bot
|
||||
- docker-compose up --build -d backend tg_bot
|
||||
- docker system prune -f
|
||||
- docker-compose rm -f backend tg_bot
|
||||
- docker-compose build backend tg_bot
|
||||
- docker-compose up -d --no-deps backend tg_bot
|
||||
- docker image prune -f
|
||||
|
||||
1619
AI_api.yaml
1619
AI_api.yaml
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
|
||||
33
backend/alembic/versions/003_remove_embeddings_table.py
Normal file
33
backend/alembic/versions/003_remove_embeddings_table.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""Remove unused embeddings table
|
||||
|
||||
Revision ID: 003
|
||||
Revises: 002
|
||||
Create Date: 2024-12-24 12:00:00.000000
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
revision = '003'
|
||||
down_revision = '002'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.drop_table('embeddings')
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.create_table(
|
||||
'embeddings',
|
||||
sa.Column('embedding_id', sa.dialects.postgresql.UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('document_id', sa.dialects.postgresql.UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('embedding', sa.dialects.postgresql.JSON(astext_type=sa.Text()), nullable=True),
|
||||
sa.Column('model_version', sa.String(), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(), nullable=False),
|
||||
sa.ForeignKeyConstraint(['document_id'], ['documents.document_id'], ),
|
||||
sa.PrimaryKeyConstraint('embedding_id')
|
||||
)
|
||||
|
||||
@ -67,6 +67,8 @@ class DocumentParserService:
|
||||
return title, content
|
||||
except YandexOCRError:
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
raise YandexOCRError(f"Ошибка при парсинге изображения: {str(e)}") from e
|
||||
|
||||
|
||||
|
||||
@ -42,9 +42,18 @@ class RAGService:
|
||||
)
|
||||
)
|
||||
|
||||
embeddings = self.embedding_service.embed_texts([c.content for c in chunks])
|
||||
EMBEDDING_BATCH_SIZE = 50
|
||||
all_embeddings: list[list[float]] = []
|
||||
|
||||
for i in range(0, len(chunks), EMBEDDING_BATCH_SIZE):
|
||||
batch_chunks = chunks[i:i + EMBEDDING_BATCH_SIZE]
|
||||
batch_texts = [c.content for c in batch_chunks]
|
||||
batch_embeddings = self.embedding_service.embed_texts(batch_texts)
|
||||
all_embeddings.extend(batch_embeddings)
|
||||
|
||||
print(f"Created {len(all_embeddings)} embeddings, upserting to Qdrant...")
|
||||
await self.vector_repository.upsert_chunks(
|
||||
chunks, embeddings, model_version=self.embedding_service.model_version()
|
||||
chunks, all_embeddings, model_version=self.embedding_service.model_version()
|
||||
)
|
||||
return chunks
|
||||
|
||||
|
||||
@ -39,5 +39,10 @@ class TextSplitter:
|
||||
|
||||
def _split_sentences(self, text: str) -> Iterable[str]:
|
||||
parts = re.split(r"(?<=[\.\?\!])\s+", text)
|
||||
if len(parts) == 1 and len(text) > self.chunk_size * 2:
|
||||
chunk_text = []
|
||||
for i in range(0, len(text), self.chunk_size):
|
||||
chunk_text.append(text[i:i + self.chunk_size])
|
||||
return chunk_text
|
||||
return [p.strip() for p in parts if p.strip()]
|
||||
|
||||
|
||||
@ -3,12 +3,15 @@ Use cases для работы с документами
|
||||
"""
|
||||
from uuid import UUID
|
||||
from typing import BinaryIO, Optional
|
||||
import httpx
|
||||
from src.domain.entities.document import Document
|
||||
from src.domain.repositories.document_repository import IDocumentRepository
|
||||
from src.domain.repositories.collection_repository import ICollectionRepository
|
||||
from src.domain.repositories.collection_access_repository import ICollectionAccessRepository
|
||||
from src.application.services.document_parser_service import DocumentParserService
|
||||
from src.application.services.rag_service import RAGService
|
||||
from src.shared.exceptions import NotFoundError, ForbiddenError
|
||||
from src.shared.config import settings
|
||||
|
||||
|
||||
class DocumentUseCases:
|
||||
@ -19,12 +22,14 @@ class DocumentUseCases:
|
||||
document_repository: IDocumentRepository,
|
||||
collection_repository: ICollectionRepository,
|
||||
access_repository: ICollectionAccessRepository,
|
||||
parser_service: DocumentParserService
|
||||
parser_service: DocumentParserService,
|
||||
rag_service: Optional[RAGService] = None
|
||||
):
|
||||
self.document_repository = document_repository
|
||||
self.collection_repository = collection_repository
|
||||
self.access_repository = access_repository
|
||||
self.parser_service = parser_service
|
||||
self.rag_service = rag_service
|
||||
|
||||
async def _check_collection_access(self, user_id: UUID, collection) -> bool:
|
||||
"""Проверить доступ пользователя к коллекции"""
|
||||
@ -57,14 +62,36 @@ class DocumentUseCases:
|
||||
)
|
||||
return await self.document_repository.create(document)
|
||||
|
||||
async def _send_telegram_notification(self, telegram_id: str, message: str):
|
||||
"""Отправить уведомление пользователю через Telegram Bot API"""
|
||||
if not settings.TELEGRAM_BOT_TOKEN:
|
||||
return
|
||||
|
||||
try:
|
||||
url = f"https://api.telegram.org/bot{settings.TELEGRAM_BOT_TOKEN}/sendMessage"
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.post(
|
||||
url,
|
||||
json={
|
||||
"chat_id": telegram_id,
|
||||
"text": message,
|
||||
"parse_mode": "HTML"
|
||||
}
|
||||
)
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to send Telegram notification: {response.status_code}")
|
||||
except Exception as e:
|
||||
print(f"Error sending Telegram notification: {e}")
|
||||
|
||||
async def upload_and_parse_document(
|
||||
self,
|
||||
collection_id: UUID,
|
||||
file: BinaryIO,
|
||||
filename: str,
|
||||
user_id: UUID
|
||||
user_id: UUID,
|
||||
telegram_id: Optional[str] = None
|
||||
) -> Document:
|
||||
"""Загрузить и распарсить документ"""
|
||||
"""Загрузить и распарсить документ, затем автоматически проиндексировать"""
|
||||
collection = await self.collection_repository.get_by_id(collection_id)
|
||||
if not collection:
|
||||
raise NotFoundError(f"Коллекция {collection_id} не найдена")
|
||||
@ -81,7 +108,41 @@ class DocumentUseCases:
|
||||
content=content,
|
||||
metadata={"filename": filename}
|
||||
)
|
||||
return await self.document_repository.create(document)
|
||||
document = await self.document_repository.create(document)
|
||||
|
||||
if self.rag_service and telegram_id:
|
||||
try:
|
||||
await self._send_telegram_notification(
|
||||
telegram_id,
|
||||
"🔄 <b>Начинаю индексацию документа...</b>\n\n"
|
||||
f"📄 <b>Документ:</b> {title}\n\n"
|
||||
f"Это может занять некоторое время.\n"
|
||||
f"Вы получите уведомление по завершении."
|
||||
)
|
||||
|
||||
chunks = await self.rag_service.index_document(document)
|
||||
|
||||
await self._send_telegram_notification(
|
||||
telegram_id,
|
||||
"✅ <b>Индексация завершена!</b>\n\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"📄 <b>Документ:</b> {title}\n"
|
||||
f"📊 <b>Проиндексировано чанков:</b> {len(chunks)}\n\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"💡 <b>Теперь вы можете задавать вопросы по этому документу!</b>\n"
|
||||
f"Просто напишите ваш вопрос, и я найду ответ на основе загруженного документа."
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Ошибка при автоматической индексации документа {document.document_id}: {e}")
|
||||
if telegram_id:
|
||||
await self._send_telegram_notification(
|
||||
telegram_id,
|
||||
"⚠️ <b>Ошибка при индексации</b>\n\n"
|
||||
f"Документ загружен, но индексация не завершена.\n"
|
||||
f"Ошибка: {str(e)[:200]}"
|
||||
)
|
||||
|
||||
return document
|
||||
|
||||
async def get_document(self, document_id: UUID) -> Document:
|
||||
"""Получить документ по ID"""
|
||||
|
||||
@ -1,25 +0,0 @@
|
||||
"""
|
||||
Доменная сущность Embedding
|
||||
"""
|
||||
from datetime import datetime
|
||||
from uuid import UUID, uuid4
|
||||
from typing import Any
|
||||
|
||||
|
||||
class Embedding:
|
||||
"""Эмбеддинг документа"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
document_id: UUID,
|
||||
embedding: list[float] | None = None,
|
||||
model_version: str = "",
|
||||
embedding_id: UUID | None = None,
|
||||
created_at: datetime | None = None
|
||||
):
|
||||
self.embedding_id = embedding_id or uuid4()
|
||||
self.document_id = document_id
|
||||
self.embedding = embedding or []
|
||||
self.model_version = model_version
|
||||
self.created_at = created_at or datetime.utcnow()
|
||||
|
||||
@ -53,19 +53,6 @@ class DocumentModel(Base):
|
||||
document_metadata = Column("metadata", JSON, nullable=True, default={})
|
||||
created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
|
||||
collection = relationship("CollectionModel", back_populates="documents")
|
||||
embeddings = relationship("EmbeddingModel", back_populates="document", cascade="all, delete-orphan")
|
||||
|
||||
|
||||
class EmbeddingModel(Base):
|
||||
"""Модель эмбеддинга (заглушка)"""
|
||||
__tablename__ = "embeddings"
|
||||
|
||||
embedding_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
document_id = Column(UUID(as_uuid=True), ForeignKey("documents.document_id"), nullable=False)
|
||||
embedding = Column(JSON, nullable=True)
|
||||
model_version = Column(String, nullable=True)
|
||||
created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
|
||||
document = relationship("DocumentModel", back_populates="embeddings")
|
||||
|
||||
|
||||
class ConversationModel(Base):
|
||||
|
||||
@ -36,6 +36,8 @@ class QdrantVectorRepository(IVectorRepository):
|
||||
embeddings: Sequence[list[float]],
|
||||
model_version: str,
|
||||
) -> None:
|
||||
BATCH_SIZE = 100
|
||||
|
||||
points = []
|
||||
for chunk, vector in zip(chunks, embeddings):
|
||||
points.append(
|
||||
@ -52,6 +54,12 @@ class QdrantVectorRepository(IVectorRepository):
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
if len(points) >= BATCH_SIZE:
|
||||
self.client.upsert(collection_name=self.collection_name, points=points)
|
||||
points = []
|
||||
|
||||
if points:
|
||||
self.client.upsert(collection_name=self.collection_name, points=points)
|
||||
|
||||
async def search(
|
||||
|
||||
@ -69,7 +69,8 @@ async def upload_document(
|
||||
collection_id=collection_id,
|
||||
file=file.file,
|
||||
filename=file.filename,
|
||||
user_id=current_user.user_id
|
||||
user_id=current_user.user_id,
|
||||
telegram_id=current_user.telegram_id
|
||||
)
|
||||
return DocumentResponse.from_entity(document)
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
"""
|
||||
API для RAG: индексация документов и ответы на вопросы
|
||||
API для RAG: ответы на вопросы
|
||||
"""
|
||||
from fastapi import APIRouter, status, Request
|
||||
from typing import Annotated
|
||||
@ -9,30 +9,13 @@ from src.presentation.middleware.auth_middleware import get_current_user
|
||||
from src.presentation.schemas.rag_schemas import (
|
||||
QuestionRequest,
|
||||
RAGAnswer,
|
||||
IndexDocumentRequest,
|
||||
IndexDocumentResponse,
|
||||
)
|
||||
from src.application.use_cases.rag_use_cases import RAGUseCases
|
||||
from src.domain.entities.user import User
|
||||
|
||||
|
||||
router = APIRouter(prefix="/rag", tags=["rag"])
|
||||
|
||||
|
||||
@router.post("/index", response_model=IndexDocumentResponse, status_code=status.HTTP_200_OK)
|
||||
@inject
|
||||
async def index_document(
|
||||
body: IndexDocumentRequest,
|
||||
request: Request,
|
||||
user_repo: Annotated[IUserRepository, FromDishka()],
|
||||
use_cases: Annotated[RAGUseCases, FromDishka()],
|
||||
):
|
||||
"""Индексирование идет через чанкирование, далее эмбеддинг и загрузка в векторную бд"""
|
||||
current_user = await get_current_user(request, user_repo)
|
||||
result = await use_cases.index_document(body.document_id)
|
||||
return IndexDocumentResponse(**result)
|
||||
|
||||
|
||||
@router.post("/question", response_model=RAGAnswer, status_code=status.HTTP_200_OK)
|
||||
@inject
|
||||
async def ask_question(
|
||||
|
||||
@ -26,10 +26,3 @@ class RAGAnswer(BaseModel):
|
||||
usage: dict[str, Any] = {}
|
||||
|
||||
|
||||
class IndexDocumentRequest(BaseModel):
|
||||
document_id: UUID
|
||||
|
||||
|
||||
class IndexDocumentResponse(BaseModel):
|
||||
chunks_indexed: int
|
||||
|
||||
|
||||
@ -152,9 +152,10 @@ class UseCaseProvider(Provider):
|
||||
document_repo: IDocumentRepository,
|
||||
collection_repo: ICollectionRepository,
|
||||
access_repo: ICollectionAccessRepository,
|
||||
parser_service: DocumentParserService
|
||||
parser_service: DocumentParserService,
|
||||
rag_service: RAGService
|
||||
) -> DocumentUseCases:
|
||||
return DocumentUseCases(document_repo, collection_repo, access_repo, parser_service)
|
||||
return DocumentUseCases(document_repo, collection_repo, access_repo, parser_service, rag_service)
|
||||
|
||||
@provide(scope=Scope.REQUEST)
|
||||
def get_conversation_use_cases(
|
||||
|
||||
@ -1,93 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
from sqlalchemy import create_engine, inspect
|
||||
from sqlalchemy.orm import declarative_base, Session
|
||||
from sqlalchemy import Column, String, DateTime, Boolean, Integer, Text
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
DB_PATH = os.path.join(BASE_DIR, 'data', 'bot.db')
|
||||
DATABASE_URL = f"sqlite:///{DB_PATH}"
|
||||
|
||||
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
|
||||
|
||||
if os.path.exists(DB_PATH):
|
||||
try:
|
||||
temp_engine = create_engine(DATABASE_URL)
|
||||
inspector = inspect(temp_engine)
|
||||
tables = inspector.get_table_names()
|
||||
if tables:
|
||||
sys.exit(0)
|
||||
except:
|
||||
pass
|
||||
|
||||
choice = input("Перезаписать БД? (y/N): ")
|
||||
if choice.lower() != 'y':
|
||||
sys.exit(0)
|
||||
|
||||
engine = create_engine(DATABASE_URL, echo=False)
|
||||
Base = declarative_base()
|
||||
|
||||
class UserModel(Base):
|
||||
__tablename__ = "users"
|
||||
|
||||
user_id = Column("user_id", String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
telegram_id = Column("telegram_id", String(100), nullable=False, unique=True)
|
||||
created_at = Column("created_at", DateTime, default=datetime.utcnow, nullable=False)
|
||||
role = Column("role", String(20), default="user", nullable=False)
|
||||
is_premium = Column(Boolean, default=False, nullable=False)
|
||||
premium_until = Column(DateTime, nullable=True)
|
||||
questions_used = Column(Integer, default=0, nullable=False)
|
||||
username = Column(String(100), nullable=True)
|
||||
first_name = Column(String(100), nullable=True)
|
||||
last_name = Column(String(100), nullable=True)
|
||||
|
||||
class PaymentModel(Base):
|
||||
__tablename__ = "payments"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
payment_id = Column(String(36), default=lambda: str(uuid.uuid4()), nullable=False, unique=True)
|
||||
user_id = Column(Integer, nullable=False)
|
||||
amount = Column(String(20), nullable=False)
|
||||
currency = Column(String(3), default="RUB", nullable=False)
|
||||
status = Column(String(20), default="pending", nullable=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
yookassa_payment_id = Column(String(100), unique=True, nullable=True)
|
||||
description = Column(Text, nullable=True)
|
||||
|
||||
try:
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
session = Session(bind=engine)
|
||||
|
||||
existing = session.query(UserModel).filter_by(telegram_id="123456789").first()
|
||||
if not existing:
|
||||
test_user = UserModel(
|
||||
telegram_id="123456789",
|
||||
username="test_user",
|
||||
first_name="Test",
|
||||
last_name="User",
|
||||
is_premium=True
|
||||
)
|
||||
session.add(test_user)
|
||||
|
||||
existing_payment = session.query(PaymentModel).filter_by(yookassa_payment_id="test_yoo_001").first()
|
||||
if not existing_payment:
|
||||
test_payment = PaymentModel(
|
||||
user_id=123456789,
|
||||
amount="500.00",
|
||||
status="succeeded",
|
||||
description="Test payment",
|
||||
yookassa_payment_id="test_yoo_001"
|
||||
)
|
||||
session.add(test_payment)
|
||||
|
||||
session.commit()
|
||||
session.close()
|
||||
|
||||
except Exception as e:
|
||||
print(f"Ошибка: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
@ -1,31 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
try:
|
||||
from tg_bot.infrastructure.database.database import engine, Base
|
||||
from tg_bot.infrastructure.database import models
|
||||
|
||||
print("СОЗДАНИЕ ТАБЛИЦ БАЗЫ ДАННЫХ")
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
print("Таблицы успешно созданы!")
|
||||
print(" • users")
|
||||
print(" • payments")
|
||||
print()
|
||||
print(f"База данных: {engine.url}")
|
||||
|
||||
db_path = "data/bot.db"
|
||||
if os.path.exists(db_path):
|
||||
size = os.path.getsize(db_path)
|
||||
print(f"Размер файла: {size} байт")
|
||||
else:
|
||||
print("Файл БД не найден, но таблицы созданы")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Ошибка: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
print("=" * 50)
|
||||
@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
COPY . ./tg_bot/
|
||||
|
||||
|
||||
@ -1,137 +1,130 @@
|
||||
import aiohttp
|
||||
from tg_bot.infrastructure.external.deepseek_client import DeepSeekClient
|
||||
"""
|
||||
RAG сервис для бота - вызывает API бэкенда
|
||||
"""
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.infrastructure.http_client import create_http_session
|
||||
|
||||
|
||||
class RAGService:
|
||||
"""Сервис для работы с RAG через API бэкенда"""
|
||||
|
||||
def __init__(self):
|
||||
self.deepseek_client = DeepSeekClient()
|
||||
|
||||
async def search_documents_in_collections(
|
||||
async def get_or_create_conversation(
|
||||
self,
|
||||
user_telegram_id: str,
|
||||
query: str,
|
||||
limit_per_collection: int = 5
|
||||
) -> list[dict]:
|
||||
collection_id: str = None
|
||||
) -> str | None:
|
||||
"""Получить или создать беседу для пользователя"""
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/users/telegram/{user_telegram_id}"
|
||||
) as user_response:
|
||||
if user_response.status != 200:
|
||||
return []
|
||||
|
||||
user_data = await user_response.json()
|
||||
user_uuid = str(user_data.get("user_id"))
|
||||
|
||||
if not user_uuid:
|
||||
return []
|
||||
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/collections/",
|
||||
headers={"X-Telegram-ID": user_telegram_id}
|
||||
) as collections_response:
|
||||
if collections_response.status != 200:
|
||||
return []
|
||||
|
||||
return None
|
||||
collections = await collections_response.json()
|
||||
|
||||
all_documents = []
|
||||
for collection in collections:
|
||||
collection_id = collection.get("collection_id")
|
||||
if not collections:
|
||||
if not collection_id:
|
||||
continue
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as search_session:
|
||||
async with search_session.get(
|
||||
f"{settings.BACKEND_URL}/documents/collection/{collection_id}",
|
||||
params={"search": query, "limit": limit_per_collection},
|
||||
async with session.post(
|
||||
f"{settings.BACKEND_URL}/collections",
|
||||
json={
|
||||
"name": "Основная коллекция",
|
||||
"description": "Коллекция по умолчанию",
|
||||
"is_public": False
|
||||
},
|
||||
headers={"X-Telegram-ID": user_telegram_id}
|
||||
) as search_response:
|
||||
if search_response.status == 200:
|
||||
documents = await search_response.json()
|
||||
for doc in documents:
|
||||
doc["collection_name"] = collection.get("name", "Unknown")
|
||||
all_documents.append(doc)
|
||||
except Exception as e:
|
||||
print(f"Error searching collection {collection_id}: {e}")
|
||||
continue
|
||||
) as create_collection_response:
|
||||
if create_collection_response.status in [200, 201]:
|
||||
collection_data = await create_collection_response.json()
|
||||
collection_id = collection_data.get("collection_id")
|
||||
else:
|
||||
collection_id = collection_id
|
||||
else:
|
||||
collection_id = collections[0].get("collection_id")
|
||||
|
||||
return all_documents[:20]
|
||||
if not collection_id:
|
||||
return None
|
||||
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/conversations",
|
||||
headers={"X-Telegram-ID": user_telegram_id}
|
||||
) as conversations_response:
|
||||
if conversations_response.status == 200:
|
||||
conversations = await conversations_response.json()
|
||||
for conv in conversations:
|
||||
if conv.get("collection_id") == str(collection_id):
|
||||
return conv.get("conversation_id")
|
||||
|
||||
async with session.post(
|
||||
f"{settings.BACKEND_URL}/conversations",
|
||||
json={"collection_id": str(collection_id)},
|
||||
headers={"X-Telegram-ID": user_telegram_id}
|
||||
) as create_conversation_response:
|
||||
if create_conversation_response.status in [200, 201]:
|
||||
conversation_data = await create_conversation_response.json()
|
||||
return conversation_data.get("conversation_id")
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"Error searching documents: {e}")
|
||||
return []
|
||||
print(f"Error getting/creating conversation: {e}")
|
||||
return None
|
||||
|
||||
async def generate_answer_with_rag(
|
||||
self,
|
||||
question: str,
|
||||
user_telegram_id: str
|
||||
) -> dict:
|
||||
documents = await self.search_documents_in_collections(
|
||||
user_telegram_id,
|
||||
question
|
||||
)
|
||||
|
||||
context_parts = []
|
||||
sources = []
|
||||
|
||||
for doc in documents[:5]:
|
||||
title = doc.get("title", "Без названия")
|
||||
content = doc.get("content", "")[:1000]
|
||||
collection_name = doc.get("collection_name", "Unknown")
|
||||
|
||||
context_parts.append(f"Документ: {title}\nКоллекция: {collection_name}\nСодержание: {content[:500]}...")
|
||||
sources.append({
|
||||
"title": title,
|
||||
"collection": collection_name,
|
||||
"document_id": doc.get("document_id")
|
||||
})
|
||||
|
||||
context = "\n\n".join(context_parts) if context_parts else "Релевантные документы не найдены."
|
||||
|
||||
system_prompt = """Ты - помощник-юрист, который отвечает на вопросы на основе предоставленных документов.
|
||||
Используй информацию из документов для формирования точного и полезного ответа.
|
||||
Если в документах нет информации для ответа, честно скажи об этом."""
|
||||
|
||||
user_prompt = f"""Контекст из документов:
|
||||
{context}
|
||||
|
||||
Вопрос пользователя: {question}
|
||||
|
||||
Ответь на вопрос, используя информацию из предоставленных документов. Если информации недостаточно, укажи это."""
|
||||
|
||||
"""Генерирует ответ используя RAG через API бэкенда"""
|
||||
try:
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
]
|
||||
|
||||
response = await self.deepseek_client.chat_completion(
|
||||
messages=messages,
|
||||
temperature=0.7,
|
||||
max_tokens=2000
|
||||
)
|
||||
|
||||
conversation_id = await self.get_or_create_conversation(user_telegram_id)
|
||||
if not conversation_id:
|
||||
return {
|
||||
"answer": response.get("content", "Failed to generate answer"),
|
||||
"sources": sources,
|
||||
"usage": response.get("usage", {})
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error generating answer: {e}")
|
||||
if documents:
|
||||
return {
|
||||
"answer": f"Found {len(documents)} documents but failed to generate answer",
|
||||
"sources": sources[:3],
|
||||
"usage": {}
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"answer": "No relevant documents found",
|
||||
"answer": "Не удалось создать беседу. Попробуйте позже.",
|
||||
"sources": [],
|
||||
"usage": {}
|
||||
}
|
||||
|
||||
async with create_http_session() as session:
|
||||
async with session.post(
|
||||
f"{settings.BACKEND_URL}/rag/question",
|
||||
json={
|
||||
"conversation_id": str(conversation_id),
|
||||
"question": question,
|
||||
"top_k": 20,
|
||||
"rerank_top_n": 5
|
||||
},
|
||||
headers={"X-Telegram-ID": user_telegram_id}
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
result = await response.json()
|
||||
sources = []
|
||||
for source in result.get("sources", []):
|
||||
sources.append({
|
||||
"title": source.get("title", "Без названия"),
|
||||
"document_id": source.get("document_id"),
|
||||
"chunk_id": source.get("chunk_id"),
|
||||
"index": source.get("index", 0)
|
||||
})
|
||||
|
||||
return {
|
||||
"answer": result.get("answer", "Не удалось сгенерировать ответ."),
|
||||
"sources": sources,
|
||||
"usage": result.get("usage", {}),
|
||||
"conversation_id": str(conversation_id)
|
||||
}
|
||||
else:
|
||||
error_text = await response.text()
|
||||
print(f"RAG API error: {response.status} - {error_text}")
|
||||
return {
|
||||
"answer": "Ошибка при генерации ответа. Попробуйте позже.",
|
||||
"sources": [],
|
||||
"usage": {}
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"Error generating answer with RAG: {e}")
|
||||
return {
|
||||
"answer": "Произошла ошибка при генерации ответа. Попробуйте позже.",
|
||||
"sources": [],
|
||||
"usage": {}
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
"""Настройки приложения (загружаются из .env файла в корне проекта)"""
|
||||
"""Настройки приложения получаеи из env файла, тут не ищи, мы спрятали:)"""
|
||||
|
||||
model_config = SettingsConfigDict(
|
||||
env_file=".env",
|
||||
@ -16,7 +16,7 @@ class Settings(BaseSettings):
|
||||
VERSION: str = "0.1.0"
|
||||
DEBUG: bool = False
|
||||
|
||||
TELEGRAM_BOT_TOKEN: str = ""
|
||||
TELEGRAM_BOT_TOKEN: str
|
||||
|
||||
FREE_QUESTIONS_LIMIT: int = 5
|
||||
PAYMENT_AMOUNT: float = 500.0
|
||||
@ -25,8 +25,8 @@ class Settings(BaseSettings):
|
||||
LOG_FILE: str = "logs/bot.log"
|
||||
|
||||
|
||||
YOOKASSA_SHOP_ID: str = ""
|
||||
YOOKASSA_SECRET_KEY: str = ""
|
||||
YOOKASSA_SHOP_ID: str
|
||||
YOOKASSA_SECRET_KEY: str
|
||||
YOOKASSA_RETURN_URL: str = "https://t.me/vibelawyer_bot"
|
||||
YOOKASSA_WEBHOOK_SECRET: Optional[str] = None
|
||||
|
||||
@ -35,7 +35,7 @@ class Settings(BaseSettings):
|
||||
DEEPSEEK_API_URL: str = "https://api.deepseek.com/v1/chat/completions"
|
||||
|
||||
|
||||
BACKEND_URL: str = "http://localhost:8000/api/v1"
|
||||
BACKEND_URL: str
|
||||
|
||||
|
||||
ADMIN_IDS_STR: str = ""
|
||||
|
||||
@ -3,7 +3,7 @@ import aiohttp
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.infrastructure.http_client import create_http_session, normalize_backend_url
|
||||
from tg_bot.infrastructure.http_client import create_http_session
|
||||
|
||||
|
||||
class User:
|
||||
@ -40,7 +40,7 @@ class UserService:
|
||||
"""Сервис для работы с пользователями через API бэкенда"""
|
||||
|
||||
def __init__(self):
|
||||
self.backend_url = normalize_backend_url(settings.BACKEND_URL)
|
||||
self.backend_url = settings.BACKEND_URL
|
||||
print(f"UserService initialized with BACKEND_URL: {self.backend_url}")
|
||||
|
||||
async def get_user_by_telegram_id(self, telegram_id: int) -> Optional[User]:
|
||||
@ -48,7 +48,7 @@ class UserService:
|
||||
try:
|
||||
url = f"{self.backend_url}/users/telegram/{telegram_id}"
|
||||
async with create_http_session() as session:
|
||||
async with session.get(url, ssl=False) as response:
|
||||
async with session.get(url) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
return User(data)
|
||||
@ -74,8 +74,7 @@ class UserService:
|
||||
async with create_http_session() as session:
|
||||
async with session.post(
|
||||
f"{self.backend_url}/users",
|
||||
json={"telegram_id": str(telegram_id), "role": "user"},
|
||||
ssl=False
|
||||
json={"telegram_id": str(telegram_id), "role": "user"}
|
||||
) as response:
|
||||
if response.status in [200, 201]:
|
||||
data = await response.json()
|
||||
@ -106,8 +105,7 @@ class UserService:
|
||||
try:
|
||||
async with create_http_session() as session:
|
||||
async with session.post(
|
||||
f"{self.backend_url}/users/telegram/{telegram_id}/increment-questions",
|
||||
ssl=False
|
||||
f"{self.backend_url}/users/telegram/{telegram_id}/increment-questions"
|
||||
) as response:
|
||||
return response.status == 200
|
||||
except Exception as e:
|
||||
@ -120,8 +118,7 @@ class UserService:
|
||||
async with create_http_session() as session:
|
||||
async with session.post(
|
||||
f"{self.backend_url}/users/telegram/{telegram_id}/activate-premium",
|
||||
params={"days": days},
|
||||
ssl=False
|
||||
params={"days": days}
|
||||
) as response:
|
||||
return response.status == 200
|
||||
except Exception as e:
|
||||
172
tg_bot/infrastructure/external/deepseek_client.py
vendored
172
tg_bot/infrastructure/external/deepseek_client.py
vendored
@ -1,172 +0,0 @@
|
||||
import json
|
||||
from typing import Optional, AsyncIterator
|
||||
import httpx
|
||||
from tg_bot.config.settings import settings
|
||||
|
||||
|
||||
class DeepSeekAPIError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DeepSeekClient:
|
||||
|
||||
def __init__(self, api_key: str | None = None, api_url: str | None = None):
|
||||
self.api_key = api_key or settings.DEEPSEEK_API_KEY
|
||||
self.api_url = api_url or settings.DEEPSEEK_API_URL
|
||||
self.timeout = 60.0
|
||||
|
||||
def _get_headers(self) -> dict[str, str]:
|
||||
if not self.api_key:
|
||||
raise DeepSeekAPIError("API key not set")
|
||||
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {self.api_key}"
|
||||
}
|
||||
|
||||
async def chat_completion(
|
||||
self,
|
||||
messages: list[dict[str, str]],
|
||||
model: str = "deepseek-chat",
|
||||
temperature: float = 0.7,
|
||||
max_tokens: Optional[int] = None,
|
||||
stream: bool = False
|
||||
) -> dict:
|
||||
if not self.api_key:
|
||||
return {
|
||||
"content": "API key not configured",
|
||||
"usage": {
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"total_tokens": 0
|
||||
}
|
||||
}
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"temperature": temperature,
|
||||
"stream": stream
|
||||
}
|
||||
|
||||
if max_tokens is not None:
|
||||
payload["max_tokens"] = max_tokens
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.post(
|
||||
self.api_url,
|
||||
headers=self._get_headers(),
|
||||
json=payload
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
data = response.json()
|
||||
|
||||
if "choices" in data and len(data["choices"]) > 0:
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
else:
|
||||
raise DeepSeekAPIError("Invalid response format")
|
||||
|
||||
usage = data.get("usage", {})
|
||||
|
||||
return {
|
||||
"content": content,
|
||||
"usage": {
|
||||
"prompt_tokens": usage.get("prompt_tokens", 0),
|
||||
"completion_tokens": usage.get("completion_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0)
|
||||
}
|
||||
}
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_msg = f"API error: {e.response.status_code}"
|
||||
try:
|
||||
error_data = e.response.json()
|
||||
if "error" in error_data:
|
||||
error_msg = error_data['error'].get('message', error_msg)
|
||||
except:
|
||||
pass
|
||||
raise DeepSeekAPIError(error_msg) from e
|
||||
except httpx.RequestError as e:
|
||||
raise DeepSeekAPIError(f"Connection error: {str(e)}") from e
|
||||
except Exception as e:
|
||||
raise DeepSeekAPIError(str(e)) from e
|
||||
|
||||
async def stream_chat_completion(
|
||||
self,
|
||||
messages: list[dict[str, str]],
|
||||
model: str = "deepseek-chat",
|
||||
temperature: float = 0.7,
|
||||
max_tokens: Optional[int] = None
|
||||
) -> AsyncIterator[str]:
|
||||
if not self.api_key:
|
||||
yield "API key not configured"
|
||||
return
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"temperature": temperature,
|
||||
"stream": True
|
||||
}
|
||||
|
||||
if max_tokens is not None:
|
||||
payload["max_tokens"] = max_tokens
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
async with client.stream(
|
||||
"POST",
|
||||
self.api_url,
|
||||
headers=self._get_headers(),
|
||||
json=payload
|
||||
) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if not line.strip():
|
||||
continue
|
||||
|
||||
if line.startswith("data: "):
|
||||
line = line[6:]
|
||||
|
||||
if line.strip() == "[DONE]":
|
||||
break
|
||||
|
||||
try:
|
||||
data = json.loads(line)
|
||||
|
||||
if "choices" in data and len(data["choices"]) > 0:
|
||||
delta = data["choices"][0].get("delta", {})
|
||||
content = delta.get("content", "")
|
||||
if content:
|
||||
yield content
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_msg = f"API error: {e.response.status_code}"
|
||||
try:
|
||||
error_data = e.response.json()
|
||||
if "error" in error_data:
|
||||
error_msg = error_data['error'].get('message', error_msg)
|
||||
except:
|
||||
pass
|
||||
raise DeepSeekAPIError(error_msg) from e
|
||||
except httpx.RequestError as e:
|
||||
raise DeepSeekAPIError(f"Connection error: {str(e)}") from e
|
||||
except Exception as e:
|
||||
raise DeepSeekAPIError(str(e)) from e
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
if not self.api_key:
|
||||
return False
|
||||
|
||||
try:
|
||||
test_messages = [{"role": "user", "content": "test"}]
|
||||
await self.chat_completion(test_messages, max_tokens=1)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@ -1,87 +1,23 @@
|
||||
"""HTTP client utilities for making requests to the backend API"""
|
||||
import aiohttp
|
||||
from typing import Optional
|
||||
import ssl
|
||||
import os
|
||||
|
||||
|
||||
def get_windows_host_ip() -> Optional[str]:
|
||||
"""
|
||||
Get the Windows host IP address when running in WSL.
|
||||
In WSL2, the Windows host IP is typically the first nameserver in /etc/resolv.conf.
|
||||
"""
|
||||
try:
|
||||
if os.path.exists("/etc/resolv.conf"):
|
||||
with open("/etc/resolv.conf", "r") as f:
|
||||
for line in f:
|
||||
if line.startswith("nameserver"):
|
||||
ip = line.split()[1]
|
||||
if ip not in ["127.0.0.1", "127.0.0.53"] and not ip.startswith("fe80"):
|
||||
return ip
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def normalize_backend_url(url: str) -> str:
|
||||
"""
|
||||
Normalize backend URL for better compatibility, especially on WSL and Docker.
|
||||
"""
|
||||
if not ("localhost" in url or "127.0.0.1" in url):
|
||||
return url
|
||||
if os.path.exists("/.dockerenv"):
|
||||
print(f"Warning: Running in Docker but URL contains localhost: {url}")
|
||||
print("Please set BACKEND_URL environment variable in docker-compose.yml to use Docker service name (e.g., http://backend:8000/api/v1)")
|
||||
return url.replace("localhost", "127.0.0.1")
|
||||
try:
|
||||
if os.path.exists("/proc/version"):
|
||||
with open("/proc/version", "r") as f:
|
||||
version_content = f.read().lower()
|
||||
if "microsoft" in version_content:
|
||||
windows_ip = get_windows_host_ip()
|
||||
if windows_ip:
|
||||
if "localhost" in url or "127.0.0.1" in url:
|
||||
url = url.replace("localhost", windows_ip).replace("127.0.0.1", windows_ip)
|
||||
print(f"WSL detected: Using Windows host IP {windows_ip} for backend connection")
|
||||
return url
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not detect WSL environment: {e}")
|
||||
|
||||
if url.startswith("http://localhost") or url.startswith("https://localhost"):
|
||||
return url.replace("localhost", "127.0.0.1")
|
||||
return url
|
||||
|
||||
|
||||
def create_http_session(timeout: Optional[aiohttp.ClientTimeout] = None) -> aiohttp.ClientSession:
|
||||
"""
|
||||
Create a configured aiohttp ClientSession for backend API requests.
|
||||
|
||||
Args:
|
||||
timeout: Optional timeout configuration. Defaults to 30 seconds total timeout.
|
||||
|
||||
Returns:
|
||||
Configured aiohttp.ClientSession
|
||||
Создаем сессию для запросов к бэку
|
||||
"""
|
||||
if timeout is None:
|
||||
timeout = aiohttp.ClientTimeout(total=30, connect=10)
|
||||
|
||||
connector = aiohttp.TCPConnector(
|
||||
ssl=False,
|
||||
limit=100,
|
||||
limit_per_host=30,
|
||||
force_close=True,
|
||||
enable_cleanup_closed=True
|
||||
limit_per_host=30
|
||||
)
|
||||
|
||||
ssl_context = ssl.create_default_context()
|
||||
ssl_context.check_hostname = False
|
||||
ssl_context.verify_mode = ssl.CERT_NONE
|
||||
|
||||
return aiohttp.ClientSession(
|
||||
connector=connector,
|
||||
timeout=timeout,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json"
|
||||
}
|
||||
)
|
||||
|
||||
@ -5,7 +5,7 @@ from decimal import Decimal
|
||||
import aiohttp
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.payment.yookassa.client import yookassa_client
|
||||
from tg_bot.domain.services.user_service import UserService
|
||||
from tg_bot.domain.user_service import UserService
|
||||
from datetime import datetime
|
||||
|
||||
router = Router()
|
||||
|
||||
@ -3,18 +3,32 @@ from aiogram.types import Message, InlineKeyboardMarkup, InlineKeyboardButton, C
|
||||
from aiogram.filters import Command, StateFilter
|
||||
from aiogram.fsm.context import FSMContext
|
||||
import aiohttp
|
||||
from urllib.parse import unquote
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.infrastructure.http_client import create_http_session
|
||||
from tg_bot.infrastructure.telegram.states.collection_states import (
|
||||
CollectionAccessStates,
|
||||
CollectionEditStates
|
||||
)
|
||||
|
||||
|
||||
def decode_title(title: str) -> str:
|
||||
if not title:
|
||||
return "Без названия"
|
||||
try:
|
||||
decoded = unquote(title)
|
||||
if decoded != title or '%' not in title:
|
||||
return decoded
|
||||
return title
|
||||
except Exception:
|
||||
return title
|
||||
|
||||
router = Router()
|
||||
|
||||
|
||||
async def get_user_collections(telegram_id: str):
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/collections/",
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
@ -33,7 +47,7 @@ async def get_collection_documents(collection_id: str, telegram_id: str):
|
||||
url = f"{settings.BACKEND_URL}/documents/collection/{collection_id}"
|
||||
print(f"DEBUG get_collection_documents: URL={url}, collection_id={collection_id}, telegram_id={telegram_id}")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
url,
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
@ -57,7 +71,7 @@ async def get_collection_documents(collection_id: str, telegram_id: str):
|
||||
|
||||
async def search_in_collection(collection_id: str, query: str, telegram_id: str):
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/documents/collection/{collection_id}",
|
||||
params={"search": query},
|
||||
@ -78,7 +92,7 @@ async def get_collection_info(collection_id: str, telegram_id: str):
|
||||
url = f"{settings.BACKEND_URL}/collections/{collection_id}"
|
||||
print(f"DEBUG get_collection_info: URL={url}, collection_id={collection_id}, telegram_id={telegram_id}")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
url,
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
@ -103,7 +117,7 @@ async def get_collection_info(collection_id: str, telegram_id: str):
|
||||
async def get_collection_access_list(collection_id: str, telegram_id: str):
|
||||
"""Получить список пользователей с доступом к коллекции"""
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/collections/{collection_id}/access",
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
@ -122,7 +136,7 @@ async def grant_collection_access(collection_id: str, telegram_id: str, owner_te
|
||||
url = f"{settings.BACKEND_URL}/collections/{collection_id}/access/telegram/{telegram_id}"
|
||||
print(f"DEBUG grant_collection_access: URL={url}, target_telegram_id={telegram_id}, owner_telegram_id={owner_telegram_id}")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.post(
|
||||
url,
|
||||
headers={"X-Telegram-ID": owner_telegram_id}
|
||||
@ -145,7 +159,7 @@ async def grant_collection_access(collection_id: str, telegram_id: str, owner_te
|
||||
async def revoke_collection_access(collection_id: str, telegram_id: str, owner_telegram_id: str):
|
||||
"""Отозвать доступ к коллекции"""
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.delete(
|
||||
f"{settings.BACKEND_URL}/collections/{collection_id}/access/telegram/{telegram_id}",
|
||||
headers={"X-Telegram-ID": owner_telegram_id}
|
||||
@ -242,7 +256,7 @@ async def cmd_search(message: Message):
|
||||
|
||||
response = f"<b>Результаты поиска:</b> \"{query}\"\n\n"
|
||||
for i, doc in enumerate(results[:5], 1):
|
||||
title = doc.get("title", "Без названия")
|
||||
title = decode_title(doc.get("title", "Без названия"))
|
||||
content = doc.get("content", "")[:200]
|
||||
response += f"{i}. <b>{title}</b>\n"
|
||||
response += f" <i>{content}...</i>\n\n"
|
||||
@ -281,7 +295,7 @@ async def show_collection_menu(callback: CallbackQuery):
|
||||
collection_name = collection_info.get("name", "Коллекция")
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/users/telegram/{telegram_id}"
|
||||
) as response:
|
||||
@ -377,7 +391,7 @@ async def show_collection_documents(callback: CallbackQuery):
|
||||
|
||||
for i, doc in enumerate(documents[:10], 1):
|
||||
doc_id = doc.get("document_id")
|
||||
title = doc.get("title", "Без названия")
|
||||
title = decode_title(doc.get("title", "Без названия"))
|
||||
content_preview = doc.get("content", "")[:100]
|
||||
response += f"{i}. <b>{title}</b>\n"
|
||||
if content_preview:
|
||||
@ -673,7 +687,7 @@ async def process_edit_collection_description(message: Message, state: FSMContex
|
||||
if new_description:
|
||||
update_data["description"] = new_description
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.put(
|
||||
f"{settings.BACKEND_URL}/collections/{collection_id}",
|
||||
json=update_data,
|
||||
|
||||
@ -6,19 +6,33 @@ from aiogram.types import Message, InlineKeyboardMarkup, InlineKeyboardButton, C
|
||||
from aiogram.filters import StateFilter
|
||||
from aiogram.fsm.context import FSMContext
|
||||
import aiohttp
|
||||
from urllib.parse import unquote
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.infrastructure.http_client import create_http_session
|
||||
from tg_bot.infrastructure.telegram.states.collection_states import (
|
||||
DocumentEditStates,
|
||||
DocumentUploadStates
|
||||
)
|
||||
|
||||
|
||||
def decode_title(title: str) -> str:
|
||||
"""Декодирует URL-encoded название документа"""
|
||||
if not title:
|
||||
return "Без названия"
|
||||
try:
|
||||
decoded = unquote(title)
|
||||
if decoded != title or '%' not in title:
|
||||
return decoded
|
||||
return title
|
||||
except Exception:
|
||||
return title
|
||||
router = Router()
|
||||
|
||||
|
||||
async def get_document_info(document_id: str, telegram_id: str):
|
||||
"""Получить информацию о документе"""
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/documents/{document_id}",
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
@ -34,7 +48,7 @@ async def get_document_info(document_id: str, telegram_id: str):
|
||||
async def delete_document(document_id: str, telegram_id: str):
|
||||
"""Удалить документ"""
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.delete(
|
||||
f"{settings.BACKEND_URL}/documents/{document_id}",
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
@ -54,7 +68,7 @@ async def update_document(document_id: str, telegram_id: str, title: str = None,
|
||||
if content:
|
||||
update_data["content"] = content
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.put(
|
||||
f"{settings.BACKEND_URL}/documents/{document_id}",
|
||||
json=update_data,
|
||||
@ -71,7 +85,7 @@ async def update_document(document_id: str, telegram_id: str, title: str = None,
|
||||
async def upload_document_to_collection(collection_id: str, file_data: bytes, filename: str, telegram_id: str):
|
||||
"""Загрузить документ в коллекцию"""
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
form_data = aiohttp.FormData()
|
||||
form_data.add_field('file', file_data, filename=filename, content_type='application/octet-stream')
|
||||
|
||||
@ -107,7 +121,7 @@ async def view_document(callback: CallbackQuery):
|
||||
)
|
||||
return
|
||||
|
||||
title = document.get("title", "Без названия")
|
||||
title = decode_title(document.get("title", "Без названия"))
|
||||
content = document.get("content", "")
|
||||
collection_id = document.get("collection_id")
|
||||
|
||||
@ -120,7 +134,7 @@ async def view_document(callback: CallbackQuery):
|
||||
response += "\n\n<i>...</i>"
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with create_http_session() as session:
|
||||
async with session.get(
|
||||
f"{settings.BACKEND_URL}/collections/{collection_id}",
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
@ -183,7 +197,7 @@ async def edit_document_prompt(callback: CallbackQuery, state: FSMContext):
|
||||
await callback.message.answer(
|
||||
"<b>Редактирование документа</b>\n\n"
|
||||
"Отправьте новое название документа или /skip чтобы оставить текущее.\n\n"
|
||||
f"Текущее название: <b>{document.get('title', 'Без названия')}</b>",
|
||||
f"Текущее название: <b>{decode_title(document.get('title', 'Без названия'))}</b>",
|
||||
parse_mode="HTML"
|
||||
)
|
||||
await callback.answer()
|
||||
@ -359,8 +373,9 @@ async def process_upload_document(message: Message, state: FSMContext):
|
||||
|
||||
if result:
|
||||
await message.answer(
|
||||
f"<b>Документ загружен</b>\n\n"
|
||||
f"Название: <b>{result.get('title', filename)}</b>",
|
||||
f"<b>✅ Документ загружен и добавлен в коллекцию</b>\n\n"
|
||||
f"<b>Название:</b> {decode_title(result.get('title', filename))}\n\n"
|
||||
f"📄 Документ сейчас индексируется. Вы получите уведомление, когда индексация завершится.\n\n",
|
||||
parse_mode="HTML"
|
||||
)
|
||||
else:
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
from aiogram import Router, types
|
||||
from aiogram.types import Message
|
||||
from datetime import datetime
|
||||
import aiohttp
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.domain.services.user_service import UserService, User
|
||||
from tg_bot.domain.user_service import UserService, User
|
||||
from tg_bot.application.services.rag_service import RAGService
|
||||
import re
|
||||
|
||||
|
||||
router = Router()
|
||||
rag_service = RAGService()
|
||||
@ -60,37 +60,41 @@ async def process_premium_question(message: Message, user: User, question_text:
|
||||
answer = rag_result.get("answer", "Извините, не удалось сгенерировать ответ.")
|
||||
sources = rag_result.get("sources", [])
|
||||
|
||||
await save_conversation_to_backend(
|
||||
str(message.from_user.id),
|
||||
question_text,
|
||||
answer,
|
||||
sources
|
||||
)
|
||||
# Беседа уже сохранена в бэкенде через API /rag/question
|
||||
|
||||
import re
|
||||
formatted_answer = answer
|
||||
formatted_answer = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', formatted_answer)
|
||||
formatted_answer = re.sub(r'^(\d+)\.\s+', r'\1. ', formatted_answer, flags=re.MULTILINE)
|
||||
formatted_answer = formatted_answer.replace("- ", "• ")
|
||||
|
||||
response = (
|
||||
f"<b>Ваш вопрос:</b>\n"
|
||||
f"<i>{question_text[:200]}</i>\n\n"
|
||||
f"<b>Ответ:</b>\n{answer}\n\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"💬 <b>Ответ:</b>\n\n"
|
||||
f"{formatted_answer}\n\n"
|
||||
)
|
||||
|
||||
if sources:
|
||||
response += f"<b>Источники из коллекций:</b>\n"
|
||||
collections_used = {}
|
||||
for source in sources[:5]:
|
||||
collection_name = source.get('collection', 'Неизвестно')
|
||||
if collection_name not in collections_used:
|
||||
collections_used[collection_name] = []
|
||||
collections_used[collection_name].append(source.get('title', 'Без названия'))
|
||||
|
||||
for i, (collection_name, titles) in enumerate(collections_used.items(), 1):
|
||||
response += f"{i}. <b>Коллекция:</b> {collection_name}\n"
|
||||
for title in titles[:2]:
|
||||
response += f" • {title}\n"
|
||||
response += "\n<i>Используйте /mycollections для просмотра всех коллекций</i>\n\n"
|
||||
response += f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
response += f"📚 <b>Источники:</b>\n"
|
||||
for idx, source in enumerate(sources[:5], 1):
|
||||
title = source.get('title', 'Без названия')
|
||||
try:
|
||||
from urllib.parse import unquote
|
||||
decoded = unquote(title)
|
||||
if decoded != title or '%' in title:
|
||||
title = decoded
|
||||
except:
|
||||
pass
|
||||
response += f" {idx}. {title}\n"
|
||||
response += "\n<i>💡 Используйте /mycollections для просмотра всех коллекций</i>\n\n"
|
||||
|
||||
response += (
|
||||
f"<b>Статус:</b> Premium (вопросов безлимитно)\n"
|
||||
f"<b>Всего вопросов:</b> {user.questions_used}"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"✨ <b>Статус:</b> Premium (вопросов безлимитно)\n"
|
||||
f"📊 <b>Всего вопросов:</b> {user.questions_used}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@ -98,9 +102,12 @@ async def process_premium_question(message: Message, user: User, question_text:
|
||||
response = (
|
||||
f"<b>Ваш вопрос:</b>\n"
|
||||
f"<i>{question_text[:200]}</i>\n\n"
|
||||
f"Ошибка при генерации ответа. Попробуйте позже.\n\n"
|
||||
f"<b>Статус:</b> Premium\n"
|
||||
f"<b>Всего вопросов:</b> {user.questions_used}"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"❌ <b>Ошибка при генерации ответа.</b>\n"
|
||||
f"Попробуйте позже.\n\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"✨ <b>Статус:</b> Premium\n"
|
||||
f"📊 <b>Всего вопросов:</b> {user.questions_used}"
|
||||
)
|
||||
|
||||
await message.answer(response, parse_mode="HTML")
|
||||
@ -122,137 +129,66 @@ async def process_free_question(message: Message, user: User, question_text: str
|
||||
answer = rag_result.get("answer", "Извините, не удалось сгенерировать ответ.")
|
||||
sources = rag_result.get("sources", [])
|
||||
|
||||
await save_conversation_to_backend(
|
||||
str(message.from_user.id),
|
||||
question_text,
|
||||
answer,
|
||||
sources
|
||||
)
|
||||
# Уже все сохранили через /rag/question
|
||||
|
||||
formatted_answer = answer
|
||||
formatted_answer = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', formatted_answer)
|
||||
formatted_answer = re.sub(r'^(\d+)\.\s+', r'\1. ', formatted_answer, flags=re.MULTILINE)
|
||||
formatted_answer = formatted_answer.replace("- ", "• ")
|
||||
response = (
|
||||
f"<b>Ваш вопрос:</b>\n"
|
||||
f"<i>{question_text[:200]}</i>\n\n"
|
||||
f"<b>Ответ:</b>\n{answer}\n\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"💬 <b>Ответ:</b>\n\n"
|
||||
f"{formatted_answer}\n\n"
|
||||
)
|
||||
|
||||
if sources:
|
||||
response += f"<b>Источники из коллекций:</b>\n"
|
||||
collections_used = {}
|
||||
for source in sources[:5]:
|
||||
collection_name = source.get('collection', 'Неизвестно')
|
||||
if collection_name not in collections_used:
|
||||
collections_used[collection_name] = []
|
||||
collections_used[collection_name].append(source.get('title', 'Без названия'))
|
||||
|
||||
for i, (collection_name, titles) in enumerate(collections_used.items(), 1):
|
||||
response += f"{i}. <b>Коллекция:</b> {collection_name}\n"
|
||||
for title in titles[:2]:
|
||||
response += f" • {title}\n"
|
||||
response += "\n<i>Используйте /mycollections для просмотра всех коллекций</i>\n\n"
|
||||
response += f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
response += f"📚 <b>Источники:</b>\n"
|
||||
for idx, source in enumerate(sources[:5], 1):
|
||||
title = source.get('title', 'Без названия')
|
||||
try:
|
||||
from urllib.parse import unquote
|
||||
decoded = unquote(title)
|
||||
if decoded != title or '%' in title:
|
||||
title = decoded
|
||||
except:
|
||||
pass
|
||||
response += f" {idx}. {title}\n"
|
||||
response += "\n<i>💡 Используйте /mycollections для просмотра всех коллекций</i>\n\n"
|
||||
|
||||
response += (
|
||||
f"<b>Статус:</b> Бесплатный доступ\n"
|
||||
f"<b>Использовано вопросов:</b> {user.questions_used}/{settings.FREE_QUESTIONS_LIMIT}\n"
|
||||
f"<b>Осталось бесплатных:</b> {remaining}\n\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"📊 <b>Статус:</b> Бесплатный доступ\n"
|
||||
f"📈 <b>Использовано вопросов:</b> {user.questions_used}/{settings.FREE_QUESTIONS_LIMIT}\n"
|
||||
f"🎯 <b>Осталось бесплатных:</b> {remaining}\n\n"
|
||||
)
|
||||
|
||||
if remaining <= 3 and remaining > 0:
|
||||
response += f"<i>Осталось мало вопросов! Для продолжения используйте /buy</i>\n\n"
|
||||
response += f"⚠️ <i>Осталось мало вопросов! Для продолжения используйте /buy</i>\n\n"
|
||||
|
||||
response += f"<i>Для безлимитного доступа: /buy</i>"
|
||||
response += f"💎 <i>Для безлимитного доступа: /buy</i>"
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error generating answer: {e}")
|
||||
response = (
|
||||
f"<b>Ваш вопрос:</b>\n"
|
||||
f"<i>{question_text[:200]}</i>\n\n"
|
||||
f"Ошибка при генерации ответа. Попробуйте позже.\n\n"
|
||||
f"<b>Статус:</b> Бесплатный доступ\n"
|
||||
f"<b>Использовано вопросов:</b> {user.questions_used}/{settings.FREE_QUESTIONS_LIMIT}\n"
|
||||
f"<b>Осталось бесплатных:</b> {remaining}\n\n"
|
||||
f"<i>Для безлимитного доступа: /buy</i>"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"❌ <b>Ошибка при генерации ответа.</b>\n"
|
||||
f"Попробуйте позже.\n\n"
|
||||
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
|
||||
f"📊 <b>Статус:</b> Бесплатный доступ\n"
|
||||
f"📈 <b>Использовано вопросов:</b> {user.questions_used}/{settings.FREE_QUESTIONS_LIMIT}\n"
|
||||
f"🎯 <b>Осталось бесплатных:</b> {remaining}\n\n"
|
||||
f"💎 <i>Для безлимитного доступа: /buy</i>"
|
||||
)
|
||||
|
||||
await message.answer(response, parse_mode="HTML")
|
||||
|
||||
|
||||
async def save_conversation_to_backend(telegram_id: str, question: str, answer: str, sources: list):
|
||||
try:
|
||||
from tg_bot.config.settings import settings
|
||||
backend_url = settings.BACKEND_URL
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
f"{backend_url}/users/telegram/{telegram_id}"
|
||||
) as user_response:
|
||||
if user_response.status != 200:
|
||||
return
|
||||
user_data = await user_response.json()
|
||||
user_uuid = user_data.get("user_id")
|
||||
|
||||
async with session.get(
|
||||
f"{backend_url}/collections/",
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
) as collections_response:
|
||||
collections = []
|
||||
if collections_response.status == 200:
|
||||
collections = await collections_response.json()
|
||||
|
||||
collection_id = None
|
||||
if collections:
|
||||
collection_id = collections[0].get("collection_id")
|
||||
else:
|
||||
async with session.post(
|
||||
f"{backend_url}/collections",
|
||||
json={
|
||||
"name": "Основная коллекция",
|
||||
"description": "Коллекция по умолчанию",
|
||||
"is_public": False
|
||||
},
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
) as create_collection_response:
|
||||
if create_collection_response.status in [200, 201]:
|
||||
collection_data = await create_collection_response.json()
|
||||
collection_id = collection_data.get("collection_id")
|
||||
|
||||
if not collection_id:
|
||||
return
|
||||
|
||||
async with session.post(
|
||||
f"{backend_url}/conversations",
|
||||
json={"collection_id": str(collection_id)},
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
) as conversation_response:
|
||||
if conversation_response.status not in [200, 201]:
|
||||
return
|
||||
conversation_data = await conversation_response.json()
|
||||
conversation_id = conversation_data.get("conversation_id")
|
||||
|
||||
if not conversation_id:
|
||||
return
|
||||
|
||||
await session.post(
|
||||
f"{backend_url}/messages",
|
||||
json={
|
||||
"conversation_id": str(conversation_id),
|
||||
"content": question,
|
||||
"role": "user"
|
||||
},
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
)
|
||||
|
||||
await session.post(
|
||||
f"{backend_url}/messages",
|
||||
json={
|
||||
"conversation_id": str(conversation_id),
|
||||
"content": answer,
|
||||
"role": "assistant",
|
||||
"sources": {"documents": sources}
|
||||
},
|
||||
headers={"X-Telegram-ID": telegram_id}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error saving conversation: {e}")
|
||||
#Сново сохраняется в /rag/question
|
||||
|
||||
|
||||
async def handle_limit_exceeded(message: Message, user: User):
|
||||
|
||||
@ -4,7 +4,7 @@ from aiogram.types import Message
|
||||
from datetime import datetime
|
||||
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.domain.services.user_service import UserService
|
||||
from tg_bot.domain.user_service import UserService
|
||||
|
||||
router = Router()
|
||||
user_service = UserService()
|
||||
|
||||
@ -4,7 +4,7 @@ from aiogram.filters import Command
|
||||
from aiogram.types import Message
|
||||
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.domain.services.user_service import UserService
|
||||
from tg_bot.domain.user_service import UserService
|
||||
|
||||
router = Router()
|
||||
user_service = UserService()
|
||||
|
||||
@ -18,7 +18,7 @@ async def handle_yookassa_webhook(request: Request):
|
||||
print(f"Webhook received: {event_type}")
|
||||
try:
|
||||
from tg_bot.config.settings import settings
|
||||
from tg_bot.domain.services.user_service import UserService
|
||||
from tg_bot.domain.user_service import UserService
|
||||
from aiogram import Bot
|
||||
|
||||
if event_type == "payment.succeeded":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user