"""
Base de conhecimento com busca semântica
"""
import asyncio
import hashlib
import json
import sqlite3
from contextlib import closing
from dataclasses import dataclass
from datetime import datetime
from typing import List, Dict, Optional

import numpy as np

from config.settings import settings
from services.embedding_service import EmbeddingService


@dataclass
class KnowledgeItem:
    """A single stored unit of knowledge (one chunk of a document).

    Instances are plain records: the dataclass decorator generates the
    constructor, ``__repr__`` and ``__eq__`` from the fields below.
    """

    # Unique identifier of the chunk.
    id: str
    # Text of the chunk.
    content: str
    # Label describing where the text came from.
    source: str
    # Free-form extra information attached to the chunk.
    metadata: Dict
    # Embedding vector for ``content``; ``None`` when not computed.
    embedding: Optional[List[float]] = None


class KnowledgeBase:
    """Knowledge base backed by SQLite.

    Provides:
    - storage of text chunks in the ``knowledge`` table,
    - a parallel FTS5 index (``knowledge_fts``) for keyword search,
    - JSON-encoded embeddings for cosine-similarity search,
    - automatic chunking of long documents.

    Every database operation opens its own short-lived connection; the
    async methods run those blocking operations in the default executor.
    """

    def __init__(self, db_path: str = "knowledge_base.db"):
        """Create the embedding service and make sure the schema exists.

        Args:
            db_path: Path of the SQLite database file. Defaults to the
                previously hard-coded ``"knowledge_base.db"``.
        """
        self.db_path = db_path
        self.embedding_service = EmbeddingService(settings.embedding)
        self._init_db()

    def _init_db(self):
        """Create the ``knowledge`` table and its FTS5 index if missing."""
        # ``closing`` guarantees the connection is closed even on error;
        # the inner ``conn`` context commits on success / rolls back on error.
        with closing(sqlite3.connect(self.db_path)) as conn, conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS knowledge (
                    id TEXT PRIMARY KEY,
                    content TEXT NOT NULL,
                    source TEXT NOT NULL,
                    metadata TEXT,
                    embedding TEXT,
                    created_at TEXT
                )
            """)
            conn.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_fts
                USING fts5(content, source, knowledge_id)
            """)

    async def add_document(
        self,
        content: str,
        source: str = "manual",
        doc_id: Optional[str] = None,
        metadata: Optional[Dict] = None
    ) -> str:
        """Add a document, chunking it automatically when it is long.

        Each chunk is embedded and stored under the id ``"{doc_id}_{i}"``
        with ``chunk_index`` added to its metadata.

        Args:
            content: Full text of the document.
            source: Label describing the origin of the text.
            doc_id: Identifier to use; when omitted (or empty) one is
                derived from the first 50 characters of ``content``.
            metadata: Extra key/values copied onto every chunk.

        Returns:
            The document id under which the chunks were stored.

        Note:
            Chunks are upserted by id. Chunks with higher indexes left
            over from an earlier, longer version of the same ``doc_id``
            are not removed; call ``delete_document`` first to replace a
            document completely.
        """
        chunks = self._chunk_document(content)
        doc_id = doc_id or self._generate_id(content[:50])
        base_metadata = metadata or {}

        for index, chunk in enumerate(chunks):
            embedding = await self.embedding_service.get_embedding(chunk)
            await self._save_item(
                KnowledgeItem(
                    id=f"{doc_id}_{index}",
                    content=chunk,
                    source=source,
                    metadata={**base_metadata, "chunk_index": index},
                    embedding=embedding
                )
            )

        return doc_id

    async def search(
        self,
        query: str,
        top_k: int = 5,
        use_semantic: bool = True
    ) -> List[str]:
        """Search the knowledge base.

        Semantic (embedding) hits come first, followed by full-text hits
        that were not already returned; the merged list is cut to
        ``top_k`` entries.

        Args:
            query: Free-text query.
            top_k: Maximum number of chunk texts to return.
            use_semantic: When ``False`` only the full-text search runs.

        Returns:
            Chunk contents, without duplicates, best matches first.
        """
        semantic_results: List[str] = []
        if use_semantic:
            semantic_results = await self._semantic_search(query, top_k)

        text_results = await self._text_search(query, top_k)

        # dict.fromkeys removes duplicates while keeping first-seen order.
        merged = list(dict.fromkeys(semantic_results + text_results))
        return merged[:top_k]

    async def _semantic_search(
        self,
        query: str,
        top_k: int
    ) -> List[str]:
        """Embed ``query`` and rank stored chunks by cosine similarity."""
        query_embedding = await self.embedding_service.get_embedding(query)

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            self._find_similar,
            query_embedding,
            top_k
        )

    def _find_similar(
        self,
        query_embedding: List[float],
        top_k: int
    ) -> List[str]:
        """Return the ``top_k`` chunk texts most similar to the query vector.

        Similarity is the cosine between ``query_embedding`` and each stored
        embedding. Rows that cannot be compared are skipped instead of
        aborting the search: undecodable JSON, a different dimension, or a
        zero / non-finite norm (which would otherwise yield NaN scores and
        an undefined sort order).

        Args:
            query_embedding: Embedding vector of the query.
            top_k: Maximum number of results; values <= 0 yield ``[]``.

        Returns:
            Chunk contents ordered from most to least similar.
        """
        if top_k <= 0:
            return []

        q_vec = np.asarray(query_embedding, dtype=float)
        if q_vec.ndim != 1:
            return []
        q_norm = np.linalg.norm(q_vec)
        if not np.isfinite(q_norm) or q_norm == 0:
            return []

        with closing(sqlite3.connect(self.db_path)) as conn:
            rows = conn.execute(
                "SELECT content, embedding FROM knowledge "
                "WHERE embedding IS NOT NULL"
            ).fetchall()

        scored = []
        for content, emb_json in rows:
            try:
                vec = np.asarray(json.loads(emb_json), dtype=float)
            except (TypeError, ValueError):
                # Corrupt embedding: ignore this row, keep searching.
                continue
            if vec.shape != q_vec.shape:
                continue
            norm = np.linalg.norm(vec)
            if not np.isfinite(norm) or norm == 0:
                continue
            scored.append((float(np.dot(q_vec, vec) / (q_norm * norm)), content))

        # Stable sort: rows with equal scores keep their database order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [content for _, content in scored[:top_k]]

    async def _text_search(self, query: str, top_k: int) -> List[str]:
        """Run the full-text search in the default executor."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, self._fts_search, query, top_k
        )

    @staticmethod
    def _quote_fts_query(query: str) -> str:
        """Turn free text into an FTS5 query made only of quoted strings.

        Each whitespace-separated token is wrapped in double quotes (with
        embedded quotes doubled) so FTS5 operators and punctuation in user
        input are treated as plain text.
        """
        return " ".join(
            '"' + token.replace('"', '""') + '"' for token in query.split()
        )

    def _fts_search(self, query: str, top_k: int) -> List[str]:
        """Return up to ``top_k`` chunk texts matching ``query`` via FTS5.

        The query is first tried verbatim, so FTS5 query syntax keeps
        working. If SQLite rejects it (e.g. a natural-language question
        containing ``?`` or ``'``), it is retried with every token quoted.
        Blank queries and ``top_k <= 0`` return ``[]``.
        """
        if top_k <= 0 or not query or not query.strip():
            return []

        sql = """
            SELECT k.content
            FROM knowledge k
            JOIN knowledge_fts fts ON k.id = fts.knowledge_id
            WHERE knowledge_fts MATCH ?
            ORDER BY rank
            LIMIT ?
        """

        with closing(sqlite3.connect(self.db_path)) as conn:
            try:
                rows = conn.execute(sql, (query, top_k)).fetchall()
            except sqlite3.OperationalError:
                # Most likely an FTS5 syntax error caused by raw user text.
                # A genuine database problem will raise again on the retry.
                rows = conn.execute(
                    sql, (self._quote_fts_query(query), top_k)
                ).fetchall()

        return [row[0] for row in rows]

    async def _save_item(self, item: "KnowledgeItem"):
        """Persist ``item`` without blocking the event loop."""
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._insert_item, item)

    def _insert_item(self, item: "KnowledgeItem"):
        """Upsert ``item`` into ``knowledge`` and refresh its FTS entry.

        Both statements run in one transaction. Any existing FTS rows for
        the same id are removed first, because the FTS table has no unique
        key and would otherwise accumulate duplicates on re-insert.
        """
        embedding = item.embedding
        if embedding is None or len(embedding) == 0:
            embedding_json = None
        else:
            # Normalise to plain Python floats so NumPy inputs serialise too.
            embedding_json = json.dumps(
                np.asarray(embedding, dtype=float).tolist()
            )

        with closing(sqlite3.connect(self.db_path)) as conn, conn:
            conn.execute("""
                INSERT OR REPLACE INTO knowledge (id, content, source, metadata, embedding, created_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (
                item.id,
                item.content,
                item.source,
                json.dumps(item.metadata),
                embedding_json,
                datetime.now().isoformat()
            ))

            conn.execute(
                "DELETE FROM knowledge_fts WHERE knowledge_id = ?", (item.id,)
            )
            conn.execute("""
                INSERT INTO knowledge_fts (content, source, knowledge_id)
                VALUES (?, ?, ?)
            """, (item.content, item.source, item.id))

    @staticmethod
    def _split_into_chunks(content: str, chunk_size: int, overlap: int) -> List[str]:
        """Split ``content`` into overlapping, stripped chunks.

        Text no longer than ``chunk_size`` is returned unchanged as a single
        chunk. Otherwise consecutive windows of ``chunk_size`` characters
        are taken, each starting ``chunk_size - overlap`` characters after
        the previous one. Splitting stops once a window reaches the end of
        the text, and whitespace-only windows are dropped.

        Raises:
            ValueError: If chunking is required and ``chunk_size`` is not
                positive or ``overlap`` is outside ``[0, chunk_size)``
                (the window would never advance).
        """
        if len(content) <= chunk_size:
            return [content]

        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if not 0 <= overlap < chunk_size:
            raise ValueError("chunk_overlap must satisfy 0 <= overlap < chunk_size")

        total = len(content)
        step = chunk_size - overlap
        chunks = []
        for start in range(0, total, step):
            end = start + chunk_size
            piece = content[start:end].strip()
            if piece:
                chunks.append(piece)
            if end >= total:
                # The window already covers the tail; a further window
                # would only repeat the overlap.
                break
        return chunks

    def _chunk_document(self, content: str) -> List[str]:
        """Split ``content`` using the chunk size/overlap from settings."""
        return self._split_into_chunks(
            content,
            settings.embedding.chunk_size,
            settings.embedding.chunk_overlap,
        )

    def _generate_id(self, content: str) -> str:
        """Derive a 12-hex-character id from the MD5 digest of ``content``."""
        return hashlib.md5(content.encode()).hexdigest()[:12]

    async def delete_document(self, doc_id: str):
        """Remove a document and all of its chunks."""
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._remove_document, doc_id)

    def _remove_document(self, doc_id: str):
        """Delete every chunk whose id is exactly ``"{doc_id}_<digits>"``.

        An exact, case-sensitive prefix comparison is used instead of
        ``LIKE``: in a ``LIKE`` pattern ``_`` and ``%`` are wildcards, so
        deleting ``"doc"`` would also delete the chunks of ``"docx"``.
        Requiring a purely numeric suffix keeps documents whose id merely
        starts with ``"{doc_id}_"`` from being affected.
        """
        prefix = f"{doc_id}_"
        params = {"prefix": prefix, "n": len(prefix)}

        with closing(sqlite3.connect(self.db_path)) as conn, conn:
            conn.execute("""
                DELETE FROM knowledge
                WHERE substr(id, 1, :n) = :prefix
                  AND length(id) > :n
                  AND substr(id, :n + 1) NOT GLOB '*[^0-9]*'
            """, params)
            conn.execute("""
                DELETE FROM knowledge_fts
                WHERE substr(knowledge_id, 1, :n) = :prefix
                  AND length(knowledge_id) > :n
                  AND substr(knowledge_id, :n + 1) NOT GLOB '*[^0-9]*'
            """, params)

    async def list_sources(self) -> List[str]:
        """List the distinct sources present in the knowledge base."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._fetch_sources)

    def _fetch_sources(self) -> List[str]:
        """Return the distinct ``source`` values stored in ``knowledge``."""
        with closing(sqlite3.connect(self.db_path)) as conn:
            rows = conn.execute(
                "SELECT DISTINCT source FROM knowledge"
            ).fetchall()
        return [row[0] for row in rows]