from crewai import Agent
from utils.gemini_llm import GeminiLLM
import json
import re
import time
import asyncio
from typing import Dict, Any, Optional
from dotenv import load_dotenv
from utils.logger import Logger, app_logger
from utils.query_planner import QueryPlanner

load_dotenv()

class QueryUnderstandingAgent:
    """Agent responsible for understanding user queries and extracting key information."""
    
    def __init__(self, model_name: str = "gemini-2.0-flash", logger: Optional[Logger] = None, ontology_path: str = "kb/ontology.yaml"):
        """Initialize the QueryUnderstandingAgent
        
        Args:
            model_name: Name of the LLM model to use
            logger: Optional logger instance
            ontology_path: Path to the ontology YAML file
        """
        self.llm = GeminiLLM(
            model_name=model_name,
            temperature=0.2
        )
        self.logger = logger or app_logger
        self.name = "QueryUnderstandingAgent"
        
        # Initialize the query planner, which supplies the search plan (intent, facets, multi_queries)
        self.query_planner = QueryPlanner(ontology_path=ontology_path, model_name=model_name, logger=self.logger)
    
    def create_agent(self):
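        """Create a CrewAI Agent configured for query analysis."""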
        return Agent(
            role="Query Analyst",
            goal="Understand user queries and extract key information for effective retrieval",
            backstory="""You are an expert at understanding user questions and extracting the 
            key information needed for effective information retrieval. You can identify the 
            main topics, entities, and intent behind user queries.""",
            verbose=True,
            llm=self.llm
        )
    
    def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """Run the agent to analyze the query
        
        Args:
            context: The context containing the query and other information
            
        Returns:
            Dict[str, Any]: The analysis results and updated context
        """
        start_time = time.time()
        query = context.get("query", "")
        
        if not query:
            self.logger.error(f"{self.name}: No query provided in context")
            return {**context, "error": "No query provided"}
            
        self.logger.log_agent_start(self.name, f"Analyzing query: {query[:50]}..." if len(query) > 50 else f"Analyzing query: {query}")
        
        try:
            # Analyze the query
            analysis = self.analyze_query(query)
            execution_time = time.time() - start_time
            
            self.logger.log_agent_complete(self.name, execution_time)
            self.logger.info(f"{self.name}: Extracted {len(analysis.get('keywords', []))} keywords and {len(analysis.get('topics', []))} topics")
            
            # Update context with analysis results
            return {**context, "query_analysis": analysis, "error": None}
            
        except Exception as e:
            execution_time = time.time() - start_time
            self.logger.log_agent_error(self.name, e)
            return {**context, "error": str(e)}
    
    def analyze_query(self, query: str) -> Dict[str, Any]:
        """
        Analyze the user's query to extract key information using QueryPlanner
        
        Args:
            query: The user's query
            
        Returns:
            Dict[str, Any]: Analysis results including query_type, keywords, entities,
                topics, search_query, intent, route, and the generated search_plan
        """
        self.logger.info(f"Analyzing query: {query}")
        
        try:
            # Use the query planner to generate a search plan
            search_plan = self.query_planner.plan_query(query)
            self.logger.info(f"Generated search plan with intent: {search_plan.get('intent')}")
            
            # Use LLM to analyze the query for additional information
            analysis = self._analyze_with_llm(query)
            
            # Enhance the analysis with the search plan
            analysis["search_plan"] = search_plan
            
            # Use multi-queries for better retrieval
            if search_plan.get("multi_queries"):
                analysis["multi_queries"] = search_plan["multi_queries"]
            
            # Add intent information
            analysis["intent"] = search_plan.get("intent", "lookup")
            
            # For technology listing requests, enhance keywords
            if search_plan.get("intent") == "list" and "technologies" in search_plan.get("facets", []):
                tech_keywords = [
                    "technologies", "programming", "languages", "frameworks", "tools", 
                    "platforms", "wordpress", "php", "javascript", "react", "laravel", 
                    "python", "ai", "development", "stack", "expertise"
                ]
                analysis["keywords"] = list(set(analysis.get("keywords", []) + tech_keywords))
                analysis["is_tech_list_request"] = True
            
            # If it's a greeting, we don't need to extract keywords for search
            if analysis["query_type"] == "greeting":
                analysis["search_query"] = ""  # No need to search for greetings
                return analysis
            
            # Create search query from keywords or use multi-queries
            if analysis["keywords"]:
                analysis["search_query"] = " ".join(analysis["keywords"])
            else:
                analysis["search_query"] = query
            
            return analysis
            
        except Exception as e:
            self.logger.error(f"Error analyzing query: {str(e)}")
            return self._simple_analysis(query)
    
    def _moderation_check(self, text: str) -> Dict[str, Any]:
        """
        Check if text contains abusive content
        
        Args:
            text: Text to check
            
        Returns:
            Dict[str, Any]: Moderation results
        """
        # Simple keyword-based check (can be enhanced with a model)
        abuse_keywords = [
            "fuck", "shit", "ass", "bitch", "dick", "porn", "kill", "suicide",
            "hate", "racist", "nazi", "terrorism", "bomb", "murder", "rape"
        ]
        
        text_lower = text.lower()
        for keyword in abuse_keywords:
            # Match whole words only to avoid false positives (e.g. "ass" in "assistance")
            if re.search(rf"\b{re.escape(keyword)}\b", text_lower):
                return {"is_abusive": True, "category": "profanity", "severity": "medium"}
        
        return {"is_abusive": False, "category": None, "severity": None}
    
    def _is_smalltalk(self, text: str) -> bool:
        """
        Check if text is smalltalk/greeting
        
        Args:
            text: Text to check
            
        Returns:
            bool: True if text is smalltalk
        """
        smalltalk_patterns = [
            "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
            "how are you", "what's up", "namaste", "kaise ho", "kya hal hai",
            "👋", "🙏", "😊", "thank you", "thanks"
        ]
        
        text_lower = text.lower()
        words = text_lower.split()
        
        # Check for common greeting patterns
        for pattern in smalltalk_patterns:
            if " " in pattern or not pattern.isalpha():
                # Phrases and emojis: substring match
                if pattern in text_lower:
                    return True
            elif pattern in words:
                # Single words: whole-word match so "hi" does not match inside "which"
                return True
        
        # Very short messages with no question indicators are treated as smalltalk
        question_words = ["what", "how", "who", "when", "where", "why", "which", "list"]
        if len(words) <= 4 and "?" not in text_lower and not any(q in words for q in question_words):
            return True
            
        return False
    
    def _analyze_with_llm(self, query: str) -> Dict[str, Any]:
        """
        Use the LLM to analyze the query and extract key information
        
        Args:
            query: The user's query
            
        Returns:
            Dict[str, Any]: Analysis results with routing
        """
        # Check for abuse first
        moderation = self._moderation_check(query)
        if moderation["is_abusive"]:
            self.logger.warning(f"Detected abusive content: {moderation['category']}")
            return {
                "query_type": "other",
                "keywords": [],
                "entities": [],
                "topics": [],
                "search_query": "",
                "route": "abuse",
                "moderation": moderation,
                "original_query": query
            }
        
        # Check for smalltalk
        if self._is_smalltalk(query):
            self.logger.info("Detected smalltalk/greeting")
            return {
                "query_type": "greeting",
                "keywords": [],
                "entities": [],
                "topics": [],
                "search_query": "",
                "route": "smalltalk",
                "original_query": query
            }
        
        # Build the analysis prompt (shared with the async path via _build_prompt)
        prompt = self._build_prompt(query)
        
        try:
            # Call the LLM to analyze the query - avoid using asyncio.run() in an event loop
            try:
                response = self.llm.generate(prompt)
            except RuntimeError as e:
                if "cannot be called from a running event loop" in str(e):
                    # We're already in an event loop, use a simpler approach
                    self.logger.warning("Using fallback analysis due to asyncio conflict")
                    return self._simple_analysis(query)
                else:
                    raise
            
            # Parse the response as JSON
            
            # Extract JSON from the response (it might be wrapped in markdown code blocks)
            json_match = re.search(r'```json\s*(.+?)\s*```', response, re.DOTALL)
            if json_match:
                json_str = json_match.group(1)
            else:
                # Try to find any JSON-like structure
                json_match = re.search(r'\{[^\{\}]*"query_type"[^\{\}]*\}', response, re.DOTALL)
                if json_match:
                    json_str = json_match.group(0)
                else:
                    json_str = response
            
            # Clean up the string to ensure it's valid JSON
            json_str = re.sub(r'[\n\r\t]', ' ', json_str)
            
            try:
                analysis = json.loads(json_str)
            except json.JSONDecodeError:
                # If JSON parsing fails, extract information using regex
                self.logger.warning("Failed to parse LLM response as JSON, falling back to regex extraction")
                analysis = self._extract_analysis_with_regex(response)
            
            # Ensure all required fields are present
            analysis.setdefault("query_type", "general_conversation")
            analysis.setdefault("keywords", [])
            analysis.setdefault("entities", [])
            analysis.setdefault("topics", [])
            analysis["original_query"] = query
            
            # Set default route to RAG
            analysis["route"] = "rag"
            
            # Ensure we have a search query for non-greeting queries
            if not analysis.get("search_query") and analysis.get("query_type") != "greeting":
                if analysis["keywords"]:
                    analysis["search_query"] = " ".join(analysis["keywords"])
                else:
                    analysis["search_query"] = query
            
            # Log the route and search query
            self.logger.info(f"Query route: {analysis['route']}")
            if analysis["route"] == "rag":
                self.logger.info(f"Search query: {analysis.get('search_query', '')}")
            
            return analysis
            
        except Exception as e:
            self.logger.error(f"Error analyzing query with LLM: {str(e)}")
            # Fall back to simple analysis if LLM fails
            simple_analysis = self._simple_analysis(query)
            simple_analysis["route"] = "rag"  # Default route for fallback
            return simple_analysis
    
    def _extract_analysis_with_regex(self, llm_response: str) -> Dict[str, Any]:
        """
        Extract analysis information from LLM response using regex
        
        Args:
            llm_response: The response from the LLM
            
        Returns:
            Dict[str, Any]: Extracted analysis
        """
        
        analysis = {}
        
        # Extract query type
        query_type_match = re.search(r'query_type["\s]*:["\'\s]*(greeting|question|request|about|general_conversation)', llm_response, re.IGNORECASE)
        if query_type_match:
            analysis["query_type"] = query_type_match.group(1).lower()
        
        # Extract keywords
        keywords_match = re.search(r'keywords["\s]*:["\'\s]*\[(.+?)\]', llm_response, re.DOTALL)
        if keywords_match:
            keywords_str = keywords_match.group(1)
            keywords = re.findall(r'["\'](.*?)["\']', keywords_str)
            analysis["keywords"] = keywords
        
        # Extract entities
        entities_match = re.search(r'entities["\s]*:["\'\s]*\[(.+?)\]', llm_response, re.DOTALL)
        if entities_match:
            entities_str = entities_match.group(1)
            entities = re.findall(r'["\'](.*?)["\']', entities_str)
            analysis["entities"] = entities
        
        # Extract topics
        topics_match = re.search(r'topics["\s]*:["\'\s]*\[(.+?)\]', llm_response, re.DOTALL)
        if topics_match:
            topics_str = topics_match.group(1)
            topics = re.findall(r'["\'](.*?)["\']', topics_str)
            analysis["topics"] = topics
        
        return analysis
    
    async def arun(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """
        Async version of run method with QueryPlanner integration
        
        Args:
            context: The context containing the query and other information
            
        Returns:
            Dict[str, Any]: The analysis results and updated context
        """
        start_time = time.time()
        query = context.get("query", "")
        
        if not query:
            self.logger.error(f"{self.name}: No query provided in context")
            return {**context, "error": "No query provided"}
            
        self.logger.log_agent_start(self.name, f"Analyzing query: {query[:50]}..." if len(query) > 50 else f"Analyzing query: {query}")
        
        try:
            # Use the query planner to generate a search plan asynchronously
            search_plan = await self.query_planner.plan_query_async(query)
            self.logger.info(f"Generated search plan with intent: {search_plan.get('intent')}")
            
            # Use async LLM analysis
            analysis = await self._analyze_with_llm_async(query)
            
            # Enhance the analysis with the search plan
            analysis["search_plan"] = search_plan
            
            # Use multi-queries for better retrieval
            if search_plan.get("multi_queries"):
                analysis["multi_queries"] = search_plan["multi_queries"]
            
            # Add intent information
            analysis["intent"] = search_plan.get("intent", "lookup")
            
            # For technology listing requests, enhance keywords
            if search_plan.get("intent") == "list" and "technologies" in search_plan.get("facets", []):
                tech_keywords = [
                    "technologies", "programming", "languages", "frameworks", "tools", 
                    "platforms", "wordpress", "php", "javascript", "react", "laravel", 
                    "python", "ai", "development", "stack", "expertise"
                ]
                analysis["keywords"] = list(set(analysis.get("keywords", []) + tech_keywords))
                analysis["is_tech_list_request"] = True
            
            execution_time = time.time() - start_time
            self.logger.log_agent_complete(self.name, execution_time)
            self.logger.info(f"{self.name}: Extracted {len(analysis.get('keywords', []))} keywords and {len(analysis.get('topics', []))} topics")
            
            # Update context with analysis results
            return {**context, "query_analysis": analysis, "error": None}
            
        except Exception as e:
            execution_time = time.time() - start_time
            self.logger.log_agent_error(self.name, e)
            # Fallback to basic analysis with original query
            return {**context, "query_analysis": self._simple_analysis(query), "error": str(e)}
    
    async def _analyze_with_llm_async(self, query: str) -> Dict[str, Any]:
        """
        Analyze query using LLM asynchronously
        
        Args:
            query: The user's query
            
        Returns:
            Dict[str, Any]: Analysis results
        """
        prompt = self._build_prompt(query)
        # Run LLM in thread to avoid blocking
        response = await asyncio.to_thread(self.llm.generate, prompt)
        
        # Parse the response as JSON
        
        # Extract JSON from the response (it might be wrapped in markdown code blocks)
        json_match = re.search(r'```json\s*(.+?)\s*```', response, re.DOTALL)
        if json_match:
            json_str = json_match.group(1)
        else:
            # Try to find any JSON-like structure
            json_match = re.search(r'\{[^\{\}]*"query_type"[^\{\}]*\}', response, re.DOTALL)
            if json_match:
                json_str = json_match.group(0)
            else:
                json_str = response
        
        # Clean up the string to ensure it's valid JSON
        json_str = re.sub(r'[\n\r\t]', ' ', json_str)
        
        try:
            analysis = json.loads(json_str)
        except json.JSONDecodeError:
            # If JSON parsing fails, extract information using regex
            self.logger.warning("Failed to parse LLM response as JSON, falling back to regex extraction")
            analysis = self._extract_analysis_with_regex(response)
        
        # Ensure all required fields are present
        analysis.setdefault("query_type", "general_conversation")
        analysis.setdefault("keywords", [])
        analysis.setdefault("entities", [])
        analysis.setdefault("topics", [])
        analysis["original_query"] = query
        
        # Ensure we have a search query
        if not analysis.get("search_query") and analysis.get("query_type") != "greeting":
            analysis["search_query"] = query
        
        return analysis
    
    def _build_prompt(self, query: str) -> str:
        """
        Build prompt for LLM analysis
        
        Args:
            query: The user's query
            
        Returns:
            str: Prompt for LLM
        """
        return f"""
        Analyze the following user query and extract key information:
        
        Query: \"{query}\"
        
        Provide the following information in your analysis:
        1. Query Type: Classify as one of [greeting, question, request, about, general_conversation]
           - greeting: Simple greetings like \"hello\", \"hi\", \"good morning\", etc.  
           - question: Queries asking for specific information
           - request: Commands or requests for the system to do something
           - about: Questions about the assistant itself
           - general_conversation: General statements or comments
        
        2. Keywords: Extract 3-5 most important keywords relevant for search (exclude common words)
        
        3. Entities: Identify any named entities (people, companies, products, etc.)
        
        4. Topics: Identify relevant topics from [services, products, company, contact, technologies, pricing, portfolio]
        
        Format your response as a JSON object with these fields.
        """
    
    def _simple_analysis(self, query: str) -> Dict[str, Any]:
        """
        Simple fallback analysis when LLM fails
        
        Args:
            query: The user's query
            
        Returns:
            Dict[str, Any]: Simple analysis results
        """
        query_lower = query.lower().strip()
        query_words = query_lower.split()
        
        # Simple query type detection
        query_type = "general_conversation"
        
        # Check for greetings
        greeting_patterns = ["hi", "hello", "hey", "greetings", "good morning", "good afternoon", "good evening"]
        if len(query_words) <= 3 and any(pattern in query_lower for pattern in greeting_patterns):
            query_type = "greeting"
        elif query_lower.endswith("?") or any(q in query_lower for q in ["how", "what", "when", "where", "who", "why"]):
            query_type = "question"
        
        # Enhanced keyword extraction
        common_words = ["the", "a", "an", "in", "on", "at", "to", "for", "with", "about", "of", "is", "are", "am", 
                      "how", "many", "much", "do", "does", "can", "could", "would", "should", "will"]
        
        # Always include company name as a keyword for company-related queries
        keywords = []
        if "mangoit" in query_lower or "mango it" in query_lower:
            keywords.append("mangoit")
            
        # Extract other important keywords
        for word in query_words:
            if word not in common_words and len(word) > 2 and word not in keywords:
                keywords.append(word)
                
        # If no keywords found but query is about the company, use some default keywords
        if not keywords and ("mangoit" in query_lower or "mango it" in query_lower or "company" in query_lower):
            keywords = ["mangoit", "company", "information"]
            
        # For questions about people or numbers, add specific keywords
        if "how many" in query_lower or "number of" in query_lower:
            if "employee" in query_lower or "staff" in query_lower or "people" in query_lower or "professionals" in query_lower:
                keywords.extend(["employees", "team", "size"])
                
        # For questions about specific roles or people
        if "who" in query_lower:
            if "ceo" in query_lower or "cto" in query_lower or "founder" in query_lower or "director" in query_lower:
                keywords.extend(["leadership", "management", "team"])
        
        return {
            "original_query": query,
            "query_type": query_type,
            "keywords": keywords,
            "entities": [],
            "topics": []
        }
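

# Minimal usage sketch (illustrative, not part of the agent itself): it assumes a valid
# Gemini API key is available via the environment/.env and that kb/ontology.yaml exists;
# adjust the model name and ontology path to your setup.
if __name__ == "__main__":
    agent = QueryUnderstandingAgent(model_name="gemini-2.0-flash")

    # Synchronous analysis through run()
    result = agent.run({"query": "What technologies does MangoIT work with?"})
    print(result.get("query_analysis", {}).get("search_query"))

    # Async analysis through arun(), which uses QueryPlanner.plan_query_async
    async_result = asyncio.run(agent.arun({"query": "List your web development services"}))
    print(async_result.get("query_analysis", {}).get("intent"))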
