import asyncio
import json
import os
import re
from typing import Any, Dict, List, Optional

import yaml

from utils.logger import Logger, app_logger
from utils.llm_config import get_llm

class QueryPlanner:
    """
    Query Planner that analyzes user queries and generates structured search plans
    to improve retrieval quality.
    """

    def __init__(self, ontology_path: str = "kb/ontology.yaml", model_name: str = None, temperature: float = None, logger: Optional["Logger"] = None):
        """
        Initialize the QueryPlanner

        Args:
            ontology_path: Path to the ontology YAML file
            model_name: Optional override for the LLM model to use
            temperature: Optional override for the temperature
            logger: Optional logger instance
        """
        # Configure the logger first so _load_ontology can report failures
        # through it instead of falling back to print().
        self.logger = logger or app_logger
        self.ontology = self._load_ontology(ontology_path)
        # Get LLM from centralized configuration
        self.llm = get_llm(model_name=model_name, temperature=temperature)

    def _load_ontology(self, ontology_path: str) -> Dict[str, Any]:
        """
        Load the ontology from a YAML file

        Args:
            ontology_path: Path to the ontology YAML file

        Returns:
            Dict[str, Any]: The loaded ontology, or an empty skeleton when the
            file is missing, empty, or unreadable.
        """
        empty_ontology = {"entities": {}, "facets": {}, "intents": {}}
        try:
            if os.path.exists(ontology_path):
                with open(ontology_path, 'r', encoding='utf-8') as file:
                    # safe_load returns None for an empty file; normalize to
                    # the empty skeleton so callers always receive a dict.
                    return yaml.safe_load(file) or empty_ontology
            return empty_ontology
        except Exception as e:
            # Route through the configured logger (was print()) so the error
            # reaches the application log.
            self.logger.error(f"Error loading ontology: {str(e)}")
            return empty_ontology

    def plan_query(self, query: str) -> Dict[str, Any]:
        """
        Generate a search plan for the given query

        Args:
            query: The user's query

        Returns:
            Dict[str, Any]: The search plan (a basic fallback plan on error)
        """
        try:
            # Create a prompt for the LLM
            prompt = self._build_planning_prompt(query)

            # Generate the search plan using the LLM
            response = self.llm.generate(prompt)

            # Parse the response
            search_plan = self._parse_llm_response(response)

            # Ensure all required fields are present
            return self._validate_search_plan(search_plan, query)
        except Exception as e:
            self.logger.error(f"Error planning query: {str(e)}")
            # Return a basic search plan as fallback
            return self._create_fallback_plan(query)

    async def plan_query_async(self, query: str) -> Dict[str, Any]:
        """
        Generate a search plan for the given query asynchronously

        Args:
            query: The user's query

        Returns:
            Dict[str, Any]: The search plan (a basic fallback plan on error)
        """
        try:
            # Create a prompt for the LLM
            prompt = self._build_planning_prompt(query)

            # Run the blocking LLM call off the event loop.
            response = await asyncio.to_thread(self.llm.generate, prompt)

            # Parse the response
            search_plan = self._parse_llm_response(response)

            # Ensure all required fields are present
            return self._validate_search_plan(search_plan, query)
        except Exception as e:
            self.logger.error(f"Error planning query asynchronously: {str(e)}")
            # Return a basic search plan as fallback
            return self._create_fallback_plan(query)

    def _build_planning_prompt(self, query: str) -> str:
        """
        Build a prompt for the LLM to generate a search plan

        Args:
            query: The user's query

        Returns:
            str: The prompt for the LLM
        """
        # Serialize the ontology as real JSON. The previous
        # str(...).replace("'", '"') hack broke on apostrophes inside values
        # and emitted Python literals (None/True) instead of JSON null/true.
        ontology_str = json.dumps(self.ontology)

        # Add company context to the prompt
        company_context = """
        Company Context: MangoIT Solutions is a web and mobile app development company that offers various services including:
        - Web Development (PHP, Laravel, CodeIgniter, WordPress, Magento)
        - Mobile App Development (iOS, Android, React Native)
        - eCommerce Development (Magento, Shopify, WooCommerce)
        - Custom Software Development
        - AI/ML Development
        - Digital Marketing
        - UI/UX Design
        
        They work with technologies like PHP, Python, JavaScript, React, Angular, Node.js, and various frameworks.
        """

        return f"""
        {company_context}
        
        Given the user query: "{query}", infer the following:
        
        1. Intent: Classify as one of [lookup, list, compare, how_to, troubleshoot, navigate, pricing, policy, other]
        2. Entities: Extract key entities mentioned in the query
        3. Facets: Identify relevant facets from [technologies, services, portfolio, pricing, company, contact]
        4. Constraints: Any constraints like language, date, region
        5. Multi-queries: Generate 6-12 diverse rewritten queries that would help retrieve relevant information. Include specific technology names and service names in these queries when relevant.
        6. Answer requirements: What the answer should include (e.g., list, group_by_category, citations)
        7. Fallbacks: Strategies to try if initial retrieval fails
        
        Use this ontology for normalization and expansion: {ontology_str}
        
        Return a JSON object with the following structure:
        {{
          "intent": "lookup|list|compare|how_to|troubleshoot|navigate|pricing|policy|other",
          "entities": ["entity1", "entity2"],
          "facets": ["facet1", "facet2"],
          "constraints": {{"lang":"auto","date":null,"region":"IN"}},
          "multi_queries": ["query1", "query2", "query3"],
          "answer_requirements": ["list", "group_by_category", "citations"],
          "fallbacks": ["broaden_terms", "use_synonyms", "keyword_pre_filter"]
        }}
        """

    def _parse_llm_response(self, response: str) -> Dict[str, Any]:
        """
        Parse the LLM response to extract the search plan

        Args:
            response: The LLM response

        Returns:
            Dict[str, Any]: The parsed search plan, or {} when no valid JSON
            could be extracted.
        """
        # Extract JSON from the response (it might be wrapped in markdown code blocks)
        json_match = re.search(r'```(?:json)?\s*(.+?)\s*```', response, re.DOTALL)
        if json_match:
            json_str = json_match.group(1)
        else:
            # Try to find any JSON-like structure
            json_match = re.search(r'\{.+\}', response, re.DOTALL)
            json_str = json_match.group(0) if json_match else response

        # Collapse newlines/tabs so a pretty-printed payload parses cleanly
        json_str = re.sub(r'[\n\r\t]', ' ', json_str)

        try:
            return json.loads(json_str)
        except json.JSONDecodeError:
            self.logger.warning("Failed to parse LLM response as JSON")
            return {}

    def _validate_search_plan(self, search_plan: Dict[str, Any], original_query: str) -> Dict[str, Any]:
        """
        Ensure the search plan has all required fields

        Args:
            search_plan: The search plan to validate
            original_query: The original user query

        Returns:
            Dict[str, Any]: The validated search plan
        """
        # Ensure all required fields are present
        search_plan.setdefault("intent", "lookup")
        search_plan.setdefault("entities", [])
        search_plan.setdefault("facets", [])
        search_plan.setdefault("constraints", {"lang": "auto", "date": None, "region": "IN"})
        search_plan.setdefault("multi_queries", [original_query])
        search_plan.setdefault("answer_requirements", ["citations"])
        search_plan.setdefault("fallbacks", ["broaden_terms", "use_synonyms"])

        # Ensure multi_queries includes the original query
        if original_query not in search_plan["multi_queries"]:
            search_plan["multi_queries"].append(original_query)

        # Force list-style answering when the query asks for a technology list.
        if self._is_technology_listing_request(original_query):
            # Equality, not substring: intent is a single label string.
            if search_plan["intent"] != "list":
                search_plan["intent"] = "list"
            if "technologies" not in search_plan["facets"]:
                search_plan["facets"].append("technologies")
            if "list" not in search_plan["answer_requirements"]:
                search_plan["answer_requirements"].append("list")
            if "group_by_category" not in search_plan["answer_requirements"]:
                search_plan["answer_requirements"].append("group_by_category")

        return search_plan

    def _is_technology_listing_request(self, query: str) -> bool:
        """
        Check if the query is asking for a list of technologies

        Args:
            query: The user's query

        Returns:
            bool: True if the query is asking for a list of technologies
        """
        query_lower = query.lower()
        tech_list_patterns = [
            "technologies", "tech stack", "programming languages", "frameworks", 
            "tools", "platforms", "list of technologies", "what technologies", 
            "tech used", "technologies used", "tech work", "work with"
        ]
        return any(pattern in query_lower for pattern in tech_list_patterns)

    def _create_fallback_plan(self, query: str) -> Dict[str, Any]:
        """
        Create a basic search plan as fallback

        Args:
            query: The user's query

        Returns:
            Dict[str, Any]: A basic search plan
        """
        # Check for technology listing request
        if self._is_technology_listing_request(query):
            return {
                "intent": "list",
                "entities": ["mangoit"],
                "facets": ["technologies"],
                "constraints": {"lang": "auto", "date": None, "region": "IN"},
                "multi_queries": [
                    query,
                    "technologies used by mangoit",
                    "mangoit tech stack",
                    "programming languages frameworks tools used by mangoit",
                    "mangoit development technologies",
                    "wordpress php javascript react laravel python ai development"
                ],
                "answer_requirements": ["list", "group_by_category", "citations"],
                "fallbacks": ["broaden_terms", "use_synonyms", "keyword_pre_filter"]
            }
        return {
            "intent": "lookup",
            "entities": [],
            "facets": [],
            "constraints": {"lang": "auto", "date": None, "region": "IN"},
            "multi_queries": [query],
            "answer_requirements": ["citations"],
            "fallbacks": ["broaden_terms", "use_synonyms"]
        }
