"""
Query classifier for determining the type of user query and selecting appropriate templates.
"""

import re
from typing import Dict, Any, List, Tuple, Optional
from utils.knowledge_base_manager import knowledge_base

class QueryClassifier:
    """Classifies user queries to determine the appropriate response template.

    Classification is keyword based: each query type has a keyword list, every
    whole-word keyword hit adds to that type's score, and the highest-scoring
    type wins provided it reaches the minimum score threshold. Otherwise the
    query is treated as ``'general'``.
    """

    # Keywords for different query types
    TECHNICAL_KEYWORDS = [
        'technology', 'technologies', 'tech', 'stack', 'framework', 'language', 'platform',
        'frontend', 'backend', 'mobile', 'web', 'cloud', 'database', 'ai', 'ml', 'devops',
        'react', 'angular', 'vue', 'node', 'python', 'php', 'java', 'flutter', 'swift',
        'aws', 'azure', 'docker', 'kubernetes', 'mongodb', 'postgresql', 'mysql',
        'how', 'implement', 'develop', 'build', 'create', 'architecture', 'design'
    ]

    PRICING_KEYWORDS = [
        'price', 'pricing', 'cost', 'rate', 'budget', 'quote', 'estimate', 'fee',
        'charge', 'expensive', 'affordable', 'cheap', 'package', 'plan', 'subscription',
        'hourly', 'monthly', 'fixed', 'retainer', 'contract', 'payment', 'invest',
        'how much', 'what is the cost', 'how many', 'discount', 'offer', 'deal'
    ]

    SCHEDULING_KEYWORDS = [
        'schedule', 'meeting', 'appointment', 'call', 'discuss', 'talk', 'connect',
        'availability', 'available', 'calendar', 'time', 'date', 'slot', 'book',
        'contact', 'reach', 'meet', 'consultation', 'demo', 'presentation',
        'when', 'tomorrow', 'today', 'next week', 'morning', 'afternoon', 'evening'
    ]

    # Common project types, checked in this order (the first listed type that
    # occurs in the text wins, not the first one occurring in the text).
    PROJECT_TYPES = [
        'web', 'mobile', 'app', 'website', 'e-commerce', 'ecommerce',
        'ai', 'ml', 'machine learning', 'chatbot', 'dashboard', 'crm',
        'enterprise', 'saas', 'platform', 'portal', 'api'
    ]

    # Scoring parameters
    _KEYWORD_WEIGHT = 0.1       # score contributed per word of a matched keyword
    _TECHNOLOGY_BONUS = 0.3     # added to the technical score when technologies are detected
    _GENERAL_BASE_SCORE = 0.1   # base score for general queries
    _SCORE_THRESHOLD = 0.2      # minimum score for a specific (non-general) type

    # Month names: an abbreviation or the full name only, so that arbitrary
    # words starting with a month prefix ("5 marbles") are not read as dates.
    _MONTH = (
        r'(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|'
        r'aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)'
    )

    # Date patterns, tried in order; group 1 is the extracted value.
    _DATE_PATTERNS = [
        re.compile(r'\b(today)\b'),
        re.compile(r'\b(tomorrow)\b'),
        re.compile(r'\b(next\s+(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday))\b'),
        re.compile(r'\b(\d{1,2}(?:st|nd|rd|th)?\s+' + _MONTH + r')\b'),
    ]

    # Time patterns, tried in order, as (pattern, suffix appended to group 1).
    _TIME_PATTERNS = [
        (re.compile(r'\b(\d{1,2}(?::\d{2})?\s*(?:am|pm))\b'), ''),
        (re.compile(r'\b(\d{1,2}(?::\d{2})?)\s*(?:hours|hrs|o\'clock)\b'), ' hours'),
        (re.compile(r'\b(morning|afternoon|evening|night)\b'), ''),
    ]

    _TIMEZONE_PATTERN = re.compile(r'\b(pst|est|cst|mst|utc|gmt|ist|edt|pdt|cet|bst)\b')

    # Topic extraction only runs when one of these phrases is present.
    _TOPIC_TRIGGERS = ('discuss', 'talk about', 'regarding')

    # The topic runs up to the standalone word "with", a '.', a '?' or the end
    # of the text. "with" is matched as a whole word so that words such as
    # "without" or "within" do not cut the topic short.
    _TOPIC_PATTERN = re.compile(
        r'\b(?:discuss|talk about|regarding|about)\s+([\w\s]+?)(?:\bwith\b|\.|\?|$)'
    )

    def __init__(self):
        """Initialize the query classifier (it holds no per-instance state)."""

    def classify_query(self, query: str, conversation_history: str = "") -> Dict[str, Any]:
        """
        Classify the user query to determine its type

        Args:
            query: The user's query (None is treated as an empty query)
            conversation_history: Previous conversation history

        Returns:
            Dict with the keys:
                'query_type': 'technical', 'pricing', 'scheduling' or 'general'
                'scores': the score of every query type
                'detected_technologies': result of the knowledge base lookup
                'meeting_details': extracted details or None
                'project_type': extracted project type or None
        """
        query = query or ""
        query_lower = query.lower()

        # Check for detected technologies in the query
        detected_technologies = knowledge_base.detect_technologies_in_text(query)

        # Calculate scores for each query type
        technical_score = self._calculate_keyword_score(query_lower, self.TECHNICAL_KEYWORDS)
        pricing_score = self._calculate_keyword_score(query_lower, self.PRICING_KEYWORDS)
        scheduling_score = self._calculate_keyword_score(query_lower, self.SCHEDULING_KEYWORDS)

        # Add bonus for detected technologies
        if detected_technologies:
            technical_score += self._TECHNOLOGY_BONUS

        # Meeting details and project type are extracted whenever the matching
        # score is high enough, even if another type ends up as the primary one.
        meeting_details = None
        if scheduling_score > self._SCORE_THRESHOLD:
            meeting_details = self._extract_meeting_details(query, conversation_history)

        project_type = None
        if pricing_score > self._SCORE_THRESHOLD:
            project_type = self._extract_project_type(query, conversation_history)

        # Determine the primary query type; on ties the first entry wins.
        scores = {
            'technical': technical_score,
            'pricing': pricing_score,
            'scheduling': scheduling_score,
            'general': self._GENERAL_BASE_SCORE
        }

        primary_type = max(scores, key=scores.get)

        # Only classify as a specific type if the score is above threshold
        if scores[primary_type] < self._SCORE_THRESHOLD:
            primary_type = 'general'

        return {
            'query_type': primary_type,
            'scores': scores,
            'detected_technologies': detected_technologies,
            'meeting_details': meeting_details,
            'project_type': project_type
        }

    def _calculate_keyword_score(self, query: str, keywords: List[str]) -> float:
        """
        Calculate a score based on keyword matches

        Every whole-word occurrence of a keyword contributes 0.1 per word of
        that keyword, so multi-word keywords weigh more. Matching is
        case-insensitive.

        Args:
            query: The user's query
            keywords: List of keywords to match

        Returns:
            Score between 0.0 and 1.0
        """
        score = 0.0
        for keyword in keywords:
            # Check for exact word matches with word boundaries
            pattern = r'\b' + re.escape(keyword) + r'\b'
            hits = len(re.findall(pattern, query or "", re.IGNORECASE))
            if hits:
                # More specific multi-word keywords get higher scores
                score += self._KEYWORD_WEIGHT * len(keyword.split()) * hits

        # Cap the score at 1.0
        return min(score, 1.0)

    def _extract_meeting_details(self, query: str, conversation_history: str) -> Optional[Dict[str, Any]]:
        """
        Extract meeting details from the query

        Args:
            query: The user's query (None is treated as an empty query)
            conversation_history: Previous conversation history. Accepted for
                interface compatibility; it is not consulted at present.

        Returns:
            Dict with any of the keys 'date', 'time', 'timezone' and 'topic'
            that could be extracted (values are lower-cased, except the
            timezone which is upper-cased), or None if nothing was found
        """
        text = (query or "").lower()
        details: Dict[str, Any] = {}

        # Extract date (first matching pattern wins)
        for pattern in self._DATE_PATTERNS:
            match = pattern.search(text)
            if match:
                details['date'] = match.group(1)
                break

        # Extract time (first matching pattern wins)
        for pattern, suffix in self._TIME_PATTERNS:
            match = pattern.search(text)
            if match:
                details['time'] = match.group(1) + suffix
                break

        # Extract timezone
        match = self._TIMEZONE_PATTERN.search(text)
        if match:
            details['timezone'] = match.group(1).upper()

        # Extract topic (if any)
        if any(trigger in text for trigger in self._TOPIC_TRIGGERS):
            match = self._TOPIC_PATTERN.search(text)
            if match:
                topic = match.group(1).strip()
                # Skip captures that consist of whitespace only
                if topic:
                    details['topic'] = topic

        return details or None

    def _extract_project_type(self, query: str, conversation_history: str) -> Optional[str]:
        """
        Extract project type from query and conversation history

        The query is searched first; the conversation history is only used as
        a fallback when the query mentions no known project type.

        Args:
            query: The user's query (None is treated as an empty query)
            conversation_history: Previous conversation history

        Returns:
            Project type if found, None otherwise
        """
        patterns = [
            (project_type, re.compile(r'\b' + re.escape(project_type) + r'\b'))
            for project_type in self.PROJECT_TYPES
        ]

        for text in (query, conversation_history):
            if not text:
                continue
            text_lower = text.lower()
            for project_type, pattern in patterns:
                if pattern.search(text_lower):
                    return project_type

        return None

# Shared module-level instance, created once at import time so that importers
# can reuse it. (The class itself does not enforce single instantiation.)
query_classifier = QueryClassifier()
