"""
Enhanced response generator that uses specialized prompt templates and knowledge base.
This can be used alongside or as a replacement for the existing response generation.
"""

import re
import asyncio
import datetime
import traceback
from typing import Dict, Any, List, Optional
from utils.gemini_llm import GeminiLLM
from utils.logger import Logger, app_logger
from utils.prompt_templates import PromptTemplates
from utils.context_enhanced_templates import context_enhanced_templates
from utils.query_classifier import query_classifier
from utils.knowledge_base_manager import knowledge_base
from utils.greeting_templates import greeting_templates
from utils.agent_config import agent_config
from utils.appointment_state import is_in_appointment_flow, get_appointment_state, get_missing_fields, update_appointment_flow
from utils.appointment_scheduler import check_and_schedule_if_ready
from utils.appointment_guidance import get_appointment_guidance, generate_appointment_summary
from utils.validation import validate_email, validate_phone, get_validation_feedback
from utils.llm_extractor import extract_appointment_info

class EnhancedResponseGenerator:
    """Enhanced response generator using specialized prompt templates"""
    
    def __init__(self, model_name: str = "gemini-2.0-flash", logger: Optional[Logger] = None):
        """
        Initialize the enhanced response generator
        
        Args:
            model_name: Name of the LLM model to use
            logger: Optional logger instance
        """
        self.llm = GeminiLLM(
            model_name=model_name,
            temperature=0.3  # Lower temperature for more faithful responses
        )
        self.logger = logger or app_logger
    
    def _is_greeting(self, query: str) -> bool:
        """
        Check if the query is a greeting
        
        Args:
            query: The user's query
            
        Returns:
            bool: True if the query is a greeting, False otherwise
        """
        greeting_patterns = [
            r'^\s*h(?:i|ello|ey)\s*$',  # hi, hello, hey
            r'^\s*greetings\s*$',
            r'^\s*good\s*(?:morning|afternoon|evening)\s*$',
            r'^\s*namaste\s*$'
        ]
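        # Note: these match whole-message greetings only (e.g. "hi", "Good morning",
        # "namaste"); a greeting followed by a question is handled as a normal query.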
        
        return any(re.match(pattern, query.lower()) for pattern in greeting_patterns)
    
    def _get_greeting_response(self, conversation_history: str = "", is_new_conversation: Optional[bool] = None) -> Dict[str, Any]:
        """
        Get a greeting response
        
        Args:
            conversation_history: Previous conversation history
            is_new_conversation: Whether this is a new conversation (None if unknown)
            
        Returns:
            Dict[str, Any]: The greeting response
        """
        # Determine if this is a returning user.
        # An explicitly provided is_new_conversation flag is the source of truth;
        # otherwise fall back to the conversation history.
        is_returning = False
        if is_new_conversation is not None:
            is_returning = not is_new_conversation
        elif conversation_history:
            # Look for actual message exchanges, not just the current message
            message_count = conversation_history.count("User:")
            is_returning = message_count > 1
            
        self.logger.info(f"Greeting a {'returning' if is_returning else 'new'} user")
        
        # Get current hour for time-appropriate greeting
        current_hour = datetime.datetime.now().hour
        
        # Get greeting from templates
        if agent_config.SHORT_GREETINGS:
            greeting = greeting_templates.get_greeting(is_returning)
        else:
            # Combine time greeting with standard greeting
            time_greeting = greeting_templates.get_time_greeting(current_hour)
            standard_greeting = greeting_templates.get_greeting(is_returning)
            greeting = f"{time_greeting} {standard_greeting}"
        
        return {
            "response": greeting,
            "sources": []
        }
    
    async def generate_response(self, query: str, results: Any, conversation_history: str = "", is_new_conversation: Optional[bool] = None, conversation_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Generate a response using specialized prompt templates
        
        Args:
            query: The user's query
            results: Retrieved information from the knowledge base
            conversation_history: Previous conversation history
            is_new_conversation: Flag indicating if this is a new conversation
            conversation_id: The conversation ID for state tracking
            
        Returns:
            Dict[str, Any]: The generated response with sources
        """
        # If we're in an appointment flow, handle appointment-related tasks first
        if conversation_id:
            if is_in_appointment_flow(conversation_id):
                self.logger.info(f"In appointment flow for conversation {conversation_id}")
                
                # Get missing fields
                missing_fields = get_missing_fields(conversation_id)
                
                if missing_fields:
                    # We still need to collect information
                    self.logger.info(f"Need to collect missing fields: {missing_fields}")
                    
                    # Extract information from the query
                    extracted_info = {}
                    
                    # Check for email in the query
                    if "email" in missing_fields:
                        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
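                        # Illustrative match (hypothetical address): "jane.doe@example.com";
                        # the result is still passed through validate_email below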
                        email_matches = re.findall(email_pattern, query)
                        if email_matches:
                            email = email_matches[0]
                            if validate_email(email):
                                extracted_info["email"] = email
                                self.logger.info(f"Extracted valid email: {email}")
                            else:
                                self.logger.info(f"Invalid email detected: {email}")
                                return {
                                    "response": get_validation_feedback("email", email),
                                    "sources": []
                                }
                    
                    # Check for phone number in the query
                    if "phone" in missing_fields:
                        # Look for digits that could be phone numbers
                        # This pattern is more lenient to catch various formats
                        phone_pattern = r'\b(?:\+?\d{1,3}[-\.\s]?)?\(?\d{3}\)?[-\.\s]?\d{3}[-\.\s]?\d{4}\b|\b\d{6,15}\b'
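                        # Illustrative formats this aims to catch: "555-123-4567",
                        # "555.123.4567", or a bare digit run such as "9876543210"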
                        phone_matches = re.findall(phone_pattern, query)
                        if phone_matches:
                            phone = phone_matches[0]
                            # Clean up the phone number
                            digits_only = re.sub(r'\D', '', phone)
                            if len(digits_only) >= 6:  # At least a reasonable number of digits
                                if validate_phone(phone):
                                    extracted_info["phone"] = phone
                                    self.logger.info(f"Extracted valid phone: {phone}")
                                else:
                                    # Accept it anyway, but log a warning;
                                    # this helps when users provide partial information
                                    extracted_info["phone"] = phone
                                    self.logger.warning(f"Accepting phone that failed validation: {phone}")
                    
                    # Extract name from the query
                    if "name" in missing_fields:
                        # Try to extract name from common patterns
                        name_patterns = [
                            # Look for name at the beginning of a comma-separated list
                            r'^([A-Za-z0-9\s\.]+)\s*,',
                            # Look for "name: [name]" pattern
                            r'name[\s:]+([A-Za-z0-9\s\.]+)',
                            # Look for "I am [name]" pattern
                            r'(?:I am|I\'m)\s+([A-Za-z0-9\s\.]+)',
                            # Look for name before an email
                            r'^([A-Za-z0-9\s\.]+)\s+[A-Za-z0-9._%+-]+@'
                        ]
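                        # Illustrative (hypothetical) inputs the patterns above target, in order:
                        #   "John Doe, john@example.com, 5551234567"  -> leading comma-separated name
                        #   "my name: John Doe"                       -> "name:" prefix
                        #   "I am John Doe"                           -> self-introduction
                        #   "John Doe john@example.com"               -> name directly before an email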
                        
                        name_found = False
                        for pattern in name_patterns:
                            name_match = re.search(pattern, query, re.IGNORECASE)
                            if name_match:
                                name = name_match.group(1).strip()
                                if len(name.split()) <= 5:  # Reasonable name length
                                    extracted_info["name"] = name
                                    self.logger.info(f"Extracted name: {name}")
                                    name_found = True
                                    break
                        
                        # Try to find short names at the beginning of the query
                        if not name_found:
                            # Look for short names (1-2 words) at the beginning of the query
                            short_name_match = re.match(r'^([A-Za-z][A-Za-z0-9\s\.]{0,29})\b', query)
                            if short_name_match:
                                name = short_name_match.group(1).strip()
                                if 1 <= len(name.split()) <= 3:  # Short name (1-3 words)
                                    # Don't use a name that's just digits (likely a phone number)
                                    if not re.match(r'^\d+$', name):
                                        extracted_info["name"] = name
                                        self.logger.info(f"Extracted short name: {name}")
                                        name_found = True
                        
                        # If no pattern matched and no other info extracted, use the whole query if it's short
                        if not name_found and not extracted_info and len(query.split()) <= 5:
                            extracted_info["name"] = query.strip()
                            self.logger.info(f"Using query as name: {query.strip()}")
                    
                    # If this is a response to a topic request, use the query as the topic
                    if "topic" in missing_fields and not any(extracted_info.keys()):
                        extracted_info["topic"] = query.strip()
                        self.logger.info(f"Using query as topic: {query.strip()}")
                    
                    # If regex extraction didn't find much, try LLM extraction as fallback
                    if len(extracted_info) == 0 or (len(missing_fields) > 1 and len(extracted_info) == 1):
                        self.logger.info("Using LLM for information extraction as fallback")
                        try:
                            # Use LLM to extract information
                            llm_extracted_info = await extract_appointment_info(query, missing_fields)
                            
                            # Merge with regex-extracted info, with regex taking precedence
                            for field, value in llm_extracted_info.items():
                                if field not in extracted_info and field in missing_fields:
                                    extracted_info[field] = value
                                    self.logger.info(f"LLM extracted {field}: {value}")
                        except Exception as e:
                            self.logger.error(f"Error using LLM for extraction: {str(e)}")
                    
                    # Update the appointment state with any extracted information
                    if extracted_info:
                        self.logger.info(f"Extracted information: {extracted_info}")
                        update_result = update_appointment_flow(conversation_id, extracted_info=extracted_info)
                        self.logger.info(f"Update result: {update_result}")
                        
                        # Get updated missing fields
                        missing_fields = get_missing_fields(conversation_id)
                        self.logger.info(f"Updated missing fields: {missing_fields}")
                        
                        # Check if all required information has been collected
                        if not missing_fields:
                            self.logger.info(f"All required information collected for conversation {conversation_id}")
                            # Get the current state to verify it's ready to schedule
                            state = get_appointment_state(conversation_id)
                            if state and state.get("meeting_request_stage") == "ready_to_schedule":
                                self.logger.info(f"APPOINTMENT READY TO SCHEDULE - Triggering scheduling API call for conversation {conversation_id}")
                            else:
                                self.logger.warning(f"Missing fields empty but state not ready to schedule for conversation {conversation_id}")
                                if state:
                                    self.logger.warning(f"Current stage: {state.get('meeting_request_stage')}")
                        
                        # If we extracted multiple pieces of information, or the details are now
                        # complete, acknowledge it and schedule as soon as nothing is missing
                        if len(extracted_info) > 1 or not missing_fields:
                            # Get a summary of what we've collected so far
                            state = get_appointment_state(conversation_id)
                            all_info = state.get("extracted_info", {}) if state else {}
                            
                            # Build a confirmation message
                            confirmation = "Thank you for providing your information. I've recorded:\n\n"
                            if "name" in all_info:
                                confirmation += f"- Name: {all_info['name']}\n"
                            if "email" in all_info:
                                confirmation += f"- Email: {all_info['email']}\n"
                            if "phone" in all_info:
                                confirmation += f"- Phone: {all_info['phone']}\n"
                            
                            # If we still have missing fields, ask for them
                            if missing_fields:
                                confirmation += "\nI still need the following information:\n\n"
                                for field in missing_fields:
                                    confirmation += f"- Your {field}\n"
                                confirmation += "\nCould you please provide the remaining details?"
                            else:
                                # All information collected, schedule the appointment now
                                self.logger.info(f"All information collected for conversation {conversation_id}, triggering appointment scheduling")
                                
                                # Explicitly call the scheduling function
                                self.logger.info(f"EXPLICIT SCHEDULING TRIGGER - Calling check_and_schedule_if_ready for conversation {conversation_id}")
                                try:
                                    scheduling_result = check_and_schedule_if_ready(conversation_id)
                                    self.logger.info(f"SCHEDULING RESULT: {scheduling_result}")
                                except Exception as e:
                                    self.logger.error(f"SCHEDULING ERROR: {str(e)}")
                                    self.logger.error(f"TRACEBACK: {__import__('traceback').format_exc()}")
                                    scheduling_result = None
                                
                                if scheduling_result and scheduling_result.get("success"):
                                    # Appointment was successfully scheduled
                                    self.logger.info(f"Appointment scheduled with ID: {scheduling_result.get('appointment_id')}")
                                    
                                    # Generate a summary of the appointment
                                    summary = generate_appointment_summary(conversation_id)
                                    
                                    # Check if this was a fallback to file saving
                                    if scheduling_result.get("method") == "file_fallback":
                                        # Use the custom fallback message if available
                                        if scheduling_result.get("message"):
                                            confirmation += f"\n\n{scheduling_result.get('message')}\n\nYour appointment ID is {scheduling_result.get('appointment_id')}.\n\n{summary}"
                                        else:
                                            confirmation += f"\n\nYour appointment has been scheduled locally. Your appointment ID is {scheduling_result.get('appointment_id')}.\n\n{summary}"
                                    else:
                                        # Normal API scheduling success
                                        confirmation += f"\n\nYour appointment has been scheduled! Your appointment ID is {scheduling_result.get('appointment_id')}.\n\n{summary}"
                                else:
                                    # There was an error scheduling the appointment
                                    error = scheduling_result.get('error') if scheduling_result else "Unknown error"
                                    self.logger.error(f"Error scheduling appointment: {error}")
                                    confirmation += "\n\nI've collected all your information, but I'm having trouble scheduling your appointment. Please try again or contact our team directly."
                            
                            return {
                                "response": confirmation,
                                "sources": []
                            }
                    
                    # Get guidance for the appointment flow
                    guidance = get_appointment_guidance(conversation_id, missing_fields)
                    
                    # Generate a response based on the guidance
                    if guidance["status"] == "complete":
                        # Show a summary of the collected information
                        summary = generate_appointment_summary(conversation_id)
                        return {
                            "response": f"{guidance['message']}\n\n{summary}",
                            "sources": []
                        }
                    else:
                        # Ask for the next piece of information
                        return {
                            "response": f"{guidance['message']}\n\n{guidance['prompt']}",
                            "sources": []
                        }
                else:
                    # All information collected, try to schedule
                    scheduling_result = check_and_schedule_if_ready(conversation_id)
                    
                    if scheduling_result and scheduling_result.get("success"):
                        # Appointment was successfully scheduled
                        self.logger.info(f"Appointment scheduled with ID: {scheduling_result.get('appointment_id')}")
                        
                        # Generate a summary and return a confirmation message
                        summary = generate_appointment_summary(conversation_id)
                        return {
                            "response": f"Great! I've scheduled your appointment. Your appointment ID is {scheduling_result.get('appointment_id')}.\n\n{summary}",
                            "sources": []
                        }
                    elif scheduling_result and not scheduling_result.get("success"):
                        # There was an error scheduling the appointment
                        self.logger.error(f"Error scheduling appointment: {scheduling_result.get('error')}")
                        return {
                            "response": "I'm having trouble scheduling your appointment. Please try again or contact our team directly at contact@mangoitsolutions.com.",
                            "sources": []
                        }
        
        # Check if the query is a greeting
        if self._is_greeting(query):
            self.logger.info("Detected greeting, using greeting template")
            return self._get_greeting_response(conversation_history, is_new_conversation)
            
        # Extract content from results
        content_snippets = []
        sources = []
        
        # Handle different result formats
        if isinstance(results, list):
            self.logger.info(f"Processing {len(results)} results from vector DB")
            result_list = results
        elif isinstance(results, dict) and "results" in results:
            self.logger.info(f"Processing {len(results['results'])} results from vector DB")
            result_list = results["results"]
        else:
            self.logger.info("No results to process from vector DB")
            result_list = []
        
        # Process results
        for i, result in enumerate(result_list[:5]):  # Limit to top 5 results
            if "content" in result:
                # Clean the content
                clean_content = self._clean_content(result["content"])
                content_snippets.append(clean_content[:300])  # Limit length
                
                # Log snippet for debugging
                self.logger.info(f"Content snippet {i+1}: {clean_content[:100]}...")
                
                # Add source
                if "source" in result:
                    source = result["source"]
                    relevance = result.get("relevance", 0)
                    sources.append({
                        "source": source,
                        "relevance": relevance,
                        "chunk": result.get("id", i)
                    })
                    self.logger.info(f"Source {i+1}: {source} (relevance: {relevance:.2f})")
                else:
                    self.logger.warning(f"Result {i+1} has no source information")
        
        # Detect the query language (currently used for logging/diagnostics only)
        language = self._detect_language(query)
        self.logger.info(f"Detected language: {language}")
        
        # Classify the query to determine the appropriate template
        classification = query_classifier.classify_query(query, conversation_history)
        query_type = classification["query_type"]
        self.logger.info(f"Query classified as: {query_type} (scores: {classification['scores']})")
        
        # Check for detected technologies
        detected_technologies = classification.get("detected_technologies", [])
        if detected_technologies:
            self.logger.info(f"Detected technologies: {', '.join(detected_technologies)}")
            
            # Look up knowledge base entries for the detected technologies
            # (currently used for logging/diagnostics only)
            for tech in detected_technologies:
                tech_info = knowledge_base.search_technologies(tech)
                if tech_info:
                    self.logger.info(f"Found knowledge base information for {tech}")
        
        # Select the appropriate prompt template based on query type
        if query_type == "technical":
            prompt = context_enhanced_templates.get_technical_prompt(
                query=query,
                retrieved_info={"results": result_list},
                conversation_history=conversation_history,
                detected_technologies=detected_technologies
            )
        elif query_type == "pricing":
            prompt = context_enhanced_templates.get_pricing_prompt(
                query=query,
                retrieved_info={"results": result_list},
                conversation_history=conversation_history,
                project_type=classification.get("project_type")
            )
        elif query_type == "scheduling":
            prompt = context_enhanced_templates.get_scheduling_prompt(
                query=query,
                retrieved_info={"results": result_list},
                conversation_history=conversation_history,
                meeting_details=classification.get("meeting_details")
            )
        else:  # Default to general template
            prompt = context_enhanced_templates.get_general_prompt(
                query=query,
                retrieved_info={"results": result_list},
                conversation_history=conversation_history
            )
        
        try:
            # Generate response using LLM with error handling
            try:
                # Try async generation first
                response = await self._async_generate(prompt)
            except RuntimeError:
                # Fall back to synchronous generation if the async offload fails
                self.logger.info("Falling back to synchronous generation")
                response = self.llm.generate(prompt)
            
            # Clean up the response
            response = response.strip()
            
            # Remove any "Sam:" or "Assistant:" prefixes that might be generated
            response = re.sub(r'^(Sam:|Assistant:)\s*', '', response)
            
            # Remove source citations from the response
            response = re.sub(r'\(source: [^\)]+, chunk: \d+\)', '', response)
            response = re.sub(r'\(source: [^\)]+\)', '', response)
            
            # Clean up any double spaces created by removing citations
            # (match only spaces/tabs so newlines and paragraph breaks are preserved)
            response = re.sub(r'[ \t]{2,}', ' ', response)
            
            return {
                "response": response,
                "sources": sources
            }
            
        except Exception as e:
            self.logger.error(f"Error generating response with LLM: {str(e)}")
            
            # Fallback responses
            if not result_list:
                return {
                    "response": "I don't have enough information in my knowledge base to answer confidently. Could you clarify your question?",
                    "sources": []
                }
            else:
                return {
                    "response": f"Based on what I know, {content_snippets[0][:100] if content_snippets else 'we offer various services at MangoIT Solutions'}. How can I help you further?",
                    "sources": sources
                }
    
    async def _async_generate(self, prompt: str) -> str:
        """Generate response asynchronously"""
        return await asyncio.to_thread(self.llm.generate, prompt)
    
    def _clean_content(self, content: str) -> str:
        """Clean content by removing extra whitespace and special characters"""
        # Remove extra whitespace
        content = re.sub(r'\s+', ' ', content)
        # Remove markdown headers
        content = re.sub(r'#+\s+', '', content)
        # Remove special characters
        content = re.sub(r'[^\w\s.,;:!?()[\]{}"\'-]', '', content)
        return content.strip()
    
    def _detect_language(self, text: str) -> str:
        """
        Detect language of text
        
        Args:
            text: Text to detect language from
            
        Returns:
            str: Detected language code ('en', 'hi', or 'hinglish')
        """
        # Simple detection based on common Hindi/Hinglish words and characters
        hindi_words = ["namaste", "kaise", "kya", "hai", "aap", "tum", "hum", "accha", "theek", "nahin", "nahi"]
        hindi_chars = set("अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह")
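        # Illustrative examples: "kya haal hai" -> two Hindi words -> "hinglish";
        # text containing Devanagari characters -> "hi"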
        
        text_lower = text.lower()
        
        # Check for Hindi characters
        if any(char in hindi_chars for char in text):
            return "hi"
        
        # Count Hindi words (whole-word matches to avoid false positives
        # such as "hai" matching inside "chair")
        hindi_word_count = sum(
            1 for word in hindi_words if re.search(rf'\b{word}\b', text_lower)
        )
        
        # Determine language based on word count
        if hindi_word_count >= 2:
            return "hinglish"
        
        return "en"  # Default to English

# Create a singleton instance
enhanced_response_generator = EnhancedResponseGenerator()
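
# Minimal usage sketch (illustrative only): the `results` structure mirrors the keys this
# module reads ("content", "source", "relevance", "id"); the sample query, document and
# conversation details are hypothetical, and a configured Gemini backend is assumed.
if __name__ == "__main__":
    sample_results = [
        {
            "content": "MangoIT Solutions provides web and mobile application development services.",
            "source": "services.md",
            "relevance": 0.87,
            "id": 3,
        }
    ]

    async def _demo():
        reply = await enhanced_response_generator.generate_response(
            query="What services do you offer?",
            results=sample_results,
            conversation_history="",
            is_new_conversation=True,
            conversation_id=None,
        )
        print(reply["response"])
        print(reply["sources"])

    asyncio.run(_demo())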
