"""
Knowledge Base Manager for accessing structured information about MangoIT capabilities.
"""

import os
import yaml
from typing import Dict, Any, List, Optional, Union
import re

class KnowledgeBaseManager:
    """
    Manager for accessing and querying the knowledge base.

    Every ``*.yaml`` / ``*.yml`` file in ``kb_dir`` is parsed and stored in
    ``kb_data`` under its filename without the extension. The query helpers
    read the sections found under the ``technical_capabilities`` key and
    tolerate missing, empty or malformed sections by treating them as empty.
    """

    # Top-level key (i.e. YAML filename stem) that all query helpers read from
    _ROOT_KEY = 'technical_capabilities'

    # Words ignored when splitting a search query into terms
    _STOP_WORDS = frozenset({
        'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'with', 'about',
        'is', 'are', 'do', 'does',
    })

    # Punctuation removed from both ends of a search term. "." is only removed
    # from the end so that names such as ".net" keep their leading dot.
    _EDGE_PUNCTUATION = '?!,;:"\'()[]{}'

    # "3-5 developers" style ranges and "8+ developers" style open-ended sizes
    _TEAM_RANGE_RE = re.compile(r'(\d+)-(\d+)')
    _TEAM_MINIMUM_RE = re.compile(r'(\d+)\+')

    def __init__(self, kb_dir: str = "kb"):
        """
        Initialize the knowledge base manager and load its files

        Args:
            kb_dir: Directory containing knowledge base YAML files
        """
        self.kb_dir = kb_dir
        self.kb_data: Dict[str, Any] = {}
        self._load_knowledge_base()

    def _load_knowledge_base(self) -> None:
        """
        Load all knowledge base YAML files from ``kb_dir`` into ``kb_data``

        A missing directory (or a path that is not a directory) and files that
        fail to load are reported with ``print`` and otherwise ignored.
        """
        # isdir rather than exists: a regular file at kb_dir would make
        # os.listdir raise NotADirectoryError.
        if not os.path.isdir(self.kb_dir):
            print(f"Knowledge base directory '{self.kb_dir}' not found")
            return

        # Sorted so that the load order (and which file wins when "x.yaml" and
        # "x.yml" both exist) does not depend on the filesystem.
        for filename in sorted(os.listdir(self.kb_dir)):
            if not filename.endswith(('.yaml', '.yml')):
                continue

            file_path = os.path.join(self.kb_dir, filename)
            # Use the filename without extension as the key
            key = os.path.splitext(filename)[0]
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = yaml.safe_load(f)
            except Exception as e:
                # Deliberately broad: loading is best-effort and one bad file
                # must not prevent the remaining files from loading.
                print(f"Error loading knowledge base file {filename}: {str(e)}")
                continue

            self.kb_data[key] = data
            print(f"Loaded knowledge base file: {filename}")

    def _get_section(self, section: str) -> Dict[str, Any]:
        """
        Return one mapping stored under the ``technical_capabilities`` key

        An empty YAML file is loaded as ``None`` and a section may be absent
        or ``null``; in all of those cases an empty dict is returned.
        """
        root = self.kb_data.get(self._ROOT_KEY)
        if not isinstance(root, dict):
            return {}
        value = root.get(section)
        return value if isinstance(value, dict) else {}

    def get_all_data(self) -> Dict[str, Any]:
        """Get all knowledge base data (the internal dict, not a copy)"""
        return self.kb_data

    def get_technical_capabilities(self) -> Dict[str, Any]:
        """Get technical capabilities information (``core_technologies``)"""
        return self._get_section('core_technologies')

    def get_pricing_information(self) -> Dict[str, Any]:
        """Get pricing information (``pricing_models``)"""
        return self._get_section('pricing_models')

    def get_contact_information(self) -> Dict[str, Any]:
        """Get contact information (``contact_information``)"""
        return self._get_section('contact_information')

    def get_specialized_services(self) -> Dict[str, Any]:
        """Get specialized services information (``specialized_services``)"""
        return self._get_section('specialized_services')

    def get_company_information(self) -> Dict[str, Any]:
        """Get company information (``company``)"""
        return self._get_section('company')

    def _iter_technologies(self) -> List[tuple]:
        """Return ``(category, technology)`` pairs, skipping malformed entries"""
        pairs = []
        for category, technologies in self.get_technical_capabilities().items():
            if not isinstance(technologies, (list, tuple)):
                continue
            for tech in technologies:
                if isinstance(tech, dict):
                    pairs.append((category, tech))
        return pairs

    @staticmethod
    def _framework_names(tech: Dict[str, Any]) -> List[str]:
        """Return the string entries of a technology's ``frameworks`` list"""
        frameworks = tech.get('frameworks')
        if not isinstance(frameworks, (list, tuple)):
            return []
        return [framework for framework in frameworks if isinstance(framework, str)]

    def search_technologies(self, query: str) -> List[Dict[str, Any]]:
        """
        Search for technologies matching the query

        A technology is returned once when a search term is a substring of its
        name, and once more per matching framework (those entries carry a
        ``matched_framework`` key). Matching is case-insensitive.

        Args:
            query: Search query

        Returns:
            List of copies of the matching technologies, each with a
            ``category`` key added
        """
        results: List[Dict[str, Any]] = []
        query_terms = self._extract_search_terms(query)
        if not query_terms:
            return results

        for category, tech in self._iter_technologies():
            # Terms are already lower-cased by _extract_search_terms
            name = str(tech.get('name') or '').lower()
            if any(term in name for term in query_terms):
                results.append({**tech, 'category': category})

            # Also check frameworks if available
            for framework in self._framework_names(tech):
                framework_lower = framework.lower()
                if any(term in framework_lower for term in query_terms):
                    results.append({
                        **tech,
                        'category': category,
                        'matched_framework': framework,
                    })

        return results

    def detect_technologies_in_text(self, text: str) -> List[str]:
        """
        Detect mentioned technologies in text

        Technology and framework names are matched case-insensitively as whole
        tokens. Names of one or two characters are skipped to avoid false
        positives.

        NOTE: for backward compatibility the detected names are returned
        through ``str.title()``, so e.g. "node.js" is reported as "Node.Js".

        Args:
            text: Text to analyze

        Returns:
            Sorted list of detected technologies without duplicates
        """
        if not text:
            return []

        haystack = text.lower()  # lower-case once instead of once per name
        detected = set()

        for _category, tech in self._iter_technologies():
            candidates = [str(tech.get('name') or '')]
            candidates.extend(self._framework_names(tech))
            for candidate in candidates:
                tech_name = candidate.lower()
                if len(tech_name) <= 2:
                    continue
                # Lookarounds instead of \b: \b needs a word character on one
                # side, so it can never match names that start or end with
                # punctuation such as "c++" or ".net".
                pattern = r'(?<!\w)' + re.escape(tech_name) + r'(?!\w)'
                if re.search(pattern, haystack):
                    detected.add(tech_name.title())

        return sorted(detected)

    def get_case_studies(self, industry: Optional[str] = None, technology: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get relevant case studies

        Both filters are case-insensitive substring matches; a filter that is
        not provided matches every case study.

        Args:
            industry: Optional industry filter
            technology: Optional technology filter

        Returns:
            List of copies of the matching case studies, each with a
            ``service_category`` key naming the service it was listed under
        """
        results: List[Dict[str, Any]] = []
        industry_filter = industry.lower() if industry else None
        technology_filter = technology.lower() if technology else None

        for service_key, service_data in self.get_specialized_services().items():
            # A service declared as "name:" with no body is loaded as None
            if not isinstance(service_data, dict):
                continue

            for case_study in service_data.get('case_studies') or []:
                if not isinstance(case_study, dict):
                    continue

                if industry_filter:
                    case_industry = str(case_study.get('industry') or '').lower()
                    if industry_filter not in case_industry:
                        continue

                if technology_filter:
                    case_technologies = case_study.get('technologies') or []
                    if not any(technology_filter in str(tech).lower() for tech in case_technologies):
                        continue

                results.append({**case_study, 'service_category': service_key})

        return results

    @classmethod
    def _config_fits_team(cls, config: Any, team_size: int) -> bool:
        """Tell whether a team configuration description covers ``team_size``"""
        description = str(config)

        # "3-5 developers" style range
        range_match = cls._TEAM_RANGE_RE.search(description)
        if range_match:
            min_size, max_size = int(range_match.group(1)), int(range_match.group(2))
            return min_size <= team_size <= max_size

        # "8+ developers" style minimum. A "+" without digits in front of it
        # (e.g. "Developers + QA") carries no size and simply does not match.
        minimum_match = cls._TEAM_MINIMUM_RE.search(description)
        if minimum_match:
            return team_size >= int(minimum_match.group(1))

        return False

    def get_pricing_estimate(self, project_type: str, team_size: Optional[int] = None) -> Dict[str, Any]:
        """
        Get pricing estimate based on project type and team size

        Args:
            project_type: Type of project (e.g., 'web', 'mobile', 'ai')
            team_size: Optional team size

        Returns:
            Dict with the general ``hourly_rates``, ``project_based`` and
            ``dedicated_team`` pricing info, plus
            ``specific_project_estimates`` (typical projects whose description
            contains ``project_type``) and ``recommended_team_configuration``
            (configurations covering ``team_size``) when any were found
        """
        pricing_info = self.get_pricing_information()

        # Default response with general pricing info
        response = {
            'hourly_rates': pricing_info.get('hourly_rates', {}),
            'project_based': pricing_info.get('project_based', {}),
            'dedicated_team': pricing_info.get('dedicated_team', {})
        }

        # Add specific project type info if available
        project_based = pricing_info.get('project_based')
        if isinstance(project_based, dict):
            needle = (project_type or '').lower()
            specific_projects = [
                project
                for project in project_based.get('typical_projects') or []
                if needle in str(project).lower()
            ]
            if specific_projects:
                response['specific_project_estimates'] = specific_projects

        # Add team size specific info if provided
        dedicated_team = pricing_info.get('dedicated_team')
        if team_size and isinstance(dedicated_team, dict):
            team_configs = [
                config
                for config in dedicated_team.get('typical_configurations') or []
                if self._config_fits_team(config, team_size)
            ]
            if team_configs:
                response['recommended_team_configuration'] = team_configs

        return response

    def _clean_term(self, term: str) -> str:
        """Strip punctuation around a term, keeping a leading '.' (".net")"""
        return term.lstrip(self._EDGE_PUNCTUATION).rstrip(self._EDGE_PUNCTUATION + '.')

    def _extract_search_terms(self, query: str) -> List[str]:
        """
        Extract search terms from query

        The query is lower-cased and split on whitespace; punctuation around
        each word and common stop words are dropped. For a multi-word query
        the whole query is appended as an extra term so that multi-word names
        can match exactly.
        """
        if not query or not query.strip():
            return []

        terms = []
        for word in query.lower().split():
            cleaned = self._clean_term(word)
            if cleaned and cleaned not in self._STOP_WORDS:
                terms.append(cleaned)

        # Add the original query as a term for exact matches
        if len(query.split()) > 1:
            whole_query = self._clean_term(query.strip().lower())
            if whole_query:
                terms.append(whole_query)

        return terms

# Module-level shared instance, created as a side effect of importing this
# module. It uses the default kb_dir ("kb"), a relative path, so the files are
# looked up relative to the current working directory of the process.
knowledge_base = KnowledgeBaseManager()
