import asyncio
import httpx
import json
import time
import os
from dotenv import load_dotenv
from utils.url_config import API_BASE_URL

# Pull settings from a .env file (via python-dotenv) before the lookups below
load_dotenv()

# Target collection: RAG_COLLECTION env var, otherwise the built-in default
RAG_COLLECTION = os.environ.get("RAG_COLLECTION", "mangoit_docs_miniLM")

# Embedding backend: EMBED_BACKEND env var, otherwise DEFAULT_EMBED_BACKEND
DEFAULT_EMBED_BACKEND = "all-MiniLM-L6-v2"
EMBED_BACKEND = os.environ.get("EMBED_BACKEND", DEFAULT_EMBED_BACKEND)

# Backends accepted by embed_all_markdown(); any other value falls back to the default
VALID_EMBED_BACKENDS = [
    "all-MiniLM-L6-v2",
    "gemini-embedding-001",
    "bge-large-en-v1.5",
]

async def embed_all_markdown(collection_name=RAG_COLLECTION):
    """
    Embed all markdown files into a single collection using the improved parameters.

    Posts the chunking/embedding settings to ``{API_BASE_URL}/embed-all-markdown``
    and prints a summary of the JSON reply.

    Args:
        collection_name: Name of the collection to use

    Returns:
        The decoded JSON body when the server answers HTTP 200, otherwise
        ``None`` (non-200 status, or any exception raised while sending the
        request or decoding the reply).
    """
    print(f"Embedding all markdown files into collection '{collection_name}'...")
    print("Using improved chunking parameters for better context preservation")

    # Resolve the backend once so the payload and the backend-specific options
    # below always agree on the value that is actually sent.
    if EMBED_BACKEND in VALID_EMBED_BACKENDS:
        embed_backend = EMBED_BACKEND
        print(f"Using embedding backend: {EMBED_BACKEND}")
    else:
        embed_backend = DEFAULT_EMBED_BACKEND
        print(f"Warning: Invalid embedding backend '{EMBED_BACKEND}'. Using default: {DEFAULT_EMBED_BACKEND}")

    # Prepare the request payload with improved parameters
    payload = {
        "collection_name": collection_name,
        "chunk_size": 1500,  # Increased for better context
        "chunk_overlap": 250,  # Increased for better overlap
        "embed_backend": embed_backend,
        "gemini_model": "gemini-embedding-001",  # Only used if embed_backend is gemini
    }

    # If using bge model, add specific parameters
    if embed_backend == "bge-large-en-v1.5":
        payload["normalize_embeddings"] = True  # BGE models typically need normalization

    start_time = time.time()

    # Send the request to the FastAPI endpoint
    try:
        async with httpx.AsyncClient(timeout=300.0) as client:  # Increased timeout for large collections
            print("Sending request to embed all markdown files...")
            response = await client.post(
                f"{API_BASE_URL}/embed-all-markdown",
                json=payload,
            )

            if response.status_code != 200:
                print(f"Error: {response.status_code}")
                print(f"Response: {response.text}")
                return None

            result = response.json()

            # The summary is informational only: a field missing from the reply
            # must not turn a successful (HTTP 200) embedding run into a
            # reported failure, so look the values up tolerantly.
            fields = result if isinstance(result, dict) else {}

            def shown(key):
                return fields.get(key, "N/A")

            print("\n=== Embedding Results ===")
            print(f"Status: {shown('status')}")
            print(f"Message: {shown('message')}")
            print(f"Pages embedded: {shown('pages_embedded')} chunks")
            print(f"Posts embedded: {shown('posts_embedded')} chunks")
            print(f"Total embedded: {shown('total_embedded')} chunks")
            print(f"Embedding model: {shown('embedding_model')}")
            print(f"Collection name: {shown('collection_name')}")
            print(f"Time taken: {time.time() - start_time:.2f} seconds")
            return result
    except Exception as e:
        # Best-effort script: report the problem and let the caller decide
        # what to do with the None result instead of crashing.
        print(f"Error embedding markdown files: {str(e)}")
        return None

async def check_collection_status():
    """
    Query ``{API_BASE_URL}/collection-info`` and print what the server reports.

    Returns:
        The decoded JSON body on HTTP 200; ``None`` on any other status code
        or when the request (or reading the reply) raises.
    """
    # (label printed to the user, key read from the JSON reply), in display order
    report_rows = (
        ("Collection name", "name"),
        ("Document count", "count"),
        ("Exists", "exists"),
    )

    try:
        async with httpx.AsyncClient() as http:
            reply = await http.get(f"{API_BASE_URL}/collection-info")

            # Anything other than 200 is reported and treated as "no info"
            if reply.status_code != 200:
                print(f"Error checking collection status: {reply.status_code}")
                return None

            info = reply.json()
            print("\n=== Collection Status ===")
            for label, key in report_rows:
                print(f"{label}: {info[key]}")
            return info
    except Exception as exc:
        print(f"Error checking collection status: {str(exc)}")
        return None

async def main():
    """
    Script workflow: confirm the API server answers, embed all markdown files,
    then print the collection status and follow-up hints.

    The server check tries ``/health`` first and falls back to ``/docs`` when
    the health endpoint returns a non-200 status or the connection fails.
    The function returns early (without embedding) if neither probe succeeds.
    """
    not_running_hint = "Error: FastAPI server is not running. Please start the server with 'uvicorn main:app --reload'"

    async def docs_endpoint_ok(http):
        # Fallback probe: report whether the /docs page answers with HTTP 200
        docs_reply = await http.get(f"{API_BASE_URL}/docs")
        if docs_reply.status_code == 200:
            print("Server is running (docs endpoint accessible)")
            return True
        print("Error: FastAPI server is not running or not accessible")
        return False

    # First, check if the FastAPI server is running
    try:
        print("Checking if FastAPI server is running...")
        async with httpx.AsyncClient(timeout=10.0) as http:
            try:
                print("Attempting to connect to health endpoint...")
                health_reply = await http.get(f"{API_BASE_URL}/health")
                health_status = health_reply.status_code
                print(f"Health endpoint response status: {health_status}")
                if health_status != 200:
                    print(f"Error: Health endpoint returned status {health_status}")
                    print("Trying alternative endpoint...")
                    if not await docs_endpoint_ok(http):
                        return
            except httpx.ConnectError as conn_err:
                print(f"Connection error: {str(conn_err)}")
                print("Trying alternative endpoint...")
                try:
                    if not await docs_endpoint_ok(http):
                        return
                except Exception as fallback_err:
                    print(f"Error connecting to server: {str(fallback_err)}")
                    print(not_running_hint)
                    return
    except Exception as probe_err:
        print(f"Error checking server status: {str(probe_err)}")
        print(not_running_hint)
        return

    # Embed all markdown files; nothing more to do if that did not succeed
    embed_result = await embed_all_markdown()
    if not embed_result:
        return

    # Check collection status after embedding
    await check_collection_status()

    print("\n=== Next Steps ===")
    for step in (
        "1. Test the RAG system with queries",
        "2. Monitor the logs for any errors",
        "3. Adjust relevance thresholds if needed",
    ):
        print(step)

if __name__ == "__main__":
    # Run the async workflow only when executed as a script, not on import.
    asyncio.run(main())
