import asyncio
import httpx
import json
import time
import os
from dotenv import load_dotenv
from utils.url_config import API_BASE_URL

# Load variables from a local .env file into the process environment.
# NOTE(review): this runs after `utils.url_config` has already been imported,
# so API_BASE_URL was resolved before this call — confirm url_config does not
# rely on .env values that are only loaded here.
load_dotenv()

# COLLECTION_NAME is the collection targeted by every request in this script;
# EMBED_BACKEND is sent as "embed_backend" in the embed request payload.
COLLECTION_NAME = "BGE-large-en"
EMBED_BACKEND = "bge-large-en-v1.5"

async def reset_collection():
    """Ask the API to reset COLLECTION_NAME.

    Sends a DELETE to ``/reset-collection/<COLLECTION_NAME>`` and prints the
    outcome.

    Returns:
        True when the server answers 200 and the reply contains a
        ``message`` field; False for any other status or when any exception
        is raised while sending the request or reading the reply.
    """
    print(f"Resetting collection '{COLLECTION_NAME}'...")

    try:
        async with httpx.AsyncClient() as http:
            endpoint = f"{API_BASE_URL}/reset-collection/{COLLECTION_NAME}"
            reply = await http.delete(endpoint)

            # Guard clause: report the failure and bail out early.
            if reply.status_code != 200:
                print(f"Error resetting collection: {reply.status_code}")
                print(f"Response: {reply.text}")
                return False

            body = reply.json()
            print(f"Collection reset: {body['message']}")
            return True
    except Exception as exc:
        # Best-effort script boundary: report the problem instead of raising.
        print(f"Error resetting collection: {exc}")
        return False

async def embed_all_markdown():
    """Request a forced re-embedding of all markdown files into COLLECTION_NAME.

    POSTs the chunking/embedding settings to ``/embed-all-markdown`` and
    prints a summary of the server's reply together with the elapsed time.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (non-200
        status, or any exception raised while sending the request or
        decoding the reply).
    """
    print(f"Embedding all markdown files into collection '{COLLECTION_NAME}' using {EMBED_BACKEND}...")

    # Prepare the request payload with improved parameters
    payload = {
        "collection_name": COLLECTION_NAME,
        "chunk_size": 1500,  # Increased for better context
        "chunk_overlap": 250,  # Increased for better overlap
        "embed_backend": EMBED_BACKEND,
        "gemini_model": "gemini-embedding-001",
        "normalize_embeddings": True,  # Important for BGE models
        "force_reembed": True,  # Force re-embedding even if files are unchanged
    }

    # Summary lines printed on success: (label, response key, suffix).
    summary_fields = (
        ("Status", "status", ""),
        ("Message", "message", ""),
        ("Pages embedded", "pages_embedded", " chunks"),
        ("Posts embedded", "posts_embedded", " chunks"),
        ("Total embedded", "total_embedded", " chunks"),
        ("Embedding model", "embedding_model", ""),
        ("Collection name", "collection_name", ""),
    )

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments during a long-running request.
    start_time = time.perf_counter()

    # Send the request to the FastAPI endpoint
    try:
        async with httpx.AsyncClient(timeout=600.0) as client:  # Increased timeout for large collections
            print("Sending request to embed all markdown files...")
            response = await client.post(
                f"{API_BASE_URL}/embed-all-markdown",
                json=payload,
            )

            if response.status_code != 200:
                print(f"Error: {response.status_code}")
                print(f"Response: {response.text}")
                return None

            result = response.json()
            print("\n=== Embedding Results ===")
            for label, key, suffix in summary_fields:
                # .get(): a field missing from an otherwise successful reply
                # must not be reported as an embedding failure.
                print(f"{label}: {result.get(key, 'n/a')}{suffix}")
            print(f"Time taken: {time.perf_counter() - start_time:.2f} seconds")
            return result
    except Exception as e:
        # Best-effort script boundary: report the problem instead of raising.
        print(f"Error embedding markdown files: {str(e)}")
        return None

async def check_collection_status():
    """Fetch and print the name, document count and existence of COLLECTION_NAME.

    Sends a GET to ``/collection-info`` with ``collection_name`` as a query
    parameter.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (non-200
        status, or any exception raised while sending the request or
        decoding the reply).
    """
    try:
        async with httpx.AsyncClient() as client:
            # Pass the name through ``params`` so httpx URL-encodes it rather
            # than splicing it raw into the query string.
            response = await client.get(
                f"{API_BASE_URL}/collection-info",
                params={"collection_name": COLLECTION_NAME},
            )

            if response.status_code != 200:
                print(f"Error checking collection status: {response.status_code}")
                # Show the body too, matching the other request helpers.
                print(f"Response: {response.text}")
                return None

            result = response.json()
            print("\n=== Collection Status ===")
            # .get(): a missing field should not turn a successful lookup
            # into a reported error.
            print(f"Collection name: {result.get('name', 'n/a')}")
            print(f"Document count: {result.get('count', 'n/a')}")
            print(f"Exists: {result.get('exists', 'n/a')}")
            return result
    except Exception as e:
        # Best-effort script boundary: report the problem instead of raising.
        print(f"Error checking collection status: {str(e)}")
        return None

async def main():
    """Verify the API is reachable, then re-embed and report collection status.

    Steps:
      1. GET ``/docs`` as a liveness probe; abort if it fails or is not 200.
      2. Call ``embed_all_markdown()``. The collection is intentionally not
         reset first; the embed payload sets ``force_reembed`` instead.
      3. On success, print the collection status and suggested next steps.

    Returns:
        None. All outcomes are reported on stdout.
    """
    # First, check if the FastAPI server is running
    print("Checking if FastAPI server is running...")
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            # Use /docs endpoint instead of /health since it exists by default in FastAPI
            response = await client.get(f"{API_BASE_URL}/docs")
    except httpx.RequestError as e:
        # Transport-level failure (connection refused, timeout, ...): nothing
        # answered at API_BASE_URL.
        print("Error: FastAPI server is not running. Please start the server with 'uvicorn main:app --reload'")
        print(f"Details: {e}")
        return
    except Exception as e:
        print(f"Error checking server status: {str(e)}")
        return

    if response.status_code != 200:
        # Something did answer, so "not running" would be misleading here;
        # surface the actual status so the cause can be diagnosed.
        print(
            f"Error: FastAPI server responded with status {response.status_code} "
            "for /docs (expected 200). Check API_BASE_URL and that the docs endpoint is enabled."
        )
        return
    print("FastAPI server is running!")

    # Skip reset and directly embed all markdown files
    print("Proceeding directly to embedding with force_reembed=True...")
    result = await embed_all_markdown()

    if result:
        # Check collection status after embedding
        await check_collection_status()

        print("\n=== Next Steps ===")
        print("1. Test the RAG system with the new embedding model")
        print("2. Compare results with the previous model")
        print("3. Update your .env file to use this collection if you prefer the results")

# Script entry point: run the async workflow only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())
