import asyncio
import httpx
import json
import time
import os
from dotenv import load_dotenv
from utils.url_config import API_BASE_URL

# Load variables from a .env file into the process environment.
# NOTE(review): this call runs after `utils.url_config` has already been
# imported above; if that module reads environment variables at import time,
# values defined only in .env would not be visible to it -- confirm.
load_dotenv()

async def embed_markdown_directory(
    directory,
    collection_name="mangoit_docs",
    *,
    chunk_size=500,
    chunk_overlap=50,
    timeout=300.0,
):
    """
    Embed markdown files from a directory into ChromaDB

    Posts a JSON payload to the ``/embed-markdown`` endpoint under
    ``API_BASE_URL`` and prints the outcome.

    Args:
        directory: Directory containing markdown files
        collection_name: Name of the collection
        chunk_size: Value sent as ``chunk_size`` in the request payload
        chunk_overlap: Value sent as ``chunk_overlap`` in the request payload
        timeout: Seconds to wait for the HTTP request. httpx defaults to
            5 seconds, which a long-running embedding request can easily
            exceed, so a larger default is used here. ``None`` disables
            the timeout.

    Returns:
        The decoded JSON object (dict) when the endpoint answers with
        HTTP 200, otherwise ``None`` (request failure, non-200 status, or
        a body that is not a JSON object).
    """
    print(f"Embedding files from {directory} into collection {collection_name}...")

    # Prepare the request payload
    payload = {
        "directory": directory,
        "collection_name": collection_name,
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }

    # Send the request to the FastAPI endpoint
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(
                f"{API_BASE_URL}/embed-markdown",
                json=payload,
            )
    except httpx.RequestError as exc:
        # Connection failures and timeouts are reported like any other
        # failure (return None) so the caller's summary can print "Failed"
        # instead of the whole script aborting with a traceback.
        print(f"Error: request failed: {exc!r}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(f"Response: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        print("Error: response body is not valid JSON")
        print(f"Response: {response.text}")
        return None

    if not isinstance(result, dict):
        print("Error: response body is not a JSON object")
        print(f"Response: {response.text}")
        return None

    # .get() keeps a missing key from turning a successful request into a crash.
    print(f"Status: {result.get('status')}")
    print(f"Message: {result.get('message')}")
    print(f"Documents embedded: {result.get('documents_embedded')}")
    return result

async def main():
    """
    Embed the pages, posts and combined markdown directories one after
    another, pausing two seconds between requests, then print a summary
    of how many chunks each collection received.
    """

    def _chunk_count(result):
        # A falsy result means the embedding call reported failure; a
        # successful result without the expected key is also shown as
        # 'Failed' rather than raising KeyError.
        if not result:
            return 'Failed'
        return result.get('documents_embedded', 'Failed')

    # Embed pages markdown files
    pages_result = await embed_markdown_directory("markdown-data/pages_markdown", "mangoit_pages")

    # Wait a moment before embedding posts. asyncio.sleep yields to the
    # event loop; time.sleep would block it for the whole pause.
    await asyncio.sleep(2)

    # Embed posts markdown files
    posts_result = await embed_markdown_directory("markdown-data/posts_markdown", "mangoit_posts")

    # Embed all markdown files into a combined collection
    await asyncio.sleep(2)
    all_result = await embed_markdown_directory("markdown-data", "mangoit_all")

    # Print summary
    print("\nEmbedding Summary:")
    print(f"Pages: {_chunk_count(pages_result)} chunks")
    print(f"Posts: {_chunk_count(posts_result)} chunks")
    print(f"Combined: {_chunk_count(all_result)} chunks")

# Run the embedding workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())
