import os
import sys
import time
import argparse
from dotenv import load_dotenv
from utils.chroma_db import ChromaDBManager

# Load environment variables from a .env file (if one is present). This runs
# at import time, before main() constructs ChromaDBManager.
# NOTE(review): presumably ChromaDBManager reads its settings from the
# environment -- confirm against utils/chroma_db.py.
load_dotenv()

def main(collection_name: str = "mangoit_pages_only", chroma_db=None) -> None:
    """
    Print a summary of the ChromaDB contents to stdout.

    Lists every collection with its document count, then reports details
    (document count, content types, unique sources) for one collection.

    Args:
        collection_name: Name of the collection to inspect in detail.
            Defaults to "mangoit_pages_only".
        chroma_db: Manager object exposing a ``client`` attribute and a
            ``get_collection_info(name)`` method. When omitted, a new
            ``ChromaDBManager`` is created.

    Returns:
        None. All results are printed.
    """
    print("ChromaDB Content Checker")
    print("=======================\n")

    # Initialize ChromaDB manager (unless the caller supplied one)
    if chroma_db is None:
        chroma_db = ChromaDBManager()
    client = chroma_db.client

    # Get all collections
    collections = client.list_collections()
    print(f"Found {len(collections)} collections in ChromaDB\n")

    # Print all collections
    print("--- All Collections in ChromaDB ---")
    if collections:
        for entry in collections:
            # chromadb 0.6.x returns plain collection names here instead of
            # Collection objects; resolve a name before reading .name/.count().
            if isinstance(entry, str):
                entry = client.get_collection(name=entry)
            print(f"Collection: {entry.name}, Count: {entry.count()}")
    else:
        print("No collections found.")

    # Check specific collection
    print(f"\n--- Collection Information for '{collection_name}' ---")

    # Only the lookup itself decides whether the collection exists. The
    # exception type raised for a missing collection differs between chromadb
    # releases, so the catch stays broad at this script-level boundary.
    try:
        collection = client.get_collection(name=collection_name)
    except Exception as e:
        print("Exists: False")
        print(f"Error: {str(e)}")
        return

    print("Exists: True")

    # Failures past this point concern reading details of a collection that
    # does exist, so they must not be reported as "Exists: False".
    try:
        print(f"Count: {collection.count()} documents")

        # Get collection info
        collection_info = chroma_db.get_collection_info(collection_name)

        # Print content types if available
        content_types = collection_info.get('content_types', {})
        if content_types:
            print("\n=== Content Types ===")
            for content_type, count in content_types.items():
                print(f"{content_type}: {count} documents")

        # Print unique sources count
        unique_sources = collection_info.get('unique_sources', 0)
        print(f"\nUnique sources: {unique_sources}")
    except Exception as e:
        print(f"Error retrieving collection details: {e}")

# Run the checker only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
