"""Streamlit chat app for Coptic translation and linguistic analysis.

Uses the swiss-ai/Apertus-8B-Instruct model through the HuggingFace
Inference API, plus an optional local Coptic lexicon loaded from TEI XML
(e.g. the Comprehensive Coptic Lexicon) or separated-text formats.
"""

import os
import re
import xml.etree.ElementTree as ET

import streamlit as st
from huggingface_hub import InferenceClient

# Coptic alphabet helper: uppercase letter -> conventional English name.
COPTIC_ALPHABET = {
    'Ⲁ': 'Alpha', 'Ⲃ': 'Beta', 'Ⲅ': 'Gamma', 'Ⲇ': 'Delta', 'Ⲉ': 'Epsilon',
    'Ⲋ': 'Zeta', 'Ⲏ': 'Eta', 'Ⲑ': 'Theta', 'Ⲓ': 'Iota', 'Ⲕ': 'Kappa',
    'Ⲗ': 'Lambda', 'Ⲙ': 'Mu', 'Ⲛ': 'Nu', 'Ⲝ': 'Xi', 'Ⲟ': 'Omicron',
    'Ⲡ': 'Pi', 'Ⲣ': 'Rho', 'Ⲥ': 'Sigma', 'Ⲧ': 'Tau', 'Ⲩ': 'Upsilon',
    'Ⲫ': 'Phi', 'Ⲭ': 'Chi', 'Ⲯ': 'Psi', 'Ⲱ': 'Omega',
    'Ϣ': 'Shai', 'Ϥ': 'Fai', 'Ϧ': 'Khei', 'Ϩ': 'Hori',
    'Ϫ': 'Gangia', 'Ϭ': 'Shima', 'Ϯ': 'Ti'
}


# Coptic linguistic prompts (will be formatted with target language)
def get_coptic_prompts(target_language):
    """Return a task-name -> prompt-prefix mapping phrased for *target_language*.

    Keys: 'dialect_analysis', 'translation', 'transcription',
    'morphology', 'lexicon_lookup'.
    """
    return {
        'dialect_analysis': f"Analyze the Coptic dialect of this text and identify linguistic features. Respond in {target_language}:",
        'translation': (
            f"You are a professional Coptic translator. Translate the following Coptic text to {target_language}.\n\n"
            "IMPORTANT: Provide ONLY the direct translation. Do not include:\n"
            "- The original Coptic text\n"
            "- Explanations or commentary\n"
            "- Notes about context or meaning\n"
            f"- Any text other than the {target_language} translation\n\n"
            "Coptic text to translate:"
        ),
        'transcription': f"Provide a romanized transcription of this Coptic text. Respond in {target_language}:",
        'morphology': f"Analyze the morphological structure of these Coptic words. Respond in {target_language}:",
        'lexicon_lookup': f"Look up these Coptic words and provide definitions with Greek etymologies. Respond in {target_language}:",
    }


# Lexicon loader
@st.cache_data
def load_coptic_lexicon(file_path=None):
    """Load a Coptic lexicon into a ``{headword: definition}`` dict.

    Supports TEI XML (Comprehensive Coptic Lexicon layout) and plain-text
    files whose columns are separated by TAB, '|', ',' or ';'.  Returns an
    empty dict when the path is missing; parse errors are surfaced via
    ``st.error`` and also yield whatever was collected so far.
    """
    if not file_path or not os.path.exists(file_path):
        return {}

    lexicon = {}
    try:
        # Handle XML format (TEI structure for Comprehensive Coptic Lexicon)
        if file_path.endswith('.xml'):
            tree = ET.parse(file_path)
            root = tree.getroot()
            # TEI namespace used by all element lookups below.
            ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
            entries = root.findall('.//tei:entry', ns)
            for entry in entries[:100]:  # limit to first 100 entries for performance
                coptic_word = ""
                definition = ""

                # Headword: prefer the lemma form, fall back to any form.
                # NOTE: explicit `is None` checks are required here —
                # ElementTree elements with no children are falsy, so the
                # `find(...) or find(...)` idiom can discard a found element.
                form = entry.find('.//tei:form[@type="lemma"]', ns)
                if form is None:
                    form = entry.find('.//tei:form', ns)
                if form is not None:
                    orth = form.find('.//tei:orth', ns)
                    if orth is not None and orth.text:
                        coptic_word = orth.text.strip()

                # Definition: join up to the first two senses.
                definitions = []
                for sense in entry.findall('.//tei:sense', ns)[:2]:
                    def_elem = sense.find('.//tei:def', ns)
                    if def_elem is not None and def_elem.text:
                        definitions.append(def_elem.text.strip())
                if definitions:
                    definition = "; ".join(definitions)

                if coptic_word and definition:
                    # Clean headword but preserve Coptic and Greek Unicode ranges.
                    coptic_word = re.sub(
                        r'[^\u2C80-\u2CFF\u03B0-\u03FF\u1F00-\u1FFF\w\s\-]',
                        '',
                        coptic_word,
                    ).strip()
                    if coptic_word:
                        lexicon[coptic_word] = definition[:200]  # cap definition length
        # Handle text formats
        else:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    # First separator present in the line wins.
                    separator = next(
                        (sep for sep in ['\t', '|', ',', ';'] if sep in line),
                        None,
                    )
                    if separator:
                        parts = line.split(separator, 1)
                        if len(parts) >= 2:
                            lexicon[parts[0].strip()] = parts[1].strip()
    except Exception as e:
        st.error(f"Error loading lexicon: {str(e)}")
    return lexicon


# Language detection and UI
LANGUAGES = {
    'en': 'English',
    'es': 'Español',
    'fr': 'Français',
    'de': 'Deutsch',
    'zh': '中文',
    'ja': '日本語',
    'ar': 'العربية',
    'hi': 'हिन्दी',
    'cop': 'Coptic (ⲘⲉⲧⲢⲉⲙ̀ⲛⲭⲏⲙⲓ)',
    'cop-sa': 'Sahidic Coptic',
    'cop-bo': 'Bohairic Coptic',
}

st.set_page_config(page_title="Apertus Chat", layout="wide")

# Language selector
selected_lang = st.selectbox(
    "Language / Langue / Idioma",
    options=list(LANGUAGES.keys()),
    format_func=lambda x: LANGUAGES[x],
)

# Stays None unless a Coptic language is selected in the sidebar; the chat
# handler below checks this instead of the fragile `'analysis_type' in locals()`.
analysis_type = None

# Sidebar for Coptic tools
with st.sidebar:
    st.header("Coptic Tools")

    # HuggingFace API Token input
    st.subheader("🔑 API Configuration")
    hf_token_input = st.text_input(
        "HuggingFace API Token",
        type="password",
        help="Required for Apertus-8B translation. Get your token at: https://huggingface.co/settings/tokens",
    )
    if hf_token_input:
        st.success("✅ API token configured")
    else:
        st.warning("⚠️ Translation requires an API token")
        st.markdown("[Get your free HF token →](https://huggingface.co/settings/tokens)")
    st.divider()

    # Lexicon file uploader
    st.subheader("📚 Lexicon Upload")
    lexicon_file = st.file_uploader(
        "Upload Coptic Lexicon (optional)",
        type=['txt', 'tsv', 'csv', 'xml'],
        help="Supports: Text (TAB/pipe separated), XML (TEI format), CSV\nNote: Comprehensive lexicon is pre-loaded",
    )

    # Load lexicon
    if lexicon_file:
        try:
            # Check file size (max 20MB)
            file_size = len(lexicon_file.getvalue())
            if file_size > 20 * 1024 * 1024:
                st.error("❌ File too large (max 20MB)")
                coptic_lexicon = {}
            else:
                # Save uploaded file temporarily; `finally` guarantees cleanup
                # even when parsing raises.
                temp_path = f"temp_lexicon.{lexicon_file.name.split('.')[-1]}"
                try:
                    with open(temp_path, "wb") as f:
                        f.write(lexicon_file.getbuffer())
                    coptic_lexicon = load_coptic_lexicon(temp_path)
                finally:
                    if os.path.exists(temp_path):
                        os.remove(temp_path)
                if coptic_lexicon:
                    st.success(f"✅ Loaded {len(coptic_lexicon)} lexicon entries from {lexicon_file.name}")
                else:
                    st.warning("⚠️ File uploaded but no valid entries found")
                    coptic_lexicon = {}
        except Exception as e:
            st.error(f"❌ Error loading file: {str(e)}")
            st.info("💡 Supported formats: Plain text (TAB/pipe separated), XML (TEI), CSV")
            coptic_lexicon = {}
    else:
        # Try to load the comprehensive lexicon if available
        comprehensive_lexicon_path = "Comprehensive_Coptic_Lexicon-v1.2-2020.xml"
        if os.path.exists(comprehensive_lexicon_path):
            coptic_lexicon = load_coptic_lexicon(comprehensive_lexicon_path)
            if coptic_lexicon:
                st.info(f"📚 Loaded Comprehensive Coptic Lexicon: {len(coptic_lexicon)} entries")
            else:
                coptic_lexicon = {}
        else:
            coptic_lexicon = {}

    # Coptic alphabet reference
    # FIX: st.expander must be used as a context manager; the original
    # `if st.expander(...)` rendered nothing inside the expander.
    with st.expander("Coptic Alphabet"):
        for letter, name in COPTIC_ALPHABET.items():
            st.text(f"{letter} - {name}")

    # Lexicon search
    if coptic_lexicon:
        st.subheader("Lexicon Search")

        # Initialize session state for search term
        if "search_term" not in st.session_state:
            st.session_state.search_term = ""

        # Virtual Coptic keyboard
        st.write("**Virtual Keyboard:**")
        coptic_letters = [
            'ⲁ', 'ⲃ', 'ⲅ', 'ⲇ', 'ⲉ', 'ⲍ', 'ⲏ', 'ⲑ',
            'ⲓ', 'ⲕ', 'ⲗ', 'ⲙ', 'ⲛ', 'ⲝ', 'ⲟ', 'ⲡ',
            'ⲣ', 'ⲥ', 'ⲧ', 'ⲩ', 'ⲫ', 'ⲭ', 'ⲯ', 'ⲱ',
            'ϣ', 'ϥ', 'ϧ', 'ϩ', 'ϫ', 'ϭ', 'ϯ',
        ]

        # Keyboard layout: four rows of eight columns; clicking a key
        # appends the letter to the session-state search term.
        keyboard_rows = [st.columns(8) for _ in range(4)]
        for i, letter in enumerate(coptic_letters):
            if keyboard_rows[i // 8][i % 8].button(letter, key=f"key_{letter}"):
                st.session_state.search_term += letter
                st.rerun()

        # Control buttons
        col_space, col_back, col_clear = st.columns(3)
        with col_space:
            if st.button("Space"):
                st.session_state.search_term += " "
                st.rerun()
        with col_back:
            if st.button("⌫ Backspace"):
                st.session_state.search_term = st.session_state.search_term[:-1]
                st.rerun()
        with col_clear:
            if st.button("Clear"):
                st.session_state.search_term = ""
                st.rerun()

        # Search input - directly use session state WITHOUT key parameter to avoid conflicts
        search_term = st.text_input("Search Coptic word:", value=st.session_state.search_term)

        # Update session state if user types directly
        if search_term != st.session_state.search_term:
            st.session_state.search_term = search_term

        if search_term:
            if search_term in coptic_lexicon:
                st.write(f"**{search_term}**")
                st.write(coptic_lexicon[search_term])
            else:
                # Partial matches
                matches = [k for k in coptic_lexicon.keys() if search_term in k]
                if matches:
                    st.write("Partial matches:")
                    for match in matches[:5]:  # Show first 5 matches
                        st.write(f"**{match}** → {coptic_lexicon[match][:100]}...")
                else:
                    st.write("No matches found")

    # Linguistic analysis options for Coptic input
    if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
        st.subheader("Analysis Type")
        analysis_type = st.selectbox(
            "Choose analysis:",
            options=['translation', 'dialect_analysis', 'transcription', 'morphology', 'lexicon_lookup'],
            format_func=lambda x: x.replace('_', ' ').title(),
        )

        # Target language selector for translation
        if analysis_type == 'translation':
            st.subheader("Target Language")
            target_lang = st.selectbox(
                "Translate to:",
                options=[k for k in LANGUAGES.keys() if k not in ['cop', 'cop-sa', 'cop-bo']],
                format_func=lambda x: LANGUAGES[x],
                index=0,  # Default to English
            )
            target_language_name = LANGUAGES[target_lang]
        else:
            # For non-translation tasks, use English as default output language
            target_language_name = "English"
    else:
        # For non-translation tasks, use English as default output language
        target_language_name = "English"

# Get prompts for the target language
COPTIC_PROMPTS = get_coptic_prompts(target_language_name)

# Use HuggingFace Inference API instead of loading model locally
# This is much faster and doesn't require GPU
MODEL_NAME = "swiss-ai/Apertus-8B-Instruct-2509"


def get_inference_client(token=None):
    """Initialize HuggingFace Inference API client with provided token.

    Falls back to the Space secret ``HF_TOKEN`` when no token is given;
    returns ``None`` when no token is available or initialization fails.
    """
    try:
        if token:
            return InferenceClient(token=token)
        # Try to get token from Space secrets as fallback
        if hasattr(st, 'secrets') and 'HF_TOKEN' in st.secrets:
            return InferenceClient(token=st.secrets['HF_TOKEN'])
        return None
    except Exception as e:
        st.error(f"Error initializing inference client: {e}")
        return None


# Chat interface
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User input
if prompt := st.chat_input("Type your message..."):
    # Check if API token is available
    if not hf_token_input:
        st.error("⚠️ Please enter your HuggingFace API token in the sidebar to use translation.")
        st.stop()

    # Initialize inference client with user token
    inference_client = get_inference_client(hf_token_input)
    if not inference_client:
        st.error("❌ Failed to initialize inference client. Please check your API token.")
        st.stop()

    # Add Coptic-specific prompt prefix if applicable
    if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type is not None:
        full_prompt = f"{COPTIC_PROMPTS[analysis_type]} {prompt}"

        # Add lexicon context for lexicon lookup
        if analysis_type == 'lexicon_lookup' and coptic_lexicon:
            lexicon_matches = [
                f"{word} = {coptic_lexicon[word]}"
                for word in prompt.split()
                if word in coptic_lexicon
            ]
            if lexicon_matches:
                full_prompt += f"\n\nLexicon entries found: {'; '.join(lexicon_matches)}"
    else:
        full_prompt = prompt

    st.session_state.messages.append({"role": "user", "content": full_prompt})
    with st.chat_message("user"):
        st.markdown(full_prompt)

    # Generate response using HuggingFace Inference API
    with st.chat_message("assistant"):
        try:
            with st.spinner("🤖 Generating response..."):
                # Prepare messages with system instruction for better control
                if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type == 'translation':
                    # For translation: strict system message
                    messages = [
                        {"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
                        {"role": "user", "content": full_prompt},
                    ]
                else:
                    # For other tasks: standard chat
                    messages = [{"role": "user", "content": full_prompt}]

                response_stream = inference_client.chat_completion(
                    model=MODEL_NAME,
                    messages=messages,
                    max_tokens=512,
                    temperature=0.5,  # Lower temperature for more focused translations
                    top_p=0.9,
                    stream=True,
                )

                # Stream the response (chunk renamed so it doesn't shadow the
                # chat-history loop variable `message` above).
                response_placeholder = st.empty()
                full_response = ""
                for chunk in response_stream:
                    delta = chunk.choices[0].delta.content
                    if delta:
                        full_response += delta
                        response_placeholder.markdown(full_response + "▌")
                response_placeholder.markdown(full_response)

            st.session_state.messages.append({"role": "assistant", "content": full_response})
        except Exception as e:
            st.error(f"❌ Error generating response: {str(e)}")
            st.info("💡 Please verify your API token is valid and has not expired.")