""" BharatLingo AI API - HuggingFace Space Free AI-powered language tools for Indian languages Supports: English, Hindi, Tamil, Telugu, Kannada, Bengali, Marathi, Gujarati, Malayalam, Punjabi, Odia, Assamese, Bhojpuri Models Used: - Main 6 Tools: grammarly/coedit-large, google/flan-t5-base, facebook/nllb-200-distilled-600M - Language Tools (38): Optimized smaller models for CPU Basic: - Qwen2.5 1.5B: Grammar, Paragraph, Rewrite, Paraphrase, Sentence maker, etc. - google/flan-t5-small: Spell check, Sentence check, Punctuation, Autocorrect - google/flan-t5-base: Essay, Content, Article, Blog writing - google/flan-t5-base: Proofreader, Readability, Letter writer """ import gradio as gr from transformers import ( AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline ) import torch from typing import Optional import json import re import os import time import base64 from io import BytesIO # OCR imports try: import pytesseract from PIL import Image import cv2 import numpy as np OCR_AVAILABLE = True print("✓ OCR dependencies loaded (Tesseract)") except ImportError as e: OCR_AVAILABLE = False print(f"Warning: OCR dependencies not installed: {e}") # PaddleOCR (optional, better quality but needs more dependencies) try: from paddleocr import PaddleOCR PADDLE_OCR_AVAILABLE = True print("✓ PaddleOCR loaded") except ImportError as e: PADDLE_OCR_AVAILABLE = False print(f"Info: PaddleOCR not available (will use Tesseract): {e}") # Language detection try: from langdetect import detect, DetectorFactory DetectorFactory.seed = 0 # For consistent results LANGDETECT_AVAILABLE = True print("✓ Language detection loaded") except ImportError: LANGDETECT_AVAILABLE = False print("Warning: langdetect not installed") # ============================================ # Model Loading (Lazy loading for efficiency) # ============================================ models = {} def get_translation_model(): """Get or load NLLB translation model for all languages""" if "translation" not in 
models: model_name = "facebook/nllb-200-distilled-600M" try: tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSeq2SeqLM.from_pretrained(model_name) if torch.cuda.is_available(): model = model.cuda() models["translation"] = (tokenizer, model, model_name) except Exception as e: print(f"Error loading translation model: {e}") raise e return models["translation"] def get_grammar_model(): """Get or load grammar correction model for MAIN tools (heavy model)""" if "grammar" not in models: model_name = "grammarly/coedit-large" try: tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSeq2SeqLM.from_pretrained(model_name) if torch.cuda.is_available(): model = model.cuda() models["grammar"] = (tokenizer, model) except Exception as e: print(f"Error loading grammar model: {e}") # Fallback model_name = "prithivida/grammar_error_correcter_v1" tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSeq2SeqLM.from_pretrained(model_name) models["grammar"] = (tokenizer, model) return models["grammar"] def get_text_generation_model(): """Get or load text generation model for MAIN tools""" if "text_gen" not in models: model_name = "google/flan-t5-base" try: tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSeq2SeqLM.from_pretrained(model_name) if torch.cuda.is_available(): model = model.cuda() models["text_gen"] = (tokenizer, model, model_name) except Exception as e: print(f"Error loading text gen model: {e}") raise e return models["text_gen"] # ============================================ # SMALL MODELS for Language Tools (CPU Basic optimized) # Now using same proven models as main tools for better results # ============================================ def get_light_grammar_model(): """Grammar model for language tool pages - using grammarly/coedit-large for better results""" # Reuse the main grammar model since it works well return get_grammar_model() def get_light_writing_model(): """Writing model 
for language tool pages - using flan-t5-base"""
    # Reuse the main text generation model
    return get_text_generation_model()


# ============================================
# Language Code Mapping
# ============================================

# Lowercase language name -> NLLB-200 language code (script-qualified).
LANG_CODE_MAP = {
    "english": "eng_Latn",
    "hindi": "hin_Deva",
    "bengali": "ben_Beng",
    "tamil": "tam_Taml",
    "telugu": "tel_Telu",
    "marathi": "mar_Deva",
    "gujarati": "guj_Gujr",
    "kannada": "kan_Knda",
    "malayalam": "mal_Mlym",
    "punjabi": "pan_Guru",
    "odia": "ory_Orya",
    "assamese": "asm_Beng",
    "bhojpuri": "bho_Deva",
}

# Language names in their native scripts
LANG_NATIVE_NAMES = {
    "english": "English",
    "hindi": "हिन्दी",
    "bengali": "বাংলা",
    "tamil": "தமிழ்",
    "telugu": "తెలుగు",
    "marathi": "मराठी",
    "gujarati": "ગુજરાતી",
    "kannada": "ಕನ್ನಡ",
    "malayalam": "മലയാളം",
    "punjabi": "ਪੰਜਾਬੀ",
    "odia": "ଓଡ଼ିଆ",
    "assamese": "অসমীয়া",
    "bhojpuri": "भोजपुरी",
}


# ============================================
# Helper Functions
# ============================================

def detect_script(text: str) -> str:
    """Detect the script of the text.

    Counts characters falling in each Unicode block and returns the name
    of the script with the most hits ("latin" if no block matched).
    """
    # Unicode block ranges (inclusive) per script.
    scripts = {
        "devanagari": (0x0900, 0x097F),
        "tamil": (0x0B80, 0x0BFF),
        "telugu": (0x0C00, 0x0C7F),
        "kannada": (0x0C80, 0x0CFF),
        "malayalam": (0x0D00, 0x0D7F),
        "bengali": (0x0980, 0x09FF),
        "gujarati": (0x0A80, 0x0AFF),
        "gurmukhi": (0x0A00, 0x0A7F),
        "odia": (0x0B00, 0x0B7F),
        "latin": (0x0000, 0x007F),
    }
    script_counts = {k: 0 for k in scripts}
    for char in text:
        code = ord(char)
        for script_name, (start, end) in scripts.items():
            if start <= code <= end:
                script_counts[script_name] += 1
                break
    if max(script_counts.values()) == 0:
        return "latin"
    return max(script_counts, key=script_counts.get)


def is_indian_language(language: str) -> bool:
    """Check if the language is an Indian language (non-English)"""
    return language.lower() not in ["english"]


def translate_internal(text: str, src_lang: str, tgt_lang: str) -> str:
    """Internal translation function.

    Translates via the cached NLLB model; returns the input unchanged when
    source == target or on any error (best-effort, never raises).
    """
    if src_lang == tgt_lang:
        return text
    try:
        tokenizer, model, _ = get_translation_model()
        # Unknown language names fall back to English codes.
        src_code = LANG_CODE_MAP.get(src_lang.lower(), "eng_Latn")
        tgt_code = LANG_CODE_MAP.get(tgt_lang.lower(), "eng_Latn")
        tokenizer.src_lang = src_code
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        generated = model.generate(
            **inputs,
            # Force decoding to start with the target-language token (NLLB).
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_code),
            max_length=512,
            num_beams=5,
            early_stopping=True
        )
        return tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Translation error: {e}")
        return text


# ============================================
# API Functions
# ============================================

def translate(text: str, source_lang: str, target_lang: str) -> str:
    """Translate text between Indian languages.

    Returns a JSON string with keys: translation, source_lang, target_lang,
    model — or {"error": ..., "translation": ""} on failure.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "translation": ""})
    try:
        tokenizer, model, model_name = get_translation_model()
        src_code = LANG_CODE_MAP.get(source_lang.lower(), "eng_Latn")
        # NOTE: default target here is Hindi (unlike translate_internal).
        tgt_code = LANG_CODE_MAP.get(target_lang.lower(), "hin_Deva")
        tokenizer.src_lang = src_code
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        generated = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_code),
            max_length=512,
            num_beams=5,
            early_stopping=True
        )
        translation = tokenizer.decode(generated[0], skip_special_tokens=True)
        return json.dumps({
            "translation": translation,
            "source_lang": source_lang,
            "target_lang": target_lang,
            "model": model_name
        })
    except Exception as e:
        return json.dumps({"error": str(e), "translation": ""})


def check_grammar(text: str, language: str = "english") -> str:
    """
    Check and correct grammar in text.
For Indian languages: Translate to English -> Correct -> Translate back

    Returns a JSON string with keys: corrected, original, suggestions,
    has_errors, language — or an "error" key on failure.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "corrected": "", "suggestions": []})
    try:
        original_text = text
        is_indian = is_indian_language(language)
        # For Indian languages, translate to English first
        if is_indian:
            english_text = translate_internal(text, language, "english")
        else:
            english_text = text
        # Get grammar model and correct
        tokenizer, model = get_grammar_model()
        prompt = f"Fix grammar: {english_text}"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=5,
            early_stopping=True
        )
        corrected_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # For Indian languages, translate back
        if is_indian:
            corrected = translate_internal(corrected_english, "english", language)
        else:
            corrected = corrected_english
        # Check if there were actual changes
        has_errors = corrected.strip() != original_text.strip()
        # If corrected is empty or essentially same, no errors
        if not corrected.strip():
            corrected = original_text
            has_errors = False
        # For Indian languages, if English correction was same as English translation, no errors
        # (the round-trip translation itself may rephrase, so compare in English)
        if is_indian and corrected_english.strip() == english_text.strip():
            has_errors = False
            corrected = original_text
        suggestions = []
        if has_errors:
            suggestions.append({
                "original": original_text,
                "corrected": corrected,
                "type": "grammar"
            })
        return json.dumps({
            "corrected": corrected,
            "original": original_text,
            "suggestions": suggestions,
            "has_errors": has_errors,
            "language": language
        })
    except Exception as e:
        return json.dumps({"error": str(e), "corrected": text, "original": text, "has_errors": False, "suggestions": []})


def check_spelling(text: str, language: str = "english") -> str:
    """
    Check spelling in text.
For Indian languages: Uses translation-based correction

    Returns a JSON string with keys: corrected, original, has_errors,
    language (and misspelled for the English path).
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "corrected": "", "misspelled": []})
    try:
        original_text = text
        is_indian = is_indian_language(language)
        if not is_indian and language.lower() == "english":
            # Use spellchecker for English (optional dependency; falls
            # through to the grammar-model path if not installed).
            try:
                from spellchecker import SpellChecker
                spell = SpellChecker()
                words = text.split()
                corrected_words = []
                misspelled = []
                for word in words:
                    # Strip punctuation for lookup but keep it in the output.
                    clean_word = ''.join(c for c in word if c.isalnum())
                    if not clean_word:
                        corrected_words.append(word)
                        continue
                    prefix = ""
                    suffix = ""
                    # Leading non-alnum characters (e.g. opening quote).
                    for i, c in enumerate(word):
                        if c.isalnum():
                            prefix = word[:i]
                            break
                    # Trailing non-alnum characters (e.g. period, comma).
                    for i in range(len(word) - 1, -1, -1):
                        if word[i].isalnum():
                            suffix = word[i+1:]
                            break
                    correction = spell.correction(clean_word.lower())
                    if correction and correction != clean_word.lower():
                        # Preserve original capitalization.
                        if clean_word[0].isupper():
                            correction = correction.capitalize()
                        corrected_words.append(prefix + correction + suffix)
                        misspelled.append({"original": word, "corrected": prefix + correction + suffix})
                    else:
                        corrected_words.append(word)
                corrected = ' '.join(corrected_words)
                return json.dumps({
                    "corrected": corrected,
                    "original": original_text,
                    "misspelled": misspelled,
                    "has_errors": len(misspelled) > 0,
                    "language": language
                })
            except ImportError:
                pass
        # For Indian languages, use translation-based approach
        # (round-trip through English normalizes many spelling errors).
        if is_indian:
            english_text = translate_internal(text, language, "english")
            corrected = translate_internal(english_text, "english", language)
            has_errors = corrected.strip() != original_text.strip()
            return json.dumps({
                "corrected": corrected if has_errors else original_text,
                "original": original_text,
                "misspelled": [],
                "has_errors": has_errors,
                "language": language
            })
        # Fallback: use grammar model
        tokenizer, model = get_grammar_model()
        prompt = f"Fix spelling: {text}"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=5,
            early_stopping=True
        )
        corrected = tokenizer.decode(outputs[0], skip_special_tokens=True)
        has_errors = corrected.strip() != original_text.strip()
        return json.dumps({
            "corrected": corrected if has_errors else original_text,
            "original": original_text,
            "has_errors": has_errors,
            "language": language
        })
    except Exception as e:
        return json.dumps({"error": str(e), "corrected": text, "original": text, "has_errors": False})


def rewrite_text(text: str, style: str = "formal", language: str = "english") -> str:
    """
    Rewrite text in different styles.
    Uses translation-based approach for Indian languages.

    Returns a JSON string with keys: rewritten, original, style, language.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "rewritten": ""})
    try:
        original_text = text
        is_indian = is_indian_language(language)
        # For Indian languages, translate to English first
        if is_indian:
            english_text = translate_internal(text, language, "english")
        else:
            english_text = text
        tokenizer, model, model_name = get_text_generation_model()
        style_prompts = {
            "formal": "Rewrite formally:",
            "casual": "Rewrite casually:",
            "professional": "Rewrite professionally:",
            "simple": "Simplify:",
            "academic": "Rewrite academically:",
        }
        prompt = f"{style_prompts.get(style, style_prompts['formal'])} {english_text}"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=5,
            early_stopping=True,
            do_sample=True,
            temperature=0.7
        )
        rewritten_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Clean up output: strip any echoed prompt prefix.
        for prompt_text in style_prompts.values():
            if rewritten_english.startswith(prompt_text):
                rewritten_english = rewritten_english[len(prompt_text):].strip()
        if not rewritten_english.strip():
            rewritten_english = english_text
        # For Indian languages, translate back
        if is_indian:
            rewritten = translate_internal(rewritten_english, "english", language)
        else:
            rewritten = rewritten_english
        return json.dumps({
            "rewritten": rewritten,
            "original": original_text,
            "style": style,
            "language": language
        })
    except Exception as e:
        return json.dumps({"error": str(e), "rewritten": text})


def summarize_text(text: str, max_length: int = 150) -> str:
    """Summarize text (works for all languages via translation).

    Detects the script to guess the language, summarizes in English
    (BART if available, else flan-t5), then translates back.
    Returns a JSON string with keys: summary, original_length,
    summary_length, detected_language.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "summary": ""})
    word_count = len(text.split())
    if word_count < 20:
        return json.dumps({
            "summary": text,
            "original_length": word_count,
            "summary_length": word_count,
            "note": "Text too short to summarize"
        })
    try:
        script = detect_script(text)
        is_indian = script != "latin"
        original_text = text
        if is_indian:
            # Map detected script back to a language name for translation.
            # NOTE(review): Devanagari is assumed to be Hindi; Marathi and
            # Bhojpuri share the script and would be treated as Hindi here.
            script_to_lang = {
                "devanagari": "hindi",
                "tamil": "tamil",
                "telugu": "telugu",
                "kannada": "kannada",
                "malayalam": "malayalam",
                "bengali": "bengali",
                "gujarati": "gujarati",
                "gurmukhi": "punjabi",
                "odia": "odia",
            }
            detected_lang = script_to_lang.get(script, "hindi")
            english_text = translate_internal(text, detected_lang, "english")
        else:
            english_text = text
            detected_lang = "english"
        try:
            summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
            result = summarizer(english_text, max_length=max_length, min_length=30, do_sample=False)
            summary_english = result[0]['summary_text']
        except Exception:
            # Fallback: prompt the flan-t5 text-generation model.
            tokenizer, model, _ = get_text_generation_model()
            prompt = f"Summarize: {english_text}"
            inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024)
            if torch.cuda.is_available():
                inputs = {k: v.cuda() for k, v in inputs.items()}
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                num_beams=4,
                early_stopping=True
            )
            summary_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if is_indian:
            summary = translate_internal(summary_english, "english", detected_lang)
        else:
            summary = summary_english
        return json.dumps({
            "summary": summary,
            "original_length": word_count,
            "summary_length": len(summary.split()),
            "detected_language": detected_lang
        })
    except Exception as e:
        return json.dumps({"error": str(e), "summary": ""})


def paraphrase(text: str, language: str = "english") -> str:
    """Paraphrase text while keeping the meaning.

    Retries with alternative prompts if the first output merely echoes
    the input. Returns a JSON string with keys: paraphrased, original,
    language.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "paraphrased": ""})
    try:
        original_text = text
        is_indian = is_indian_language(language)
        if is_indian:
            english_text = translate_internal(text, language, "english")
        else:
            english_text = text
        tokenizer, model = get_grammar_model()
        prompt = f"Paraphrase: {english_text}"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=5,
            early_stopping=True,
            do_sample=True,
            temperature=0.9,
            top_p=0.95
        )
        paraphrased_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # If the model just echoed the input, retry with other phrasings.
        if paraphrased_english.strip().lower() == english_text.strip().lower():
            alternative_prompts = [
                f"Rewrite differently: {english_text}",
                f"Say another way: {english_text}",
                f"Rephrase: {english_text}"
            ]
            for alt_prompt in alternative_prompts:
                inputs = tokenizer(alt_prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
                if torch.cuda.is_available():
                    inputs = {k: v.cuda() for k, v in inputs.items()}
                outputs = model.generate(
                    **inputs,
                    max_length=512,
                    num_beams=5,
                    do_sample=True,
                    temperature=1.0,
                    top_p=0.9
                )
                paraphrased_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
                if paraphrased_english.strip().lower() != english_text.strip().lower():
                    break
        if is_indian:
            paraphrased = translate_internal(paraphrased_english, "english", language)
        else:
            paraphrased = paraphrased_english
        return json.dumps({
            "paraphrased": paraphrased,
            "original": original_text,
            "language": language
        })
    except Exception as e:
        return json.dumps({"error": str(e), "paraphrased": text})


# ============================================
# LANGUAGE TOOL API FUNCTIONS (38 Tools)
# Using lightweight models optimized for CPU Basic
# ============================================

def tool_grammar_checker(text: str, language: str = "english", tool_type: str = "grammar-checker") -> str:
    """
    Grammar checker for language tool pages.
    Uses grammarly/coedit-large model for better results.
    Supports: grammar-checker, sentence-corrector, paragraph-corrector,
    text-corrector, writing-checker
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "result": ""})
    try:
        original_text = text
        is_indian = is_indian_language(language)
        # For Indian languages, translate to English first
        if is_indian:
            english_text = translate_internal(text, language, "english")
        else:
            english_text = text
        tokenizer, model = get_light_grammar_model()
        # Different prompts for different tool types
        tool_prompts = {
            "grammar-checker": f"Fix grammar errors: {english_text}",
            "sentence-corrector": f"Correct this sentence: {english_text}",
            "paragraph-corrector": f"Fix grammar in paragraph: {english_text}",
            "text-corrector": f"Correct this text: {english_text}",
            "writing-checker": f"Check and fix writing: {english_text}",
            "sentence-checker": f"Check this sentence: {english_text}",
        }
        prompt = tool_prompts.get(tool_type, tool_prompts["grammar-checker"])
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=4,
            early_stopping=True
        )
        corrected_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Translate back if Indian language
        if is_indian:
            corrected = translate_internal(corrected_english, "english", language)
        else:
            corrected = corrected_english
        return json.dumps({
            "result": corrected,
            "original": original_text,
"tool": tool_type, "language": language, "has_corrections": corrected.strip() != original_text.strip() }) except Exception as e: return json.dumps({"error": str(e), "result": text, "tool": tool_type}) def tool_spell_checker(text: str, language: str = "english", tool_type: str = "spell-checker") -> str: """ Spell checker for language tool pages. Uses lighter model for: spell-checker, punctuation-checker, autocorrect, spell-check-online """ if not text or not text.strip(): return json.dumps({"error": "No text provided", "result": ""}) try: original_text = text is_indian = is_indian_language(language) if is_indian: english_text = translate_internal(text, language, "english") else: english_text = text tokenizer, model = get_light_grammar_model() tool_prompts = { "spell-checker": f"Fix spelling errors: {english_text}", "punctuation-checker": f"Fix punctuation: {english_text}", "autocorrect": f"Auto-correct: {english_text}", "spell-check-online": f"Check spelling: {english_text}", "email-checker": f"Fix email text: {english_text}", } prompt = tool_prompts.get(tool_type, tool_prompts["spell-checker"]) inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512) if torch.cuda.is_available(): inputs = {k: v.cuda() for k, v in inputs.items()} outputs = model.generate( **inputs, max_length=512, num_beams=4, early_stopping=True ) corrected_english = tokenizer.decode(outputs[0], skip_special_tokens=True) if is_indian: corrected = translate_internal(corrected_english, "english", language) else: corrected = corrected_english return json.dumps({ "result": corrected, "original": original_text, "tool": tool_type, "language": language, "has_corrections": corrected.strip() != original_text.strip() }) except Exception as e: return json.dumps({"error": str(e), "result": text, "tool": tool_type}) def tool_writing_assistant(text: str, language: str = "english", tool_type: str = "rewrite", style: str = "formal") -> str: """ Writing assistant for language 
tool pages.
    Uses flan-t5-base for: essay-corrector, content-writer, article-rewriter,
    blog-writer, proofreader, readability-checker, letter-writer, editor,
    paraphrasing-tool

    Returns a JSON string with keys: result, original, tool, language, style.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "result": ""})
    try:
        original_text = text
        is_indian = is_indian_language(language)
        # For Indian languages, process in English and translate back.
        if is_indian:
            english_text = translate_internal(text, language, "english")
        else:
            english_text = text
        tokenizer, model, _ = get_light_writing_model()
        # Prompt varies per tool page; unknown tools get a generic prompt.
        tool_prompts = {
            "essay-corrector": f"Improve this essay: {english_text}",
            "content-writer": f"Enhance this content: {english_text}",
            "article-rewriter": f"Rewrite this article: {english_text}",
            "blog-writer": f"Improve this blog post: {english_text}",
            "proofreader": f"Proofread and fix: {english_text}",
            "readability-checker": f"Improve readability: {english_text}",
            "letter-writer": f"Improve this letter: {english_text}",
            "editor": f"Edit and improve: {english_text}",
            "paraphrasing-tool": f"Paraphrase: {english_text}",
            "sentence-maker": f"Improve this sentence: {english_text}",
            "quote-translator": f"Translate quote: {english_text}",
            "caption-generator": f"Generate caption: {english_text}",
            "vocabulary-builder": f"Explain vocabulary: {english_text}",
            "learning-app": f"Explain for learning: {english_text}",
        }
        prompt = tool_prompts.get(tool_type, f"Improve: {english_text}")
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=4,
            early_stopping=True,
            do_sample=True,
            temperature=0.7
        )
        result_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if is_indian:
            result = translate_internal(result_english, "english", language)
        else:
            result = result_english
        return json.dumps({
            "result": result,
            "original": original_text,
            "tool": tool_type,
            "language": language,
            "style": style
        })
    except Exception as e:
        return json.dumps({"error": str(e), "result": text, "tool": tool_type})


def tool_summarizer(text: str, language: str = "english", max_length: int = 150) -> str:
    """
    Summarizer for language tool pages.
    Uses flan-t5-base model.

    Returns a JSON string with keys: result, original, original_length,
    summary_length, language, tool.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "result": ""})
    word_count = len(text.split())
    # Very short inputs are returned as-is.
    if word_count < 20:
        return json.dumps({
            "result": text,
            "original_length": word_count,
            "summary_length": word_count,
            "note": "Text too short to summarize"
        })
    try:
        is_indian = is_indian_language(language)
        original_text = text
        if is_indian:
            english_text = translate_internal(text, language, "english")
        else:
            english_text = text
        tokenizer, model, _ = get_light_writing_model()
        prompt = f"Summarize: {english_text}"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            num_beams=4,
            early_stopping=True
        )
        summary_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if is_indian:
            summary = translate_internal(summary_english, "english", language)
        else:
            summary = summary_english
        return json.dumps({
            "result": summary,
            "original": original_text,
            "original_length": word_count,
            "summary_length": len(summary.split()),
            "language": language,
            "tool": "summarizer"
        })
    except Exception as e:
        return json.dumps({"error": str(e), "result": text, "tool": "summarizer"})


def tool_translator(text: str, source_lang: str = "english", target_lang: str = "hindi") -> str:
    """
    Translator for language tool pages.
    Uses NLLB translation model - same as main translate function.
""" if not text or not text.strip(): return json.dumps({"error": "No text provided", "result": ""}) try: translated = translate_internal(text, source_lang, target_lang) return json.dumps({ "result": translated, "original": text, "source_language": source_lang, "target_language": target_lang, "tool": "translator" }) except Exception as e: return json.dumps({"error": str(e), "result": text, "tool": "translator"}) def tool_generic(text: str, language: str = "english", tool_type: str = "generic") -> str: """ Generic tool handler for tools that don't need heavy AI processing. Returns processed result based on tool type. Handles: word-counter, thesaurus, dictionary, roman-typing, keyboard, typing-tool, font-converter, unicode-converter, script-converter """ if not text or not text.strip(): return json.dumps({"error": "No text provided", "result": ""}) try: result_data = { "original": text, "tool": tool_type, "language": language } if tool_type == "word-counter": words = len(text.split()) chars = len(text) chars_no_space = len(text.replace(" ", "")) sentences = len([s for s in text.split('.') if s.strip()]) paragraphs = len([p for p in text.split('\n\n') if p.strip()]) result_data.update({ "result": text, "word_count": words, "character_count": chars, "character_count_no_spaces": chars_no_space, "sentence_count": sentences, "paragraph_count": paragraphs }) elif tool_type == "thesaurus": # Basic thesaurus - return common synonyms (simplified) result_data.update({ "result": text, "note": "Use the interactive thesaurus on the tool page for synonyms" }) elif tool_type == "dictionary": result_data.update({ "result": text, "note": "Use the interactive dictionary on the tool page for definitions" }) else: # For typing tools, converters - these are handled client-side result_data.update({ "result": text, "note": f"The {tool_type} is available on the tool page" }) return result_data except Exception as e: return json.dumps({"error": str(e), "result": text, "tool": tool_type}) def 
tool_transliterate(text: str, source_lang: str = "english", target_lang: str = "hindi", mode: str = "roman") -> str: """ Transliterate text between scripts. Modes: roman (English to target script), script (between Indian scripts) Uses translation model as a proxy for transliteration. """ if not text or not text.strip(): return json.dumps({"error": "No text provided", "result": ""}) try: # Use translation as proxy for transliteration # For roman typing: English -> target language if mode == "roman": result = translate_internal(text, "english", target_lang) else: # Script conversion: source script -> target script result = translate_internal(text, source_lang, target_lang) return json.dumps({ "result": result, "original": text, "source_language": source_lang, "target_language": target_lang, "mode": mode, "tool": "transliterate" }) except Exception as e: return json.dumps({"error": str(e), "result": text, "tool": "transliterate"}) def tool_dictionary(text: str, language: str = "english", mode: str = "meaning") -> str: """ Dictionary/Thesaurus tool. Modes: meaning (dictionary), synonyms (thesaurus) Uses AI to provide definitions and synonyms. """ if not text or not text.strip(): return json.dumps({"error": "No text provided", "result": ""}) try: tokenizer, model, _ = get_text_generation_model() if mode == "synonyms": prompt = f"List 8 synonyms for the word '{text}'. 
Format as numbered list (1., 2., 3., etc.):"
        else:
            # NOTE(review): this span begins mid-function — the `def tool_dictionary(...)`
            # line and the "synonyms" branch opening live in an earlier chunk of the file.
            # "meaning" mode: ask the model for a definition plus usage examples.
            prompt = f"Define the word '{text}' clearly and provide 2 example sentences showing its usage:"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=256)
        if torch.cuda.is_available():
            # Move input tensors to GPU when one is present.
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=250,
            min_length=30,
            num_beams=5,
            early_stopping=True,
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.2
        )
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # For non-English, translate the result
        # (model output is English; route it through the internal NLLB translator).
        if is_indian_language(language):
            result = translate_internal(result, "english", language)
        # Response contract: JSON string (not dict) — the Gradio endpoints render
        # these via gr.Code(language="json").
        return json.dumps({
            "result": result,
            "original": text,
            "language": language,
            "mode": mode,
            "tool": "dictionary" if mode == "meaning" else "thesaurus"
        })
    except Exception as e:
        # Best-effort API: errors are reported inside the JSON payload, never raised.
        return json.dumps({"error": str(e), "result": text, "tool": "dictionary"})


def tool_generate(text: str, language: str = "english", mode: str = "sentence", count: int = 2) -> str:
    """
    Text generation tool for various purposes.
    Modes: sentence (make sentences), content (write content), letter (write letter),
    blog (write blog), caption (generate captions), vocabulary (word lists)

    Returns a JSON string with keys: result, original, language, mode, count, tool
    (or an "error" key on failure). Generation runs on flan-t5-base via
    get_text_generation_model(); non-English results are produced in English
    first and then translated.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "result": ""})
    try:
        tokenizer, model, _ = get_text_generation_model()
        # Enhanced prompts for better generation — one instruction template per mode.
        mode_prompts = {
            "sentence": f"Generate {count} complete, clear example sentences using the word or phrase '{text}'. Number each sentence (1., 2., 3.). Make them practical and easy to understand:",
            "content": f"Write a detailed, well-structured article (minimum 3 paragraphs) about: {text}. Include an introduction, main points with explanations, and a conclusion:",
            "letter": f"Write a complete formal letter with proper format about: {text}. Include: proper date, recipient address, salutation (Dear Sir/Madam), 3-4 full paragraphs explaining the purpose clearly, and proper closing (Sincerely/Yours faithfully) with sender details:",
            "blog": f"Write a complete blog post (minimum 4 paragraphs) about: {text}. Include an engaging introduction, main content with details and examples, practical tips or insights, and a conclusion:",
            "caption": f"Generate {count} creative, engaging social media captions for: {text}. Number each caption (1., 2., 3.). Make them catchy and include relevant hashtags:",
            "vocabulary": f"List {count} vocabulary words related to '{text}'. Format each as: [number]. [word] - [meaning]. Make it a numbered list (1., 2., 3., etc.) with clear explanations:"
        }
        # Unknown modes fall back to a generic content prompt.
        prompt = mode_prompts.get(mode, f"Write detailed content about: {text}")
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        # Increase max_length for better content generation
        # (long-form modes get a larger generation budget).
        max_gen_length = 512 if mode in ['letter', 'blog', 'content'] else 350
        outputs = model.generate(
            **inputs,
            max_length=max_gen_length,
            min_length=100 if mode in ['letter', 'blog', 'content'] else 50,
            num_beams=5,
            early_stopping=True,
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            repetition_penalty=1.2
        )
        result_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # For non-English, translate the result
        if is_indian_language(language):
            result = translate_internal(result_english, "english", language)
        else:
            result = result_english
        return json.dumps({
            "result": result,
            "original": text,
            "language": language,
            "mode": mode,
            "count": count,
            "tool": "generator"
        })
    except Exception as e:
        return json.dumps({"error": str(e), "result": text, "tool": "generator"})


def tool_analyze(text: str, language: str = "english", mode: str = "quality") -> str:
    """
    Text analysis tool for quality and readability.
    Modes: quality (writing quality score), email (email analysis), readability

    Returns a JSON string with an AI suggestion ("result") plus a "stats" object
    (word/sentence counts, average lengths, a 0-100 readability score, and a
    quality label). Indian-language input is translated to English for the AI
    pass and the suggestion is translated back.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided", "result": ""})
    try:
        original_text = text
        is_indian = is_indian_language(language)
        # Basic text statistics.
        # Sentence splitting also honors Devanagari danda (।) and double danda (॥).
        words = len(text.split())
        sentences = len([s for s in re.split(r'[.!?।॥]', text) if s.strip()])
        avg_word_length = sum(len(w) for w in text.split()) / max(words, 1)
        avg_sentence_length = words / max(sentences, 1)
        # Calculate readability score (simplified Flesch-like), clamped to [0, 100].
        readability_score = max(0, min(100, 100 - (avg_sentence_length * 2) - (avg_word_length * 10)))
        # For Indian languages, translate to English for AI analysis
        if is_indian:
            english_text = translate_internal(text, language, "english")
        else:
            english_text = text
        # Get AI suggestions (reuses the main grammar model, grammarly/coedit-large).
        tokenizer, model = get_grammar_model()
        if mode == "email":
            prompt = f"Analyze this email for tone and professionalism: {english_text}"
        elif mode == "readability":
            prompt = f"Suggest ways to make this text easier to read: {english_text}"
        else:
            # Default ("quality") mode.
            prompt = f"Improve the quality of this writing: {english_text}"
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=4,
            early_stopping=True
        )
        suggestion_english = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Translate suggestion back if needed
        if is_indian:
            suggestion = translate_internal(suggestion_english, "english", language)
        else:
            suggestion = suggestion_english
        # Determine quality level from the heuristic readability score.
        if readability_score >= 80:
            quality_level = "Excellent"
        elif readability_score >= 60:
            quality_level = "Good"
        elif readability_score >= 40:
            quality_level = "Fair"
        else:
            quality_level = "Needs Improvement"
        return json.dumps({
            "result": suggestion,
            "original": original_text,
            "language": language,
            "mode": mode,
            "tool": "analyzer",
            "stats": {
                "word_count": words,
                "sentence_count": sentences,
                "avg_word_length": round(avg_word_length, 1),
                "avg_sentence_length": round(avg_sentence_length, 1),
                "readability_score": round(readability_score),
                "quality_level": quality_level
            }
        })
    except Exception as e:
        return json.dumps({"error": str(e), "result": text, "tool": "analyzer"})


# ============================================
# Gradio Interface with API endpoints
# ============================================
# Each gr.Interface wraps one tool function. All tool functions return JSON
# strings, so outputs use gr.Code(language="json"). The six main tools expose
# named API endpoints (api_name="..."); the 38-tool language-tool interfaces
# set api_name=False to avoid Gradio schema errors.

translate_interface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Text to Translate", lines=5, placeholder="Enter text here..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Source Language"),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="hindi", label="Target Language")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Translation",
    description="Translate between Indian languages",
    api_name="translate"
)

grammar_interface = gr.Interface(
    fn=check_grammar,
    inputs=[
        gr.Textbox(label="Text to Check", lines=5, placeholder="Enter text to check grammar..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Grammar Checker",
    description="Check and correct grammar (supports all Indian languages)",
    api_name="grammar"
)

spelling_interface = gr.Interface(
    fn=check_spelling,
    inputs=[
        gr.Textbox(label="Text to Check", lines=5, placeholder="Enter text to check spelling..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Spell Checker",
    description="Check spelling (supports all Indian languages)",
    api_name="spelling"
)

rewrite_interface = gr.Interface(
    fn=rewrite_text,
    inputs=[
        gr.Textbox(label="Text to Rewrite", lines=5, placeholder="Enter text to rewrite..."),
        gr.Dropdown(choices=["formal", "casual", "professional", "simple", "academic"], value="formal", label="Style"),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Text Rewriter",
    description="Rewrite text in different styles (supports all Indian languages)",
    api_name="rewrite"
)

summarize_interface = gr.Interface(
    fn=summarize_text,
    inputs=[
        gr.Textbox(label="Text to Summarize", lines=10, placeholder="Enter long text..."),
        gr.Slider(minimum=50, maximum=300, value=150, step=10, label="Max Summary Length")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Text Summarizer",
    description="Summarize long texts (auto-detects language)",
    api_name="summarize"
)

paraphrase_interface = gr.Interface(
    fn=paraphrase,
    inputs=[
        gr.Textbox(label="Text to Paraphrase", lines=5, placeholder="Enter text to paraphrase..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Paraphraser",
    description="Paraphrase text (supports all Indian languages)",
    api_name="paraphrase"
)

# ============================================
# Language Tool Interfaces (38 Tools API)
# ============================================

tool_grammar_interface = gr.Interface(
    fn=tool_grammar_checker,
    inputs=[
        gr.Textbox(label="Text", lines=5, placeholder="Enter text..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Dropdown(choices=["grammar-checker", "sentence-corrector", "paragraph-corrector", "text-corrector", "writing-checker", "sentence-checker"], value="grammar-checker", label="Tool Type")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Grammar Tools",
    description="Grammar checking tools for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_spelling_interface = gr.Interface(
    fn=tool_spell_checker,
    inputs=[
        gr.Textbox(label="Text", lines=5, placeholder="Enter text..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Dropdown(choices=["spell-checker", "punctuation-checker", "autocorrect", "spell-check-online", "email-checker"], value="spell-checker", label="Tool Type")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Spelling Tools",
    description="Spelling and punctuation tools for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_writing_interface = gr.Interface(
    fn=tool_writing_assistant,
    inputs=[
        gr.Textbox(label="Text", lines=5, placeholder="Enter text..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Dropdown(choices=["essay-corrector", "content-writer", "article-rewriter", "blog-writer", "proofreader", "readability-checker", "letter-writer", "editor", "paraphrasing-tool", "sentence-maker", "quote-translator", "caption-generator", "vocabulary-builder", "learning-app"], value="essay-corrector", label="Tool Type"),
        gr.Dropdown(choices=["formal", "casual", "professional", "simple", "academic"], value="formal", label="Style")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Writing Tools",
    description="Writing assistance tools for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_summarizer_interface = gr.Interface(
    fn=tool_summarizer,
    inputs=[
        gr.Textbox(label="Text", lines=10, placeholder="Enter long text..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Slider(minimum=50, maximum=300, value=150, step=10, label="Max Summary Length")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Summarizer Tool",
    description="Text summarization for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_translator_interface = gr.Interface(
    fn=tool_translator,
    inputs=[
        gr.Textbox(label="Text", lines=5, placeholder="Enter text to translate..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Source Language"),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="hindi", label="Target Language")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Translator Tool",
    description="Translation for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_generic_interface = gr.Interface(
    fn=tool_generic,
    inputs=[
        gr.Textbox(label="Text", lines=5, placeholder="Enter text..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Dropdown(choices=["word-counter", "thesaurus", "dictionary", "roman-typing", "keyboard", "typing-tool", "font-converter", "unicode-converter", "script-converter"], value="word-counter", label="Tool Type")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Utility Tools",
    description="Utility tools (word counter, etc.) for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_transliterate_interface = gr.Interface(
    fn=tool_transliterate,
    inputs=[
        gr.Textbox(label="Text", lines=5, placeholder="Enter text to transliterate..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Source Language"),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="hindi", label="Target Language"),
        gr.Dropdown(choices=["roman", "script"], value="roman", label="Mode")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Transliteration Tool",
    description="Convert text between scripts for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_dictionary_interface = gr.Interface(
    fn=tool_dictionary,
    inputs=[
        gr.Textbox(label="Word/Text", lines=2, placeholder="Enter word to look up..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Dropdown(choices=["meaning", "synonyms"], value="meaning", label="Mode")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Dictionary/Thesaurus Tool",
    description="Find word meanings and synonyms for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_generate_interface = gr.Interface(
    fn=tool_generate,
    inputs=[
        gr.Textbox(label="Topic/Keywords", lines=3, placeholder="Enter topic or keywords..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Dropdown(choices=["sentence", "content", "letter", "blog", "caption", "vocabulary"], value="sentence", label="Generation Mode"),
        gr.Slider(minimum=1, maximum=10, value=2, step=1, label="Count (for lists)")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Text Generator Tool",
    description="Generate sentences, content, letters, and more for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

tool_analyze_interface = gr.Interface(
    fn=tool_analyze,
    inputs=[
        gr.Textbox(label="Text", lines=5, placeholder="Enter text to analyze..."),
        gr.Dropdown(choices=list(LANG_CODE_MAP.keys()), value="english", label="Language"),
        gr.Dropdown(choices=["quality", "email", "readability"], value="quality", label="Analysis Mode")
    ],
    outputs=gr.Code(label="Result", language="json"),
    title="Text Analysis Tool",
    description="Analyze writing quality and readability for all Indian languages",
    api_name=False  # Disabled to avoid Gradio schema errors
)

# Assemble all interfaces into one tabbed app; the interface list and the
# tab-label list below are positionally parallel (16 entries each).
demo = gr.TabbedInterface(
    [translate_interface, grammar_interface, spelling_interface, rewrite_interface,
     summarize_interface, paraphrase_interface,
     tool_grammar_interface, tool_spelling_interface, tool_writing_interface,
     tool_summarizer_interface, tool_translator_interface, tool_generic_interface,
     tool_transliterate_interface, tool_dictionary_interface, tool_generate_interface,
     tool_analyze_interface],
    ["🌐 Translate", "✓ Grammar", "📝 Spelling", "✍️ Rewrite", "📄 Summarize", "🔄 Paraphrase",
     "🔧 Tool: Grammar", "🔧 Tool: Spelling", "🔧 Tool: Writing", "🔧 Tool: Summary",
     "🔧 Tool: Translate", "🔧 Tool: Utils", "⌨️ Transliterate", "📖 Dictionary",
     "✨ Generate", "📊 Analyze"],
    title="🇮🇳 BharatLingo AI API",
)

# Enable request queuing so concurrent API calls are serialized on CPU.
demo.queue()

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard binding for a HuggingFace Space container.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)