graphics-llm / app.py
Tom
Update UI text: simplify footer and change header wording
1876b60
"""
Viz LLM - Gradio App
A RAG-powered assistant for data visualization guidance, powered by Jina-CLIP-v2
embeddings and research from the field of information graphics.
Now with Datawrapper integration for chart generation!
"""
import os
import io
import asyncio
import time
import pandas as pd
import gradio as gr
from dotenv import load_dotenv
from src.rag_pipeline import create_pipeline
from src.datawrapper_client import create_and_publish_chart, get_iframe_html
from datetime import datetime, timedelta
from collections import defaultdict
from src.vanna import VannaComponent
from src.query_intent_classifier import classify_query, IntentClassifier
# Populate os.environ from a local .env file before any os.getenv() call
# further down reads its settings.
load_dotenv()

# Maximum number of requests one client may make inside the 24-hour window
# enforced by check_rate_limit().
DAILY_LIMIT = 20

# Rate limiting: per-client request log, keyed by the client's host address.
# Shape: {host: [datetime, datetime, ...]} - one entry per accepted request.
rate_limit_tracker = defaultdict(list)
# Initialize the RAG pipeline
# Built once at import time and stored in the module-level `pipeline` that the
# request handlers below use. The model name and sampling temperature can be
# overridden with the LLM_MODEL / LLM_TEMPERATURE environment variables.
print("Initializing Graphics Design Pipeline...")
try:
    pipeline = create_pipeline(
        retrieval_k=5,
        model=os.getenv("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"),
        temperature=float(os.getenv("LLM_TEMPERATURE", "0.2"))
    )
    print("โœ“ Pipeline initialized successfully")
except Exception as e:
    # Log the cause, then re-raise: the module must not finish importing
    # without a pipeline. A non-numeric LLM_TEMPERATURE also ends up here.
    print(f"โœ— Error initializing pipeline: {e}")
    raise
# Initialize Vanna
# Module-level component that search_inspiration_from_database() queries via
# `await vanna.ask(...)`. Its token and database connection string come from
# the HF_TOKEN_VANNA and SUPABASE_CONNECTION environment variables.
print("Initializing Vanna...")
try:
    vanna = VannaComponent(
        hf_model="Qwen/Qwen3-VL-30B-A3B-Instruct",
        hf_token=os.getenv("HF_TOKEN_VANNA"),
        hf_provider="novita",
        connection_string=os.getenv("SUPABASE_CONNECTION")
    )
    print("โœ“ Vanna initialized successfully")
except Exception as e:
    # Log the cause, then re-raise so the import fails instead of leaving
    # `vanna` undefined.
    print(f"โœ— Error initializing Vanna: {e}")
    raise
# CSV cleanup function
def cleanup_old_csv_files(folder="513935c4d2db2d2d", max_age_seconds=86400):
    """Delete stale CSV files from *folder* so they do not accumulate.

    Only files directly inside *folder* whose name ends with ``.csv`` are
    considered; sub-directories are not walked.

    Args:
        folder: Directory to scan. Defaults to the directory name this app
            has always cleaned.
        max_age_seconds: A file is removed when its modification time is more
            than this many seconds in the past. Defaults to 24 hours.

    Returns:
        The number of files deleted (0 when *folder* is missing or is not a
        directory).
    """
    # isdir rather than exists: a regular file with this name would otherwise
    # make os.listdir() raise.
    if not os.path.isdir(folder):
        return 0

    # One cutoff for the whole pass, so every file is judged against the same
    # instant.
    cutoff = time.time() - max_age_seconds
    cleaned = 0
    for name in os.listdir(folder):
        if not name.endswith(".csv"):
            continue
        file_path = os.path.join(folder, name)
        try:
            if os.path.getmtime(file_path) < cutoff:
                os.remove(file_path)
                cleaned += 1
        except OSError as e:
            # Best effort: a file that vanished or is locked must not stop
            # the rest of the cleanup.
            print(f"Warning: Could not delete {file_path}: {e}")
    if cleaned > 0:
        print(f"โœ“ Cleaned up {cleaned} old CSV files")
    return cleaned


# Run cleanup on startup
print("Cleaning up old CSV files...")
cleanup_old_csv_files()
def check_rate_limit(request: gr.Request) -> tuple[bool, int]:
    """Record one request for the calling client and say whether it is allowed.

    Requests are counted per client host over the last 24 hours in the
    module-level ``rate_limit_tracker``. An allowed call is recorded
    immediately; a rejected call is not.

    Args:
        request: The Gradio request of the current call, or ``None``.

    Returns:
        ``(allowed, remaining)``: ``allowed`` is False once the client has
        used ``DAILY_LIMIT`` requests in the window; ``remaining`` is how
        many requests are left after this one (0 when rejected).
    """
    if request is None:
        return True, DAILY_LIMIT  # Allow if no request object

    # The peer address can be unavailable; there is then nothing to key the
    # counter on, so treat it like a missing request instead of raising.
    client = getattr(request, "client", None)
    if client is None:
        return True, DAILY_LIMIT

    # NOTE(review): behind a reverse proxy this is presumably the proxy's
    # address, so all users would share one quota - confirm for the deployment.
    user_id = client.host
    now = datetime.now()
    cutoff = now - timedelta(days=1)

    # Remove old requests (older than 24 hours)
    recent = [ts for ts in rate_limit_tracker[user_id] if ts > cutoff]
    rate_limit_tracker[user_id] = recent

    remaining = DAILY_LIMIT - len(recent)
    if remaining <= 0:
        return False, 0

    # Add current request
    recent.append(now)
    return True, remaining - 1
def recommend_stream(message: str, history: list, request: gr.Request):
    """Stream a design recommendation for *message* into the chat UI.

    Args:
        message: User's design query.
        history: Chat history (accepted for the ChatInterface signature; not
            read here).
        request: Gradio request object, used for rate limiting.

    Yields:
        The response accumulated so far after every chunk, so each yield
        replaces the previous one. When five or fewer requests remain, a
        final yield appends a quota footer. A rate-limit or error message is
        yielded instead when the call cannot be served.
    """
    # Check rate limit
    allowed, remaining = check_rate_limit(request)
    if not allowed:
        # Use DAILY_LIMIT so the message cannot drift from the enforced limit.
        yield (
            "โš ๏ธ **Rate limit exceeded.** You've reached the maximum of "
            f"{DAILY_LIMIT} queries per day. Please try again in 24 hours."
        )
        return
    try:
        response_stream = pipeline.generate_recommendations(message, stream=True)
        full_response = ""
        for chunk in response_stream:
            full_response += chunk
            yield full_response
        # Add rate limit info at the end
        if remaining <= 5:
            noun = "query" if remaining == 1 else "queries"
            yield full_response + f"\n\n---\n*You have {remaining} {noun} remaining today.*"
    except Exception as e:
        # Top-level boundary of the chat handler: show the failure in the chat
        # instead of letting the stream die silently.
        yield f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_URL, SUPABASE_KEY) and try again."
def generate_chart_from_csv(csv_file, user_prompt, api_key):
    """Generate a Datawrapper chart from an uploaded CSV and a text prompt.

    Args:
        csv_file: Path of the uploaded CSV file.
        user_prompt: User's description of the chart.
        api_key: User's Datawrapper API key.

    Returns:
        An HTML string: the embedded chart with its reasoning and an edit
        link on success, otherwise an error or hint message. The function
        does not raise; failures are rendered into the returned HTML.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Validate API key first
    if not api_key or api_key.strip() == "":
        return """
        <div style='padding: 50px; text-align: center; color: #d9534f;'>
        <h3>โŒ No API Key Provided</h3>
        <p>Please enter your Datawrapper API key above to generate charts.</p>
        <p style='margin-top: 15px;'>
        <a href='https://app.datawrapper.de/account/api-tokens' target='_blank'
        style='color: #1976d2; text-decoration: underline;'>Get your API key โ†’</a>
        </p>
        </div>
        """
    if not csv_file:
        return "<div style='padding: 50px; text-align: center;'>Please upload a CSV file to generate a chart.</div>"
    if not user_prompt or user_prompt.strip() == "":
        return "<div style='padding: 50px; text-align: center;'>Please describe what chart you want to create.</div>"

    # Temporarily set the API key in environment for this request.
    # NOTE(review): os.environ is process-wide, so two overlapping requests
    # could see each other's key. Passing the key explicitly would need a
    # change in create_and_publish_chart, which is outside this block.
    original_key = os.environ.get("DATAWRAPPER_ACCESS_TOKEN")
    os.environ["DATAWRAPPER_ACCESS_TOKEN"] = api_key.strip()
    try:
        # Read CSV file
        df = pd.read_csv(csv_file)

        # Create and publish chart (async function, need to run in event loop)
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            result = loop.run_until_complete(
                create_and_publish_chart(df, user_prompt, pipeline)
            )
        finally:
            # Close the loop even when chart creation raises.
            loop.close()

        if not result.get("success"):
            error_msg = escape(str(result.get("error", "Unknown error")))
            return f"""
            <div style='padding: 50px; text-align: center; color: red;'>
            <h3>โŒ Chart Generation Failed</h3>
            <p>{error_msg}</p>
            <p style='font-size: 0.9em; color: #666;'>Please check your CSV format and try again.</p>
            </div>
            """

        # Get the iframe HTML (markup produced by our own helper, inserted as-is)
        iframe_html = get_iframe_html(result.get('public_url'), height=500)
        # Model-generated text and URLs are escaped before they reach the page.
        reasoning = escape(str(result.get("reasoning") or ""))
        edit_url = escape(str(result.get("edit_url") or "#"))

        # Create HTML with iframe, reasoning, and edit button
        return f"""
        <div style='padding: 20px;'>
        <!-- Chart iframe -->
        <div style='margin-bottom: 20px;'>
        {iframe_html}
        </div>
        <!-- Why this chart? -->
        <div style='background: #f9f9f9; padding: 15px; border-radius: 5px; margin-bottom: 15px;'>
        <strong>Why this chart?</strong><br>
        <p style='margin: 10px 0 0 0;'>{reasoning}</p>
        </div>
        <!-- Edit button -->
        <div>
        <a href="{edit_url}" target="_blank"
        style="display: inline-block; padding: 12px 24px; background: #1976d2; color: white;
        text-decoration: none; border-radius: 5px; font-weight: bold;">
        โœ๏ธ Open in Datawrapper
        </a>
        </div>
        </div>
        """
    except Exception as e:
        # UI boundary: report the problem in the output panel.
        return f"""
        <div style='padding: 50px; text-align: center; color: red;'>
        <h3>โŒ Error</h3>
        <p>{escape(str(e))}</p>
        <p style='font-size: 0.9em; color: #666;'>Please ensure your CSV is properly formatted and your API key is correct.</p>
        </div>
        """
    finally:
        # Restore original API key or remove it
        if original_key is not None:
            os.environ["DATAWRAPPER_ACCESS_TOKEN"] = original_key
        else:
            os.environ.pop("DATAWRAPPER_ACCESS_TOKEN", None)
def csv_to_cards_html(csv_text: str) -> str:
    """Turn the raw CSV text returned by Vanna into a grid of HTML cards.

    Every CSV row becomes one card built from the ``title``, ``source_url``,
    ``author``, ``published_date`` and ``image_url`` columns. A missing
    column or an empty cell falls back to a default, and every value is
    HTML-escaped before being inserted into the markup.

    Args:
        csv_text: CSV document with a header row.

    Returns:
        The cards wrapped in a flex container, a "no data" message when the
        CSV has no rows, or an error message when it cannot be parsed. The
        function does not raise.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def _cell(row, column, default):
        """Return row[column] as escaped text, or *default* if absent/empty."""
        value = row.get(column, default)
        # Empty CSV cells are read as NaN, which would otherwise print "nan".
        if pd.isna(value):
            value = default
        return escape(str(value))

    try:
        df = pd.read_csv(io.StringIO(csv_text.strip()))
        if df.empty:
            return "<div style='padding: 50px; text-align: center;'>Aucune donnรฉe trouvรฉe.</div>"

        cards = []
        for _, row in df.iterrows():
            title = _cell(row, "title", "Sans titre")
            source_url = _cell(row, "source_url", "#")
            author = _cell(row, "author", "Inconnu")
            published_date = _cell(row, "published_date", "")
            image_url = _cell(row, "image_url", "https://fpoimg.com/800x600?text=Image+not+found")
            cards.append(f"""
            <div style="background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            overflow: hidden; margin: 10px; width: 320px; flex: 0 0 auto;">
            <img src="{image_url}" alt="{title}" style="width:100%; height:180px; object-fit:cover;">
            <div style="padding: 12px 16px;">
            <h3 style="margin:0; font-size:1.1em; color:#222;">{title}</h3>
            <p style="margin:6px 0; color:#555; font-size:0.9em;">{author}</p>
            <p style="margin:0; color:#999; font-size:0.8em;">{published_date}</p>
            <a href="{source_url}" target="_blank"
            style="display:inline-block; margin-top:8px; font-size:0.9em; color:#1976d2; text-decoration:none;">
            ๐Ÿ”— Source
            </a>
            </div>
            </div>
            """)

        cards_html = "".join(cards)
        return f"""
        <div style="display:flex; flex-wrap:wrap; justify-content:center; padding:20px;">
        {cards_html}
        </div>
        """
    except Exception as e:
        # UI boundary: a malformed CSV is reported in the panel, not raised.
        return f"<div style='padding: 50px; text-align: center; color:red;'>Erreur lors du parsing du CSV : {escape(str(e))}</div>"
async def search_inspiration_from_database(user_prompt):
    """Search inspiration posts in the database from a free-text prompt.

    The prompt is classified with ``IntentClassifier``, enriched with the
    classifier's guidance and sent to ``vanna.ask``. A CSV answer is rendered
    as cards via ``csv_to_cards_html``.

    Args:
        user_prompt: User's description of the inspiration query.

    Returns:
        HTML string displaying cards, a "no results" hint, or an error
        message. The function does not raise; failures are logged to the
        console and rendered into the returned HTML.
    """
    # Function-scope imports keep this block self-contained.
    import re
    import traceback
    from html import escape

    if not user_prompt or user_prompt.strip() == "":
        return """
        <div style='padding: 50px; text-align: center;'>
        Please describe what kind of inspiration you want to search for.
        </div>
        """
    try:
        # Classify user intent
        print(f"\n{'='*60}")
        print(f"[SEARCH] User prompt: {user_prompt}")
        classifier = IntentClassifier()
        classification = classifier.classify(user_prompt)
        print(f"[INTENT] Type: {classification['intent'].value}")
        print(f"[INTENT] Keywords: {classification['keywords']}")
        print(f"[INTENT] Inferred tags: {classification['tags']}")
        print(f"[INTENT] Short query: {classification['is_short_query']}")

        # Enhance prompt with intent guidance
        enhanced_prompt = classifier.format_for_vanna(classification)
        full_prompt = f"{user_prompt}\n\n{enhanced_prompt}"
        print(f"[VANNA] Sending enhanced prompt to Vanna...")
        response = await vanna.ask(full_prompt)
        print(f"[VANNA] Response received: {repr(response)[:200]}...")
        print(f"{'='*60}\n")
        clean_response = response.strip()

        # Check for empty query results (0 rows returned).
        # The look-behind keeps "10 rows" / "100 rows" from matching "0 rows".
        reports_zero_rows = re.search(r"(?<!\d)0 rows", clean_response.lower())
        if "No rows returned" in clean_response or reports_zero_rows:
            return f"""
            <div style='padding: 50px; text-align: center; color: #f0ad4e;'>
            <h3>๐Ÿ” No Results Found</h3>
            <p>Your query was executed successfully, but no posts matched your criteria.</p>
            <p style='margin-top: 15px; font-weight: 600;'>Suggestions:</p>
            <ul style='list-style: none; padding: 0; text-align: left; display: inline-block;'>
            <li>โ€ข Try broader keywords (e.g., "visualization" instead of "F1 dataviz")</li>
            <li>โ€ข Search by author names (e.g., "New York Times")</li>
            <li>โ€ข Use simple terms (e.g., "interactive", "maps")</li>
            </ul>
            <p style='margin-top: 15px; font-style: italic; color: #666; font-size: 0.9em;'>
            <strong>Note:</strong> Most posts are currently being enriched with tags.<br/>
            Keyword search works for all {classification.get('total_posts', '7,000+')} posts in the database.
            </p>
            </div>
            """

        # Check for errors or warnings
        if clean_response.startswith("โš ๏ธ") or clean_response.startswith("โŒ") or "Aucun CSV dรฉtectรฉ" in clean_response:
            return f"""
            <div style='padding: 50px; text-align: center; color: #d9534f;'>
            <h3>โŒ Query Error</h3>
            <p>The AI encountered an issue processing your request.</p>
            <p style='margin-top: 10px; font-size: 0.9em; color: #666;'>{escape(clean_response[:200])}</p>
            <p style='margin-top: 15px;'>Try rephrasing your query or being more specific.</p>
            </div>
            """

        # Process CSV response: drop the surrounding code-fence backticks and
        # only a leading "csv" language tag. Removing every "csv" substring
        # would corrupt titles and URLs that contain it (e.g. "data.csv").
        csv_text = clean_response.strip("`")
        csv_text = re.sub(r"\A\s*csv\s*:?[ \t]*\r?\n", "", csv_text, flags=re.IGNORECASE)

        # Check if response contains CSV data
        if "," not in csv_text or "id,title" not in csv_text.lower():
            return f"""
            <div style='padding: 50px; text-align: center; color: #d9534f;'>
            <h3>โŒ Invalid Response Format</h3>
            <p>The database query didn't return structured data.</p>
            <p style='margin-top: 10px; font-size: 0.9em; color: #666;'>
            This might be a temporary issue. Please try again.
            </p>
            </div>
            """
        return csv_to_cards_html(csv_text)
    except Exception as e:
        # UI boundary: log the full traceback, show an escaped summary.
        print(f"โŒ Exception in search_inspiration_from_database: {str(e)}")
        traceback.print_exc()
        return f"""
        <div style='padding: 50px; text-align: center; color: red;'>
        <h3>โŒ System Error</h3>
        <p style='margin-bottom: 10px;'>An unexpected error occurred:</p>
        <p style='font-family: monospace; font-size: 0.85em; color: #666;'>{escape(str(e))}</p>
        <p style='margin-top: 15px; font-size: 0.9em; color: #666;'>
        Please check the console logs for more details.
        </p>
        </div>
        """
# Minimal CSS to fix UI artifacts and style the mode selector
# Passed to gr.Blocks(css=...) below; the .mode-button class is attached to
# the three mode buttons through elem_classes.
custom_css = """
/* Hide retry/undo buttons that appear as artifacts */
.chatbot button[aria-label="Retry"],
.chatbot button[aria-label="Undo"] {
display: none !important;
}
/* Remove overflow-y scroll from textarea */
textarea[data-testid="textbox"] {
overflow-y: hidden !important;
}
/* Mode selector buttons */
.mode-button {
font-size: 1.1em;
padding: 12px 24px;
margin: 5px;
}
"""
# Create Gradio interface with dual-mode layout
# Three mode containers (Inspiration, Refinement, Chart) are declared here;
# only the Inspiration container starts visible.
with gr.Blocks(
    title="Viz LLM",
    css=custom_css
) as demo:
    gr.Markdown("""
# ๐Ÿ“Š Viz LLM
Discover inspiring visualizations, refine your design ideas, or generate charts using Datawrapper.
""")
    # JavaScript for localStorage persistence
    # NOTE(review): Gradio's gr.HTML documentation says JavaScript placed in
    # its value is not run and points to the `js` / `head` parameters of
    # gr.Blocks instead. If so, saveApiKeyToStorage() is never defined and the
    # js= hook on api_key_input.change would fail - confirm in the browser.
    gr.HTML("""
<script>
// Save API key to localStorage when it changes
function saveApiKeyToStorage(key) {
if (key && key.trim() !== '') {
localStorage.setItem('datawrapper_api_key', key);
}
}
// Load API key from localStorage on page load
function loadApiKeyFromStorage() {
return localStorage.getItem('datawrapper_api_key') || '';
}
// Auto-load API key when the page loads
window.addEventListener('DOMContentLoaded', function() {
setTimeout(function() {
const savedKey = loadApiKeyFromStorage();
if (savedKey) {
const apiKeyInput = document.querySelector('input[type="password"]');
if (apiKeyInput) {
apiKeyInput.value = savedKey;
// Trigger change event to update Gradio state
apiKeyInput.dispatchEvent(new Event('input', { bubbles: true }));
}
}
}, 1000);
});
</script>
""")
    # Mode selector buttons (reordered: Inspiration, Refinement, Chart)
    # The "primary" variant marks the mode that is active on load.
    with gr.Row():
        inspiration_btn = gr.Button("โœจ Inspiration", variant="primary", elem_classes="mode-button")
        ideation_btn = gr.Button("๐Ÿ’ก Refinement", variant="secondary", elem_classes="mode-button")
        chart_gen_btn = gr.Button("๐Ÿ“Š Chart", variant="secondary", elem_classes="mode-button")
    # Inspiration Mode: Search interface (shown by default)
    with gr.Column(visible=True) as inspiration_container:
        with gr.Row():
            inspiration_prompt_input = gr.Textbox(
                placeholder="Search for inspiration (e.g., 'F1', 'interactive maps')...",
                show_label=False,
                scale=4,
                container=False
            )
            inspiration_search_btn = gr.Button("๐Ÿ” Search", variant="primary", scale=1)
        # Target of the search handler: receives the loading message, then
        # the result cards or an error message.
        inspiration_cards_html = gr.HTML("")
    # Refinement Mode: Chat interface (hidden by default, wrapped in Column)
    with gr.Column(visible=False) as ideation_container:
        ideation_interface = gr.ChatInterface(
            fn=recommend_stream,
            type="messages",
            examples=[
                "What's the best chart type for showing trends over time?",
                "How do I create an effective infographic for complex data?",
                "What are best practices for data visualization accessibility?",
                "How should I design a dashboard for storytelling?",
                "What visualization works best for comparing categories?"
            ],
            cache_examples=False,
            api_name="recommend"
        )
    # Chart Generation Mode: Chart controls and output (hidden by default)
    with gr.Column(visible=False) as chart_gen_container:
        gr.Markdown("### Chart Generator")
        # API Key Input (collapsible)
        with gr.Accordion("๐Ÿ”‘ Datawrapper API Key", open=False):
            gr.Markdown("""
Enter your Datawrapper API key to generate charts. Your key is stored in your browser and persists across sessions.
**Get your key**: [Datawrapper Account Settings](https://app.datawrapper.de/account/api-tokens)
""")
            # Warning about permissions
            gr.HTML("""
<div style="background: #fff3cd; border: 1px solid #ffc107; border-radius: 5px; padding: 12px; margin: 10px 0;">
<strong>โš ๏ธ Important:</strong> When creating your API key, toggle <strong>ALL permissions</strong> (Read & Write for Charts, Tables, Folders, etc.) otherwise chart generation will fail.
</div>
""")
            api_key_input = gr.Textbox(
                label="API Key",
                placeholder="Paste your Datawrapper API key here...",
                type="password",
                value=""
            )
            # Status line rewritten by validate_api_key() on every change.
            api_key_status = gr.Markdown("โš ๏ธ Status: No API key provided")
        # type="filepath": the handler receives a path string for the upload.
        csv_upload = gr.File(
            label="๐Ÿ“ Upload CSV File",
            file_types=[".csv"],
            type="filepath"
        )
        chart_prompt_input = gr.Textbox(
            label="Describe your chart",
            placeholder="E.g., 'Show sales trends over time' or 'Compare revenue by category'",
            lines=2
        )
        generate_chart_btn = gr.Button("Generate Chart", variant="primary", size="lg")
        chart_output = gr.HTML(
            value="<div style='text-align:center; padding:100px; color: #666;'>Upload a CSV file and describe your visualization above, then click Generate Chart.</div>",
            label="Generated Chart"
        )
    # API key state management
    # Holds the validated key; this state, not the textbox, is what the chart
    # generation handler receives.
    api_key_state = gr.State(value="")
def validate_api_key(api_key: str) -> tuple[str, str]:
    """Validate a Datawrapper API key and build the status line for the UI.

    Surrounding whitespace (common when a key is pasted) is ignored, both for
    the length check and in the value that is stored.

    Args:
        api_key: Raw text from the API key input; may be empty or None.

    Returns:
        ``(key, status)``: ``key`` is the trimmed key, or ``""`` when the
        input is rejected; ``status`` is the Markdown status message.
    """
    key = (api_key or "").strip()
    if not key:
        return "", "โš ๏ธ Status: No API key provided"
    # Basic validation (check format)
    if len(key) < 20:
        return "", "โŒ Status: Invalid API key format (too short)"
    # Key looks valid - it will be saved to localStorage via JavaScript.
    # At least 20 characters are present here, so showing only the last six
    # never reveals the whole key.
    return key, f"โœ… Status: API key saved to browser storage (ends with ...{key[-6:]})"
# Mode switching functions (updated for new order: Inspiration, Refinement, Chart)
def _mode_updates(active):
    """Build the six component updates that make mode number *active* current.

    Modes are numbered in display order: 0 = Inspiration, 1 = Refinement,
    2 = Chart. The result lists the three button updates first (the active
    one "primary", the others "secondary"), then the three container updates
    (only the active one visible).
    """
    button_updates = [
        gr.update(variant="primary" if index == active else "secondary")
        for index in range(3)
    ]
    container_updates = [
        gr.update(visible=(index == active))
        for index in range(3)
    ]
    return button_updates + container_updates


def switch_to_inspiration():
    """Activate the Inspiration mode (search interface)."""
    return _mode_updates(0)


def switch_to_ideation():
    """Activate the Refinement mode (chat interface)."""
    return _mode_updates(1)


def switch_to_chart_gen():
    """Activate the Chart mode (chart generator)."""
    return _mode_updates(2)
# Wire up mode switching (updated order: inspiration, ideation, chart)
# All three buttons update the same six components; only the handler differs.
for mode_button, mode_handler in (
    (inspiration_btn, switch_to_inspiration),
    (ideation_btn, switch_to_ideation),
    (chart_gen_btn, switch_to_chart_gen),
):
    mode_button.click(
        fn=mode_handler,
        inputs=[],
        outputs=[
            inspiration_btn,
            ideation_btn,
            chart_gen_btn,
            inspiration_container,
            ideation_container,
            chart_gen_container,
        ],
    )

# Connect API key validation and localStorage save: the js hook receives the
# typed key first and passes it on unchanged to validate_api_key.
api_key_input.change(
    js="(key) => { saveApiKeyToStorage(key); return key; }",
    fn=validate_api_key,
    inputs=[api_key_input],
    outputs=[api_key_state, api_key_status],
)

# Generate chart when button is clicked (now with API key): the key is taken
# from the validated state, not from the textbox.
generate_chart_btn.click(
    fn=generate_chart_from_csv,
    inputs=[csv_upload, chart_prompt_input, api_key_state],
    outputs=[chart_output],
)
# Search inspiration with loading state
def search_with_loading(prompt):
    """Run an inspiration search, showing a loading message while it runs.

    This is a generator: every yielded HTML string replaces the previous
    content of the output component.

    Args:
        prompt: Text typed into the inspiration search box.

    Yields:
        A hint when *prompt* is empty; otherwise a loading message followed
        by the HTML returned by ``search_inspiration_from_database``.
    """
    if not prompt or not prompt.strip():
        # The hint must be yielded. In a generator, `return <value>` only
        # ends the iteration and the value never reaches the UI.
        yield """
        <div style='padding: 50px; text-align: center;'>
        Please enter a search query.
        </div>
        """
        return

    # Show loading immediately (Gradio will display this first)
    yield """
    <div style='padding: 50px; text-align: center;'>
    <div style='font-size: 2em; margin-bottom: 20px;'>๐Ÿ”</div>
    <h3>Searching database...</h3>
    <p style='color: #666;'>Analyzing your query and generating SQL...</p>
    </div>
    """

    # Run the actual search: the search is a coroutine and this handler is
    # synchronous, so drive it to completion here.
    result = asyncio.run(search_inspiration_from_database(prompt))
    yield result
# search_with_loading is a generator, so the loading message it yields first
# is replaced by the final result in the same output component.
inspiration_search_btn.click(
    fn=search_with_loading,
    inputs=[inspiration_prompt_input],
    outputs=[inspiration_cards_html]
)
# Knowledge base section (credits and usage limits, placed after the mode
# containers declared above)
gr.Markdown("""
### About Viz LLM
**Credits:** Special thanks to the researchers whose work informed this model: Robert Kosara, Edward Segel, Jeffrey Heer, Matthew Conlen, John Maeda, Kennedy Elliott, Scott McCloud, and many others.
---
**Usage Limits:** This service is limited to 20 queries per day per user to manage costs. Responses are optimized for English.
<div style="text-align: center; margin-top: 20px; opacity: 0.6; font-size: 0.9em;">
Embeddings: Jina-CLIP-v2 | Charts: Datawrapper API | Database: Nuanced
</div>
""")
# Launch configuration
if __name__ == "__main__":
    # Warn, without aborting, about environment variables that are not set.
    # The Datawrapper key is not checked because each user supplies it in the UI.
    missing_vars = [
        name
        for name in ("SUPABASE_URL", "SUPABASE_KEY", "HF_TOKEN")
        if not os.getenv(name)
    ]
    if missing_vars:
        print(f"โš ๏ธ Warning: Missing environment variables: {', '.join(missing_vars)}")
        print("Please set these in your .env file or as environment variables")

    # Launch the app: listen on every interface, port 7860, no share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_api=True)