"""Gradio "Dataset Explorer" tab: load, search and preview TinySQL datasets."""

import gradio as gr
import pandas as pd
from datasets import load_dataset

# Dropdown label -> Hugging Face Hub dataset repository id.
DATASETS = {
    "CS1": "withmartian/cs1_dataset",
    "CS2": "withmartian/cs2_dataset",
    "CS3": "withmartian/cs3_dataset",
    "CS2 Synonyms": "withmartian/cs2_dataset_synonyms",
    "CS3 Synonyms": "withmartian/cs3_dataset_synonyms",
    "CS4 Synonyms": "withmartian/cs4_dataset_synonyms",
}

# Columns shown in the preview table, in display order.
COLUMNS = ["create_statement", "english_prompt", "sql_statement"]


def load_preview(dataset_name, max_rows=500):
    """Load the first rows of a dataset's ``train`` split as a DataFrame.

    Args:
        dataset_name: A key of ``DATASETS``.
        max_rows: Maximum number of rows to keep in the preview.

    Returns:
        A DataFrame restricted to ``COLUMNS`` when all of them are present
        (otherwise every column is kept). If loading fails for any reason, a
        one-row DataFrame with a single ``"Error"`` column holding the
        exception message is returned instead of raising.
    """
    try:
        ds = load_dataset(DATASETS[dataset_name], split="train")
        # Truncate before converting so only the preview rows are materialised
        # in pandas, not the whole split.
        df = ds.select(range(min(len(ds), max_rows))).to_pandas()
    except Exception as e:
        # UI boundary: surface the failure in the table rather than crashing
        # the event handler.
        return pd.DataFrame({"Error": [str(e)]})
    if all(col in df.columns for col in COLUMNS):
        df = df[COLUMNS]
    return df


def filter_dataframe(df, search_query):
    """Return the rows of ``df`` in which any cell contains ``search_query``.

    Matching is a case-insensitive literal substring test on the string form
    of each cell. The query is NOT interpreted as a regular expression, so
    input such as ``COUNT(*)`` is safe. ``df`` is returned unchanged when the
    query is empty, the frame is empty, or it is an error frame (has an
    ``"Error"`` column).
    """
    if not search_query or df.empty or "Error" in df.columns:
        return df
    as_text = df.astype(str)
    # Column-wise vectorised match, then OR across columns for each row.
    hits = as_text.apply(
        lambda col: col.str.contains(
            search_query, case=False, na=False, regex=False
        )
    )
    return df[hits.any(axis=1)]


# HuggingFace-style CSS
hf_style_css = """
/* HuggingFace-inspired table styling */
.dataframe-container {
    border-radius: 12px !important;
    overflow: hidden !important;
    border: 1px solid #2A2A2A !important;
    background: #1A1A1A !important;
}
.dataframe table {
    border-collapse: separate !important;
    border-spacing: 0 !important;
    width: 100% !important;
}
.dataframe thead {
    background: linear-gradient(135deg, #2A2A2A 0%, #3A3A3A 100%) !important;
    position: sticky !important;
    top: 0 !important;
    z-index: 10 !important;
}
.dataframe thead th {
    color: #FF6B4A !important;
    font-weight: 600 !important;
    text-align: left !important;
    padding: 1rem !important;
    border-bottom: 2px solid #FF6B4A !important;
    font-size: 0.9rem !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
}
.dataframe tbody tr {
    background: #1A1A1A !important;
    transition: all 0.2s ease !important;
    border-bottom: 1px solid #2A2A2A !important;
}
.dataframe tbody tr:hover {
    background: #2A2A2A !important;
    box-shadow: 0 2px 8px rgba(255, 107, 74, 0.1) !important;
    transform: scale(1.01) !important;
}
.dataframe tbody td {
    padding: 0.75rem 1rem !important;
    color: #D0D0D0 !important;
    font-size: 0.9rem !important;
    line-height: 1.5 !important;
}
.dataframe tbody tr:nth-child(even) {
    background: #181818 !important;
}
.dataframe tbody tr:nth-child(even):hover {
    background: #2A2A2A !important;
}
/* Cute badges for dataset types */
.dataset-badge {
    display: inline-block;
    padding: 0.25rem 0.75rem;
    border-radius: 12px;
    font-size: 0.8rem;
    font-weight: 600;
    margin: 0.25rem;
}
.badge-basic {
    background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
    color: white;
}
.badge-medium {
    background: linear-gradient(135deg, #FF9800 0%, #F57C00 100%);
    color: white;
}
.badge-advanced {
    background: linear-gradient(135deg, #f44336 0%, #d32f2f 100%);
    color: white;
}
/* Cute info boxes */
.cute-info-box {
    background: linear-gradient(135deg, #2A2A2A 0%, #3A3A3A 100%);
    border-radius: 16px;
    padding: 1.5rem;
    margin: 1rem 0;
    border: 2px solid #FF6B4A;
    box-shadow: 0 4px 12px rgba(255, 107, 74, 0.15);
    position: relative;
    overflow: hidden;
}
.cute-info-box::before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    width: 4px;
    height: 100%;
    background: linear-gradient(180deg, #FF6B4A 0%, #FF5733 100%);
}
.cute-info-box h3 {
    color: #FF6B4A;
    font-size: 1.1rem;
    margin-bottom: 0.5rem;
    font-weight: 600;
}
.cute-info-box p {
    color: #D0D0D0;
    line-height: 1.6;
    margin: 0;
}
/* Loading animation */
.loading {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid #3A3A3A;
    border-top: 3px solid #FF6B4A;
    border-radius: 50%;
    animation: spin 1s linear infinite;
}
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}
/* Cute buttons */
.cute-button {
    background: linear-gradient(135deg, #FF6B4A 0%, #FF5733 100%) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 0.75rem 1.5rem !important;
    font-weight: 600 !important;
    color: white !important;
    box-shadow: 0 4px 12px rgba(255, 107, 74, 0.3) !important;
    transition: all 0.3s ease !important;
}
.cute-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 16px rgba(255, 107, 74, 0.4) !important;
}
/* Search box */
.search-box input {
    background: #2A2A2A !important;
    border: 2px solid #3A3A3A !important;
    border-radius: 12px !important;
    padding: 0.75rem !important;
    color: #E0E0E0 !important;
    transition: all 0.3s ease !important;
}
.search-box input:focus {
    border-color: #FF6B4A !important;
    box-shadow: 0 0 0 3px rgba(255, 107, 74, 0.1) !important;
}
"""


def dataset_viewer(shared_instruction, shared_schema):
    """Build the dataset explorer UI and wire up its event handlers.

    Args:
        shared_instruction: Component that receives the selected row's
            ``english_prompt`` when "Run in Model Demo" is clicked.
        shared_schema: Component that receives the selected row's
            ``create_statement`` on the same click.

    Returns:
        A dict with the ``df_state`` (loaded preview) and ``df_display``
        (results table) components.

    NOTE(review): presumably called inside an active ``gr.Blocks`` context,
    since components are created here without opening one -- confirm against
    the caller.
    """
    # NOTE(review): the three gr.HTML snippets below contain plain text only,
    # while the stylesheet above defines classes such as .cute-info-box and
    # .dataset-badge. The markup may have been lost upstream -- confirm.
    gr.HTML("""

📊 Dataset Explorer

Browse, search, and explore TinySQL datasets

""")
    gr.HTML("""

🎯 Quick Start

Select a dataset, click "Load Dataset", then use search to filter. Pick any row and send it to the Model Demo tab!

""")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🎛️ Controls")
            dataset_dropdown = gr.Dropdown(
                choices=list(DATASETS.keys()),
                value="CS1",
                label="Choose Dataset",
                info="Select complexity level",
            )
            gr.HTML("""

Dataset Levels

CS1 Basic SELECT
CS2 + ORDER BY
CS3 + Aggregations
CS4 + WHERE filters
✨ Synonyms Natural variations
""")
            load_btn = gr.Button(
                "📥 Load Dataset",
                variant="primary",
                size="lg",
                elem_classes="cute-button",
            )
            gr.Markdown("### 🎯 Test Example")
            row_selector = gr.Number(
                label="Row Number",
                value=0,
                minimum=0,
                precision=0,
                info="Pick a row to test",
            )
            send_to_model_btn = gr.Button(
                "🚀 Run in Model Demo",
                variant="primary",
                elem_classes="cute-button",
            )

        with gr.Column(scale=3):
            gr.Markdown("### 📋 Dataset Preview")
            search_box = gr.Textbox(
                label="🔍 Search",
                placeholder="Search across all columns...",
                lines=1,
                elem_classes="search-box",
            )
            df_display = gr.Dataframe(
                headers=COLUMNS,
                datatype=["str", "str", "str"],
                interactive=False,
                wrap=True,
                label="Results",
                elem_classes="dataframe-container",
            )
            stats_display = gr.Markdown(
                "👆 Click **Load Dataset** to begin exploring",
                elem_classes="stats-info",
            )

    # Full loaded preview (the search source).
    df_state = gr.State(value=pd.DataFrame())
    # Rows currently shown in the table. Kept separately so that "Row Number"
    # refers to the row the user actually sees after a search, not to the
    # unfiltered preview.
    view_state = gr.State(value=pd.DataFrame())

    def load_and_display(dataset_name):
        """Load the chosen dataset and reset the table to the full preview."""
        df = load_preview(dataset_name)
        if "Error" in df.columns:
            return df, df, df, "❌ Error loading dataset"
        stats = f"✅ **Loaded {len(df)} rows** • {', '.join(COLUMNS)}"
        return df, df, df, stats

    load_btn.click(
        fn=load_and_display,
        inputs=dataset_dropdown,
        outputs=[df_state, view_state, df_display, stats_display],
    )

    def search_and_display(df, query):
        """Filter the loaded preview by ``query`` and update the table."""
        if df.empty:
            return df, df, "⚠️ Load a dataset first"
        filtered_df = filter_dataframe(df, query)
        stats = f"📊 **Showing {len(filtered_df)} of {len(df)} rows**"
        if query:
            stats += f" • 🔍 Search: '{query}'"
        return filtered_df, filtered_df, stats

    search_box.change(
        fn=search_and_display,
        inputs=[df_state, search_box],
        outputs=[view_state, df_display, stats_display],
    )

    def send_to_model(df, row_num):
        """Return (instruction, schema, status) for the displayed row."""
        invalid = ("", "", "⚠️ Invalid row or no data loaded")
        # A cleared Number field arrives as None; an error frame has no
        # prompt/schema columns to send.
        if row_num is None or df.empty or "Error" in df.columns:
            return invalid
        idx = int(row_num)
        # Reject negatives explicitly: iloc would silently index from the end.
        if not 0 <= idx < len(df):
            return invalid
        row = df.iloc[idx]
        instruction = row.get("english_prompt", "")
        schema = row.get("create_statement", "")
        return (
            instruction,
            schema,
            f"✅ **Row {idx} loaded!** Switch to Model Demo tab 👉",
        )

    send_to_model_btn.click(
        fn=send_to_model,
        inputs=[view_state, row_selector],
        outputs=[shared_instruction, shared_schema, stats_display],
    )

    return {'df_state': df_state, 'df_display': df_display}