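# tinysql-demo / tinysql_dataset_viewer.py
# Hugging Face file-viewer residue (not part of the program):
# uploader avatar "abir-hr196's picture", commit "updates" (0e1846b),
# links "raw" / "history blame", file size 10.9 kB.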
import gradio as gr
from datasets import load_dataset
import pandas as pd
# Maps the label offered in the dataset dropdown to the repository id that is
# handed to ``load_dataset``.
DATASETS = {
    "CS1": "withmartian/cs1_dataset",
    "CS2": "withmartian/cs2_dataset",
    "CS3": "withmartian/cs3_dataset",
    "CS2 Synonyms": "withmartian/cs2_dataset_synonyms",
    "CS3 Synonyms": "withmartian/cs3_dataset_synonyms",
    "CS4 Synonyms": "withmartian/cs4_dataset_synonyms",
}

# Columns kept for the preview table (and used as its headers), in display
# order.
COLUMNS = [
    "create_statement",
    "english_prompt",
    "sql_statement",
]
def load_preview(dataset_name, max_rows=500):
    """Load the first rows of a dataset's ``train`` split as a DataFrame.

    Args:
        dataset_name: Key of ``DATASETS`` identifying which dataset to load.
        max_rows: Maximum number of rows to keep in the preview.

    Returns:
        A DataFrame with at most ``max_rows`` rows. When every name in
        ``COLUMNS`` is present, only those columns are returned, in that
        order; otherwise all columns are kept. On failure (unknown key or
        any error while loading) a one-row DataFrame with a single
        ``"Error"`` column holding the message is returned instead of
        raising, so the caller can display it.
    """
    try:
        repo_id = DATASETS[dataset_name]
    except (KeyError, TypeError):
        return pd.DataFrame({"Error": [f"Unknown dataset: {dataset_name!r}"]})

    try:
        ds = load_dataset(repo_id, split="train")
        # Slice before converting: building a DataFrame from the whole split
        # only to keep its head converts every row for nothing.
        df = pd.DataFrame(ds[:max_rows])
    except Exception as e:  # UI boundary: report the failure, do not crash
        return pd.DataFrame({"Error": [str(e)]})

    if all(col in df.columns for col in COLUMNS):
        df = df[COLUMNS]
    return df
def filter_dataframe(df, search_query):
    """Return the rows of ``df`` in which any cell contains ``search_query``.

    Matching is a case-insensitive *literal* substring test performed on the
    string form of every cell.

    Args:
        df: DataFrame to filter.
        search_query: Text to look for. A falsy value (``""`` or ``None``)
            disables filtering.

    Returns:
        ``df`` itself when there is nothing to filter (no query, empty
        frame, or an error frame carrying an ``"Error"`` column); otherwise
        a new DataFrame holding only the matching rows, original index kept.
    """
    if not search_query or df.empty or "Error" in df.columns:
        return df

    # regex=False: the query is free text typed by a user. SQL fragments such
    # as "COUNT(" or "*" are not valid regular expressions and would raise
    # re.error if interpreted as patterns.
    hits = df.astype(str).apply(
        lambda col: col.str.contains(
            search_query, case=False, na=False, regex=False
        )
    )
    # A row matches when at least one of its columns matched.
    return df[hits.any(axis=1)]
# HuggingFace-style CSS
#
# Dark-theme stylesheet kept as one string. The selectors below line up with
# names used by dataset_viewer() in this file:
#   - "dataframe-container", "search-box" and "cute-button" are attached to
#     components through elem_classes;
#   - "cute-info-box", "dataset-badge" and the "badge-*" variants appear in
#     the raw HTML snippets.
# The ".loading" spinner rule is not referenced by the code visible here.
# NOTE(review): this string is not consumed in this module; presumably the
# app that imports it passes it to Gradio as custom CSS -- confirm.
hf_style_css = """
/* HuggingFace-inspired table styling */
.dataframe-container {
border-radius: 12px !important;
overflow: hidden !important;
border: 1px solid #2A2A2A !important;
background: #1A1A1A !important;
}
.dataframe table {
border-collapse: separate !important;
border-spacing: 0 !important;
width: 100% !important;
}
.dataframe thead {
background: linear-gradient(135deg, #2A2A2A 0%, #3A3A3A 100%) !important;
position: sticky !important;
top: 0 !important;
z-index: 10 !important;
}
.dataframe thead th {
color: #FF6B4A !important;
font-weight: 600 !important;
text-align: left !important;
padding: 1rem !important;
border-bottom: 2px solid #FF6B4A !important;
font-size: 0.9rem !important;
text-transform: uppercase !important;
letter-spacing: 0.5px !important;
}
.dataframe tbody tr {
background: #1A1A1A !important;
transition: all 0.2s ease !important;
border-bottom: 1px solid #2A2A2A !important;
}
.dataframe tbody tr:hover {
background: #2A2A2A !important;
box-shadow: 0 2px 8px rgba(255, 107, 74, 0.1) !important;
transform: scale(1.01) !important;
}
.dataframe tbody td {
padding: 0.75rem 1rem !important;
color: #D0D0D0 !important;
font-size: 0.9rem !important;
line-height: 1.5 !important;
}
.dataframe tbody tr:nth-child(even) {
background: #181818 !important;
}
.dataframe tbody tr:nth-child(even):hover {
background: #2A2A2A !important;
}
/* Cute badges for dataset types */
.dataset-badge {
display: inline-block;
padding: 0.25rem 0.75rem;
border-radius: 12px;
font-size: 0.8rem;
font-weight: 600;
margin: 0.25rem;
}
.badge-basic {
background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
color: white;
}
.badge-medium {
background: linear-gradient(135deg, #FF9800 0%, #F57C00 100%);
color: white;
}
.badge-advanced {
background: linear-gradient(135deg, #f44336 0%, #d32f2f 100%);
color: white;
}
/* Cute info boxes */
.cute-info-box {
background: linear-gradient(135deg, #2A2A2A 0%, #3A3A3A 100%);
border-radius: 16px;
padding: 1.5rem;
margin: 1rem 0;
border: 2px solid #FF6B4A;
box-shadow: 0 4px 12px rgba(255, 107, 74, 0.15);
position: relative;
overflow: hidden;
}
.cute-info-box::before {
content: '';
position: absolute;
top: 0;
left: 0;
width: 4px;
height: 100%;
background: linear-gradient(180deg, #FF6B4A 0%, #FF5733 100%);
}
.cute-info-box h3 {
color: #FF6B4A;
font-size: 1.1rem;
margin-bottom: 0.5rem;
font-weight: 600;
}
.cute-info-box p {
color: #D0D0D0;
line-height: 1.6;
margin: 0;
}
/* Loading animation */
.loading {
display: inline-block;
width: 20px;
height: 20px;
border: 3px solid #3A3A3A;
border-top: 3px solid #FF6B4A;
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Cute buttons */
.cute-button {
background: linear-gradient(135deg, #FF6B4A 0%, #FF5733 100%) !important;
border: none !important;
border-radius: 12px !important;
padding: 0.75rem 1.5rem !important;
font-weight: 600 !important;
color: white !important;
box-shadow: 0 4px 12px rgba(255, 107, 74, 0.3) !important;
transition: all 0.3s ease !important;
}
.cute-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 16px rgba(255, 107, 74, 0.4) !important;
}
/* Search box */
.search-box input {
background: #2A2A2A !important;
border: 2px solid #3A3A3A !important;
border-radius: 12px !important;
padding: 0.75rem !important;
color: #E0E0E0 !important;
transition: all 0.3s ease !important;
}
.search-box input:focus {
border-color: #FF6B4A !important;
box-shadow: 0 0 0 3px rgba(255, 107, 74, 0.1) !important;
}
"""
def dataset_viewer(shared_instruction, shared_schema):
    """Build the "Dataset Explorer" UI and wire up its event handlers.

    The layout has a narrow control column (dataset dropdown, level legend,
    load button, row picker, send button) and a wide preview column (search
    box, results table, status line). Three callbacks are registered:

    * Load button: fetch a preview with ``load_preview`` and show it.
    * Search box change: filter the loaded preview with ``filter_dataframe``.
    * Send button: copy the chosen row's ``english_prompt`` and
      ``create_statement`` into ``shared_instruction`` / ``shared_schema``.

    NOTE(review): components are created directly, so this presumably has to
    be called inside an active Gradio layout context -- confirm at the call
    site.

    Args:
        shared_instruction: Component that receives the selected row's
            ``english_prompt`` value.
        shared_schema: Component that receives the selected row's
            ``create_statement`` value.

    Returns:
        Dict with the created ``'df_state'`` (``gr.State`` holding the loaded
        preview) and ``'df_display'`` (the results ``gr.Dataframe``).
    """
    # HTML snippets are kept flush-left on purpose so the emitted markup does
    # not pick up the Python indentation.
    gr.HTML("""
<div style="text-align: center; padding: 2rem 1.5rem; background: linear-gradient(135deg, #2A2A2A 0%, #3A3A3A 100%); border-radius: 16px; margin-bottom: 1.5rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
<h2 style="font-size: 2rem; font-weight: 700; margin-bottom: 0.5rem; color: #FF6B4A;">📊 Dataset Explorer</h2>
<p style="font-size: 1rem; opacity: 0.9; line-height: 1.6; color: #D0D0D0;">
Browse, search, and explore TinySQL datasets
</p>
</div>
""")
    gr.HTML("""
<div class="cute-info-box">
<h3>🎯 Quick Start</h3>
<p>Select a dataset, click "Load Dataset", then use search to filter. Pick any row and send it to the Model Demo tab!</p>
</div>
""")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🎛️ Controls")
            dataset_dropdown = gr.Dropdown(
                choices=list(DATASETS.keys()),
                value="CS1",
                label="Choose Dataset",
                info="Select complexity level"
            )
            gr.HTML("""
<div style="background: #2A2A2A; border-radius: 12px; padding: 1.25rem; margin: 1rem 0; border: 1px solid #3A3A3A;">
<h4 style="color: #FF6B4A; font-size: 0.95rem; margin-bottom: 1rem;">Dataset Levels</h4>
<div style="margin: 0.5rem 0;">
<span class="dataset-badge badge-basic">CS1</span>
<span style="color: #999; font-size: 0.85rem; margin-left: 0.5rem;">Basic SELECT</span>
</div>
<div style="margin: 0.5rem 0;">
<span class="dataset-badge badge-basic">CS2</span>
<span style="color: #999; font-size: 0.85rem; margin-left: 0.5rem;">+ ORDER BY</span>
</div>
<div style="margin: 0.5rem 0;">
<span class="dataset-badge badge-medium">CS3</span>
<span style="color: #999; font-size: 0.85rem; margin-left: 0.5rem;">+ Aggregations</span>
</div>
<div style="margin: 0.5rem 0;">
<span class="dataset-badge badge-advanced">CS4</span>
<span style="color: #999; font-size: 0.85rem; margin-left: 0.5rem;">+ WHERE filters</span>
</div>
<div style="margin: 0.5rem 0; padding-top: 0.5rem; border-top: 1px solid #3A3A3A;">
<span style="color: #FF6B4A; font-size: 0.85rem;">✨ Synonyms</span>
<span style="color: #999; font-size: 0.85rem; margin-left: 0.5rem;">Natural variations</span>
</div>
</div>
""")
            load_btn = gr.Button("📥 Load Dataset", variant="primary", size="lg", elem_classes="cute-button")
            gr.Markdown("### 🎯 Test Example")
            row_selector = gr.Number(
                label="Row Number",
                value=0,
                minimum=0,
                precision=0,
                info="Pick a row to test"
            )
            send_to_model_btn = gr.Button("🚀 Run in Model Demo", variant="primary", elem_classes="cute-button")

        with gr.Column(scale=3):
            gr.Markdown("### 📋 Dataset Preview")
            search_box = gr.Textbox(
                label="🔍 Search",
                placeholder="Search across all columns...",
                lines=1,
                elem_classes="search-box"
            )
            df_display = gr.Dataframe(
                headers=COLUMNS,
                datatype=["str", "str", "str"],
                interactive=False,
                wrap=True,
                label="Results",
                elem_classes="dataframe-container"
            )
            stats_display = gr.Markdown(
                "👆 Click **Load Dataset** to begin exploring",
                elem_classes="stats-info"
            )

    # Holds the full loaded preview; the table may show a filtered subset.
    df_state = gr.State(value=pd.DataFrame())

    def load_and_display(dataset_name):
        """Load a preview; return (state frame, displayed frame, status)."""
        df = load_preview(dataset_name)
        if "Error" in df.columns:
            # Show the error row in the table, but keep the state empty so
            # search and "send to model" treat this as "nothing loaded"
            # instead of operating on the error frame.
            return pd.DataFrame(), df, "❌ Error loading dataset"
        # Report the columns actually present rather than assuming COLUMNS.
        column_names = ', '.join(map(str, df.columns))
        stats = f"✅ **Loaded {len(df)} rows** • {column_names}"
        return df, df, stats

    load_btn.click(
        fn=load_and_display,
        inputs=dataset_dropdown,
        outputs=[df_state, df_display, stats_display]
    )

    def search_and_display(df, query):
        """Filter the loaded frame by ``query``; return (frame, status)."""
        if df.empty:
            return df, "⚠️ Load a dataset first"
        filtered_df = filter_dataframe(df, query)
        stats = f"📊 **Showing {len(filtered_df)} of {len(df)} rows**"
        if query:
            stats += f" • 🔍 Search: '{query}'"
        return filtered_df, stats

    search_box.change(
        fn=search_and_display,
        inputs=[df_state, search_box],
        outputs=[df_display, stats_display]
    )

    def send_to_model(df, row_num):
        """Return (instruction, schema, status) for row ``row_num`` of ``df``.

        NOTE(review): ``df`` is the full loaded frame from ``df_state``, so
        the row number is counted in the unfiltered data even while a search
        is narrowing the displayed table.
        """
        invalid = ("", "", "⚠️ Invalid row or no data loaded")
        # The number input may deliver None (e.g. when the field is cleared);
        # comparing None with an int would raise.
        if df.empty or row_num is None:
            return invalid
        index = int(row_num)
        # Reject negatives explicitly: iloc would otherwise count from the
        # end of the frame and silently return an unrelated row.
        if not 0 <= index < len(df):
            return invalid
        row = df.iloc[index]
        instruction = row['english_prompt'] if 'english_prompt' in row else ""
        schema = row['create_statement'] if 'create_statement' in row else ""
        return instruction, schema, f"✅ **Row {index} loaded!** Switch to Model Demo tab 👉"

    send_to_model_btn.click(
        fn=send_to_model,
        inputs=[df_state, row_selector],
        outputs=[shared_instruction, shared_schema, stats_display]
    )

    return {'df_state': df_state, 'df_display': df_display}