| """ | |
| π¨π Live Apertus Transparency Dashboard | |
| Real-time visualization of all model internals | |
| """ | |
| import streamlit as st | |
| import sys | |
| import os | |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src')) | |
| import torch | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from plotly.subplots import make_subplots | |
| import pandas as pd | |
| from apertus_core import ApertusCore | |
| from transparency_analyzer import ApertusTransparencyAnalyzer | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
# Configure Streamlit
st.set_page_config(
    page_title="🇨🇭 Apertus Transparency Dashboard",
    page_icon="🇨🇭",
    layout="wide",
    initial_sidebar_state="expanded"
)

@st.cache_resource
def load_apertus_model():
    """Load the Apertus model once and cache it across Streamlit reruns."""
    with st.spinner("🧠 Loading Apertus model..."):
        apertus = ApertusCore(enable_transparency=True)
        analyzer = ApertusTransparencyAnalyzer(apertus)
    return apertus, analyzer

def create_attention_heatmap(attention_weights, tokens):
    """Create an interactive attention heatmap."""
    fig = px.imshow(
        attention_weights,
        x=tokens,
        y=tokens,
        color_continuous_scale='Blues',
        title="Attention Pattern Heatmap",
        labels={'x': 'Key Tokens', 'y': 'Query Tokens', 'color': 'Attention Weight'}
    )
    fig.update_layout(
        width=600,
        height=600,
        xaxis={'side': 'bottom', 'tickangle': 45},
        yaxis={'side': 'left'}
    )
    return fig

def create_layer_evolution_plot(layer_stats):
    """Create a layer-by-layer evolution plot."""
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('L2 Norms', 'Mean Activations', 'Std Deviations', 'Activation Ranges'),
        vertical_spacing=0.12
    )
    layers = [stat['layer'] for stat in layer_stats]

    # L2 norms
    fig.add_trace(
        go.Scatter(x=layers, y=[stat['l2_norm'] for stat in layer_stats],
                   mode='lines+markers', name='L2 Norm', line=dict(color='blue')),
        row=1, col=1
    )
    # Mean activations
    fig.add_trace(
        go.Scatter(x=layers, y=[stat['mean'] for stat in layer_stats],
                   mode='lines+markers', name='Mean', line=dict(color='red')),
        row=1, col=2
    )
    # Standard deviations
    fig.add_trace(
        go.Scatter(x=layers, y=[stat['std'] for stat in layer_stats],
                   mode='lines+markers', name='Std Dev', line=dict(color='green')),
        row=2, col=1
    )
    # Activation ranges (max - min)
    fig.add_trace(
        go.Scatter(x=layers, y=[stat['max'] - stat['min'] for stat in layer_stats],
                   mode='lines+markers', name='Range', line=dict(color='purple')),
        row=2, col=2
    )
    fig.update_layout(height=500, showlegend=False, title="Layer-by-Layer Neural Evolution")
    return fig

def create_prediction_bar_chart(predictions):
    """Create a token prediction bar chart."""
    tokens = [pred['token'] for pred in predictions[:10]]
    probs = [pred['probability'] for pred in predictions[:10]]
    fig = px.bar(
        x=tokens, y=probs,
        title="Top 10 Token Predictions",
        labels={'x': 'Tokens', 'y': 'Probability'},
        color=probs,
        color_continuous_scale='Viridis'
    )
    fig.update_layout(height=400, showlegend=False)
    return fig

def create_architecture_overview(model_info):
    """Create a schematic model architecture visualization."""
    fig = go.Figure()
    layers = model_info['num_layers']
    hidden_size = model_info['hidden_size']

    # Draw the first eight layers as blocks
    for i in range(min(8, layers)):
        fig.add_shape(
            type="rect",
            x0=i, y0=0, x1=i + 0.8, y1=1,
            fillcolor="lightblue",
            line=dict(color="darkblue", width=2)
        )
        fig.add_annotation(
            x=i + 0.4, y=0.5,
            text=f"L{i}",
            showarrow=False,
            font=dict(size=10)
        )
    if layers > 8:
        fig.add_annotation(
            x=8.5, y=0.5,
            text=f"... {layers - 8} more",
            showarrow=False,
            font=dict(size=12)
        )
    fig.update_layout(
        title=f"Model Architecture ({layers} layers, {hidden_size}d hidden)",
        xaxis=dict(range=[-0.5, 9], showgrid=False, showticklabels=False),
        yaxis=dict(range=[-0.5, 1.5], showgrid=False, showticklabels=False),
        height=200,
        showlegend=False
    )
    return fig

def main():
    """Main dashboard application."""
    # Header
    st.title("🇨🇭 Apertus Swiss AI Transparency Dashboard")
    st.markdown("### Real-time visualization of all model internals")

    # Sidebar
    st.sidebar.title("🔧 Analysis Settings")

    # Load model
    try:
        apertus, analyzer = load_apertus_model()
        st.sidebar.success("✅ Model loaded successfully!")

        # Model info in sidebar
        model_info = apertus.get_model_info()
        st.sidebar.markdown("### 📊 Model Info")
        st.sidebar.write(f"**Model**: {model_info['model_name']}")
        st.sidebar.write(f"**Parameters**: {model_info['total_parameters']:,}")
        st.sidebar.write(f"**Layers**: {model_info['num_layers']}")
        st.sidebar.write(f"**Hidden Size**: {model_info['hidden_size']}")
        if 'gpu_memory_allocated_gb' in model_info:
            st.sidebar.write(f"**GPU Memory**: {model_info['gpu_memory_allocated_gb']:.1f} GB")
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        st.stop()
    # Input text
    st.markdown("### 📝 Input Text")
    example_texts = [
        "Apertus ist ein transparentes KI-Modell aus der Schweiz.",
        "Machine learning requires transparency for trust and understanding.",
        "La Suisse développe des modèles d'intelligence artificielle transparents.",
        "Artificial intelligence should be explainable and interpretable.",
    ]

    # Keep the text area's content in session state so the example buttons
    # can update it; on_click callbacks run before the next script rerun,
    # which makes modifying the widget's state safe.
    if "input_text" not in st.session_state:
        st.session_state["input_text"] = example_texts[0]

    def set_example(text):
        st.session_state["input_text"] = text

    col1, col2 = st.columns([3, 1])
    with col1:
        input_text = st.text_area(
            "Enter text to analyze:",
            key="input_text",
            height=100
        )
    with col2:
        st.markdown("**Examples:**")
        for i, example in enumerate(example_texts):
            st.button(f"Example {i+1}", key=f"example_{i}",
                      on_click=set_example, args=(example,))

    if not input_text.strip():
        st.warning("Please enter some text to analyze.")
        st.stop()
    # Analysis settings
    st.sidebar.markdown("### ⚙️ Analysis Options")
    show_architecture = st.sidebar.checkbox("Show Architecture", True)
    show_tokenization = st.sidebar.checkbox("Show Tokenization", True)
    show_layers = st.sidebar.checkbox("Show Layer Analysis", True)
    show_attention = st.sidebar.checkbox("Show Attention", True)
    show_predictions = st.sidebar.checkbox("Show Predictions", True)

    # Clamp the slider default so models with fewer than 16 layers still work
    max_layer = model_info['num_layers'] - 1
    attention_layer = st.sidebar.slider("Attention Layer", 0, max_layer, min(15, max_layer))
    num_predictions = st.sidebar.slider("Top-K Predictions", 5, 20, 10)
    # Run analysis
    if st.button("🔍 Analyze Transparency", type="primary"):
        with st.spinner("🧠 Analyzing model internals..."):
            # Tokenize once up front and move tensors to the model's device;
            # every section below reuses these inputs, so each one works
            # even when the others are toggled off.
            inputs = apertus.tokenizer(input_text, return_tensors="pt").to(apertus.model.device)
            tokens = apertus.tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
            hidden_states = None
            # Architecture overview
            if show_architecture:
                st.markdown("## 🏗️ Model Architecture")
                col1, col2 = st.columns([2, 1])
                with col1:
                    arch_fig = create_architecture_overview(model_info)
                    st.plotly_chart(arch_fig, use_container_width=True)
                with col2:
                    st.markdown("**Architecture Details:**")
                    st.write("• **Type**: Transformer Decoder")
                    st.write(f"• **Layers**: {model_info['num_layers']}")
                    st.write(f"• **Attention Heads**: {model_info['num_attention_heads']}")
                    st.write(f"• **Hidden Size**: {model_info['hidden_size']}")
                    st.write(f"• **Parameters**: {model_info['total_parameters']:,}")
                    st.write(f"• **Context**: {model_info['max_position_embeddings']:,} tokens")
            # Tokenization
            if show_tokenization:
                st.markdown("## 🔤 Tokenization Analysis")
                # Encode without special tokens so token strings and IDs
                # line up one-to-one (no BOS-token offset to work around)
                token_ids = apertus.tokenizer.encode(input_text, add_special_tokens=False)
                text_tokens = apertus.tokenizer.convert_ids_to_tokens(token_ids)

                col1, col2 = st.columns(2)
                with col1:
                    st.markdown("**Token Breakdown:**")
                    token_df = pd.DataFrame({
                        'Position': range(1, len(text_tokens) + 1),
                        'Token': text_tokens,
                        'Token ID': token_ids
                    })
                    st.dataframe(token_df, use_container_width=True)
                with col2:
                    st.markdown("**Statistics:**")
                    st.write(f"• **Original Text**: '{input_text}'")
                    st.write(f"• **Token Count**: {len(text_tokens)}")
                    st.write(f"• **Characters**: {len(input_text)}")
                    st.write(f"• **Tokens per Character**: {len(text_tokens)/len(input_text):.2f}")
            # Layer analysis
            if show_layers:
                st.markdown("## 🧠 Layer-by-Layer Processing")

                # Get hidden states for every layer
                with torch.no_grad():
                    outputs = apertus.model(**inputs, output_hidden_states=True)
                hidden_states = outputs.hidden_states

                # Analyze up to ~8 evenly spaced layers
                layer_stats = []
                sample_layers = list(range(0, len(hidden_states), max(1, len(hidden_states) // 8)))
                for layer_idx in sample_layers:
                    layer_state = hidden_states[layer_idx][0]  # drop batch dim
                    layer_stats.append({
                        'layer': layer_idx,
                        'l2_norm': torch.norm(layer_state, dim=-1).mean().item(),
                        'mean': layer_state.mean().item(),
                        'std': layer_state.std().item(),
                        'max': layer_state.max().item(),
                        'min': layer_state.min().item()
                    })

                # Plot evolution
                evolution_fig = create_layer_evolution_plot(layer_stats)
                st.plotly_chart(evolution_fig, use_container_width=True)

                # Layer statistics table
                st.markdown("**Layer Statistics:**")
                stats_df = pd.DataFrame(layer_stats).round(4)
                st.dataframe(stats_df, use_container_width=True)
            # Attention analysis
            if show_attention:
                st.markdown("## 👁️ Attention Pattern Analysis")

                # Get attention weights (the model must use an attention
                # implementation that can return them, e.g. eager attention)
                with torch.no_grad():
                    outputs = apertus.model(**inputs, output_attentions=True)
                attentions = outputs.attentions

                if attention_layer < len(attentions):
                    attention_weights = attentions[attention_layer][0]  # drop batch dim
                    avg_attention = attention_weights.mean(dim=0).cpu().numpy()  # average over heads

                    col1, col2 = st.columns([2, 1])
                    with col1:
                        attention_fig = create_attention_heatmap(avg_attention, tokens)
                        st.plotly_chart(attention_fig, use_container_width=True)
                    with col2:
                        st.markdown(f"**Layer {attention_layer} Statistics:**")
                        st.write(f"• **Attention Heads**: {attention_weights.shape[0]}")
                        st.write(f"• **Matrix Size**: {avg_attention.shape}")
                        st.write(f"• **Entropy**: {-np.sum(avg_attention * np.log(avg_attention + 1e-12)):.2f}")

                        # Tokens that receive the most attention overall
                        attention_received = avg_attention.sum(axis=0)
                        top_tokens = np.argsort(attention_received)[-3:][::-1]
                        st.markdown("**Most Attended Tokens:**")
                        for i, token_idx in enumerate(top_tokens):
                            if token_idx < len(tokens):
                                st.write(f"{i+1}. '{tokens[token_idx]}' ({attention_received[token_idx]:.3f})")
                else:
                    st.error(f"Layer {attention_layer} not available. Max layer: {len(attentions)-1}")
            # Prediction analysis
            if show_predictions:
                st.markdown("## 🎲 Next Token Predictions")

                # Get the next-token distribution at the last position
                with torch.no_grad():
                    outputs = apertus.model(**inputs)
                logits = outputs.logits[0, -1, :]
                probabilities = torch.nn.functional.softmax(logits, dim=-1)
                top_probs, top_indices = torch.topk(probabilities, num_predictions)

                # Prepare prediction data
                predictions = []
                for i in range(num_predictions):
                    token_id = top_indices[i].item()
                    predictions.append({
                        'rank': i + 1,
                        'token': apertus.tokenizer.decode([token_id]),
                        'probability': top_probs[i].item(),
                        'logit': logits[token_id].item()
                    })

                col1, col2 = st.columns([2, 1])
                with col1:
                    pred_fig = create_prediction_bar_chart(predictions)
                    st.plotly_chart(pred_fig, use_container_width=True)
                with col2:
                    st.markdown("**Prediction Statistics:**")
                    entropy = -torch.sum(probabilities * torch.log(probabilities + 1e-12)).item()
                    max_prob = probabilities.max().item()
                    top_k_sum = top_probs.sum().item()
                    st.write(f"• **Entropy**: {entropy:.2f}")
                    st.write(f"• **Max Probability**: {max_prob:.1%}")
                    st.write(f"• **Top-{num_predictions} Sum**: {top_k_sum:.1%}")
                    confidence = "High" if max_prob > 0.5 else "Medium" if max_prob > 0.2 else "Low"
                    st.write(f"• **Confidence**: {confidence}")

                # Predictions table
                st.markdown("**Top Predictions:**")
                pred_df = pd.DataFrame(predictions)
                pred_df['probability'] = pred_df['probability'].apply(lambda x: f"{x:.1%}")
                pred_df['logit'] = pred_df['logit'].apply(lambda x: f"{x:+.2f}")
                st.dataframe(pred_df[['rank', 'token', 'probability', 'logit']], use_container_width=True)
            # Summary
            st.markdown("## 📊 Transparency Summary")
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Tokens Analyzed", len(tokens))
            with col2:
                # hidden_states is only populated when layer analysis ran
                layers_processed = len(hidden_states) if hidden_states is not None else model_info['num_layers']
                st.metric("Layers Processed", layers_processed)
            with col3:
                st.metric("Attention Heads", model_info['num_attention_heads'])
            with col4:
                if 'gpu_memory_allocated_gb' in model_info:
                    st.metric("GPU Memory", f"{model_info['gpu_memory_allocated_gb']:.1f} GB")
                else:
                    st.metric("Parameters", f"{model_info['total_parameters']:,}")

            st.success("✅ Complete transparency analysis finished!")
            st.info("🇨🇭 This demonstrates the full transparency capabilities of Apertus Swiss AI - "
                    "every layer, attention pattern, and prediction is completely visible!")
    # Footer
    st.markdown("---")
    st.markdown("🇨🇭 **Apertus Swiss AI** - The world's most transparent language model")


if __name__ == "__main__":
    main()
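
# Minimal usage sketch: launch the dashboard with Streamlit. The file name
# below is an assumption -- adjust it to wherever this script lives (the
# sys.path line at the top expects a sibling src/ directory):
#   streamlit run live_dashboard.py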