aamanlamba and Claude committed on
Commit ffe0724 · 1 Parent(s): b304992

Add Google Gemini AI Assistant chatbot


Features:
- AI Assistant tab powered by Google Gemini (gemini-1.5-flash)
- Natural language lineage generation from pipeline descriptions
- Auto-extract JSON from AI responses
- One-click transfer of generated JSON to lineage tool
- Updated hero section and footer with AI Assistant info

Sponsor integration: Google Gemini API

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
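
For reference, the lineage JSON the new assistant is prompted to return (and that the one-click transfer pastes into the lineage tool) follows the nodes/edges schema defined in LINEAGE_AGENT_PROMPT, added in app.py below. A minimal sketch with hypothetical node names, validated the same way use_generated_json validates pasted metadata:

```python
import json

# Hypothetical three-step pipeline using the nodes/edges schema from LINEAGE_AGENT_PROMPT.
example_lineage = {
    "nodes": [
        {"id": "pg_orders", "type": "source", "name": "Postgres orders"},
        {"id": "stg_orders", "type": "model", "name": "stg_orders"},
        {"id": "orders_report", "type": "report", "name": "Orders Report"},
    ],
    "edges": [
        {"from": "pg_orders", "to": "stg_orders"},
        {"from": "stg_orders", "to": "orders_report"},
    ],
}

# The app validates pasted metadata with json.loads before using it.
json.loads(json.dumps(example_lineage))
print(json.dumps(example_lineage, indent=2))
```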

Files changed (3)
  1. README.md +1 -1
  2. app.py +209 -0
  3. requirements.txt +1 -0
README.md CHANGED
@@ -290,7 +290,7 @@ python test_setup.py
 ## 🔜 Roadmap
 
 - [x] Gradio 6 upgrade for enhanced UI components
-- [ ] Agentic chatbot for natural language queries
+- [x] Agentic chatbot for natural language queries (Google Gemini)
 - [x] Apache Atlas export support
 - [ ] File upload functionality
 - [x] Graph export as PNG/SVG
app.py CHANGED
@@ -22,6 +22,13 @@ try:
 except ImportError:
     EXPORTERS_AVAILABLE = False
 
+# Import Google Gemini for agentic chatbot
+try:
+    import google.generativeai as genai
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
+
 # ============================================================================
 # Constants and Configuration
 # ============================================================================
 
@@ -773,6 +780,127 @@ def extract_lineage_from_url(
     return render_mermaid(viz), f"Lineage from URL: {url or 'not specified'}"
 
 
+# ============================================================================
+# Gemini Agentic Chatbot
+# ============================================================================
+
+LINEAGE_AGENT_PROMPT = """You are a Data Lineage Assistant powered by the Lineage Graph Accelerator tool.
+You help users understand, extract, and visualize data lineage from various sources.
+
+Your capabilities:
+1. **Extract Lineage**: Parse metadata from dbt manifests, Airflow DAGs, SQL DDL, and custom JSON
+2. **Explain Lineage**: Help users understand data flow and dependencies
+3. **Generate Metadata**: Create lineage JSON from natural language descriptions
+4. **Export Guidance**: Advise on exporting to data catalogs (OpenLineage, Collibra, Purview, Alation, Atlas)
+
+When users describe their data pipeline, generate valid JSON lineage in this format:
+```json
+{
+  "nodes": [
+    {"id": "unique_id", "type": "source|table|model|view|report", "name": "Display Name"}
+  ],
+  "edges": [
+    {"from": "source_id", "to": "target_id"}
+  ]
+}
+```
+
+Node types: source, table, model, view, report, dimension, fact, destination, task
+
+Be helpful, concise, and always offer to generate lineage JSON when users describe data flows.
+If the user provides metadata or describes a pipeline, generate the JSON they can paste into the tool."""
+
+
+def init_gemini(api_key: str) -> bool:
+    """Initialize Gemini with the provided API key."""
+    if not GEMINI_AVAILABLE:
+        return False
+    if not api_key:
+        return False
+    try:
+        genai.configure(api_key=api_key)
+        return True
+    except Exception:
+        return False
+
+
+def chat_with_gemini(
+    message: str,
+    history: List[Dict[str, str]],
+    api_key: str
+) -> Tuple[List[Dict[str, str]], str]:
+    """Chat with Gemini about data lineage."""
+    if not GEMINI_AVAILABLE:
+        return history + [
+            {"role": "user", "content": message},
+            {"role": "assistant", "content": "Google Gemini is not available. Please install the google-generativeai package."}
+        ], ""
+
+    if not api_key:
+        return history + [
+            {"role": "user", "content": message},
+            {"role": "assistant", "content": "Please enter your Google Gemini API key to use the chatbot. You can get one at https://makersuite.google.com/app/apikey"}
+        ], ""
+
+    try:
+        genai.configure(api_key=api_key)
+        model = genai.GenerativeModel('gemini-1.5-flash')
+
+        # Build conversation history for context
+        chat_history = []
+        for msg in history:
+            role = "user" if msg.get("role") == "user" else "model"
+            chat_history.append({"role": role, "parts": [msg.get("content", "")]})
+
+        # Start chat with history
+        chat = model.start_chat(history=chat_history)
+
+        # Send message with system prompt context
+        full_prompt = f"{LINEAGE_AGENT_PROMPT}\n\nUser query: {message}"
+        response = chat.send_message(full_prompt)
+
+        assistant_message = response.text
+
+        # Extract any JSON from the response for the metadata field
+        extracted_json = ""
+        if "```json" in assistant_message:
+            try:
+                json_start = assistant_message.find("```json") + 7
+                json_end = assistant_message.find("```", json_start)
+                if json_end > json_start:
+                    extracted_json = assistant_message[json_start:json_end].strip()
+            except Exception:
+                pass
+
+        new_history = history + [
+            {"role": "user", "content": message},
+            {"role": "assistant", "content": assistant_message}
+        ]
+
+        return new_history, extracted_json
+
+    except Exception as e:
+        error_msg = f"Error communicating with Gemini: {str(e)}"
+        return history + [
+            {"role": "user", "content": message},
+            {"role": "assistant", "content": error_msg}
+        ], ""
+
+
+def use_generated_json(json_text: str) -> Tuple[str, str, str]:
+    """Use the generated JSON in the lineage extractor."""
+    if not json_text.strip():
+        return "", "", "No JSON to use. Ask the chatbot to generate lineage JSON first."
+
+    try:
+        # Validate JSON
+        json.loads(json_text)
+        # Return the JSON to be used in the main tab
+        return json_text, "Custom JSON", "JSON copied to metadata input. Switch to 'Text/File Metadata' tab and click 'Extract Lineage'."
+    except json.JSONDecodeError as e:
+        return "", "", f"Invalid JSON: {str(e)}"
+
+
 # ============================================================================
 # Gradio UI
 # ============================================================================
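A minimal sketch of exercising the new chatbot functions outside the Gradio UI, assuming google-generativeai is installed and a valid Gemini key is available; GEMINI_API_KEY is an illustrative environment variable, not something the app itself reads:

```python
import os

# Assumes chat_with_gemini and use_generated_json from app.py are importable.
api_key = os.environ.get("GEMINI_API_KEY", "")  # illustrative; the app takes the key from a textbox

history, extracted_json = chat_with_gemini(
    "A PostgreSQL database feeds a Spark ETL job that loads a Snowflake warehouse.",
    history=[],        # messages-format history: [{"role": ..., "content": ...}]
    api_key=api_key,
)
print(history[-1]["content"])  # assistant reply; any fenced JSON block is also returned separately

# Hand extracted JSON to the lineage tool the same way the "Use This JSON" button does.
if extracted_json:
    metadata, source_label, status = use_generated_json(extracted_json)
    print(status)
```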
 
@@ -797,6 +925,7 @@ with gr.Blocks(
     | **Visualize** | Generate interactive Mermaid diagrams with color-coded nodes and relationship labels |
     | **Export** | Export to enterprise data catalogs: OpenLineage, Collibra, Purview, Alation, Atlas |
     | **MCP Integration** | Connect to MCP servers for AI-powered metadata extraction |
+    | **AI Assistant** | Chat with Gemini to generate lineage from natural language descriptions |
 
     ### Quick Start
 
 
@@ -1046,6 +1175,85 @@ with gr.Blocks(
             outputs=[demo_viz, demo_summary]
         )
 
+    # Tab 5: AI Chatbot (Gemini)
+    with gr.Tab("AI Assistant", id="chatbot"):
+        gr.Markdown("""
+        ## Lineage AI Assistant (Powered by Google Gemini)
+
+        Ask questions about data lineage, describe your data pipeline in natural language,
+        and get JSON metadata you can use to visualize lineage.
+
+        **Examples:**
+        - "I have a PostgreSQL database that feeds into a Spark ETL job, which outputs to a Snowflake warehouse"
+        - "Generate lineage for a dbt project with staging, intermediate, and mart layers"
+        - "What's the best way to document column-level lineage?"
+        """)
+
+        with gr.Row():
+            with gr.Column(scale=2):
+                gemini_api_key = gr.Textbox(
+                    label="Google Gemini API Key",
+                    placeholder="Enter your Gemini API key (get one at makersuite.google.com)",
+                    type="password",
+                    info="Your API key is not stored and is only used for this session"
+                )
+
+                chatbot_display = gr.Chatbot(
+                    label="Chat with Lineage AI",
+                    height=400
+                )
+
+                with gr.Row():
+                    chat_input = gr.Textbox(
+                        label="Your message",
+                        placeholder="Describe your data pipeline or ask about lineage...",
+                        lines=2,
+                        scale=4
+                    )
+                    send_btn = gr.Button("Send", variant="primary", scale=1)
+
+                with gr.Accordion("Generated JSON (if any)", open=False):
+                    generated_json = gr.Code(
+                        label="Extracted JSON",
+                        language="json",
+                        lines=10
+                    )
+                    use_json_btn = gr.Button("Use This JSON in Lineage Tool", size="sm")
+                    json_status = gr.Textbox(label="Status", interactive=False)
+
+        # Chat handlers
+        chat_state = gr.State([])
+
+        def handle_chat(message, history, api_key):
+            if not message.strip():
+                return history, "", history
+            new_history, extracted = chat_with_gemini(message, history, api_key)
+            return new_history, extracted, new_history
+
+        send_btn.click(
+            fn=handle_chat,
+            inputs=[chat_input, chat_state, gemini_api_key],
+            outputs=[chatbot_display, generated_json, chat_state]
+        ).then(
+            fn=lambda: "",
+            outputs=[chat_input]
+        )
+
+        chat_input.submit(
+            fn=handle_chat,
+            inputs=[chat_input, chat_state, gemini_api_key],
+            outputs=[chatbot_display, generated_json, chat_state]
+        ).then(
+            fn=lambda: "",
+            outputs=[chat_input]
+        )
+
+        use_json_btn.click(
+            fn=use_generated_json,
+            inputs=[generated_json],
+            outputs=[metadata_input, source_type, json_status]
+        )
+
     # Footer
     gr.Markdown("""
     ---
 
@@ -1058,6 +1266,7 @@ with gr.Blocks(
     | **Collibra** | Collibra Data Intelligence | Enterprise data governance |
     | **Purview** | Microsoft Purview | Azure ecosystem |
     | **Alation** | Alation Data Catalog | Self-service analytics |
+    | **Atlas** | Apache Atlas | Open-source governance |
 
     ---
 
requirements.txt CHANGED
@@ -1,5 +1,6 @@
 gradio>=6.0.0
 anthropic>=0.25.0
 google-cloud-bigquery>=3.10.0
+google-generativeai>=0.8.0
 requests>=2.31.0
 pyyaml>=6.0
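
After installing the updated requirements (pip install -r requirements.txt), a quick sanity check that the new dependency imports, mirroring the GEMINI_AVAILABLE guard added at the top of app.py:

```python
# Mirrors the try/except guard added at the top of app.py.
try:
    import google.generativeai as genai  # provided by google-generativeai>=0.8.0
    print("google-generativeai import OK")
except ImportError:
    print("google-generativeai missing; the chatbot will report that Gemini is unavailable")
```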