DSDUDEd committed on
Commit 0f71c84 · verified · 1 Parent(s): d65bc64

Update app.py

Files changed (1)
  app.py +63 -56
app.py CHANGED
@@ -1,69 +1,76 @@
- import os
  import asyncio
- import pandas as pd
- from datasets import load_dataset
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
  import gradio as gr

- # Paths
- PROMPTS_CSV = "prompts.csv"
- MODEL_NAME = "LiquidAI/LFM2-2.6B"

- # Check for dataset, download if missing
- if not os.path.exists(PROMPTS_CSV):
-     print("prompts.csv not found. Downloading dataset from Hugging Face...")
-     dataset = load_dataset("fka/awesome-chatgpt-prompts", split="train")
-     df = pd.DataFrame(dataset)
-     df.to_csv(PROMPTS_CSV, index=False)
-     print("Dataset saved to prompts.csv")
- else:
-     df = pd.read_csv(PROMPTS_CSV)

- all_prompts = df['prompt'].tolist()
- print(f"Total prompts available: {len(all_prompts)}")

- # Load first 20 prompts for fast startup
- fast_prompts = all_prompts[:20]
- remaining_prompts = all_prompts[20:]

- # Load tokenizer and model
- print("Loading model...")
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model.to(device)
- print(f"Model loaded on {device}")

- # Async function to load remaining prompts
- async def load_remaining_prompts():
-     global fast_prompts
-     print("Loading remaining prompts asynchronously...")
-     await asyncio.sleep(1) # simulate async loading
-     fast_prompts.extend(remaining_prompts)
-     print("All prompts loaded.")

- # Function to generate response
- def generate_response(prompt, max_tokens=100):
-     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-     outputs = model.generate(**inputs, max_new_tokens=max_tokens)
-     response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
-     return response

  # Gradio interface
- def chat_with_prompt(prompt_idx):
-     prompt = fast_prompts[prompt_idx]
-     response = generate_response(prompt)
-     return f"Prompt:\n{prompt}\n\nResponse:\n{response}"
-
- with gr.Blocks() as demo:
-     gr.Markdown("## ChatGPT Prompt Tester")
-     prompt_dropdown = gr.Dropdown(choices=[str(i) for i in range(len(fast_prompts))], label="Select Prompt Index")
-     output_text = gr.Textbox(label="Model Response", lines=15)
-     prompt_dropdown.change(chat_with_prompt, inputs=prompt_dropdown, outputs=output_text)

- # Run async loading in the background
- asyncio.create_task(load_remaining_prompts())

- # Launch Gradio
- demo.launch(server_name="0.0.0.0", server_port=7860)
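
Two bugs in the removed version are worth flagging before the rewrite below. chat_with_prompt indexed fast_prompts with the dropdown's value, which Gradio delivers as a string (the choices are built with str(i)), so any selection raised a TypeError (list indices must be integers). And the module-level asyncio.create_task(...) raises RuntimeError: no running event loop, since no loop is running at import time. A minimal sketch of the index fix, not part of this commit:

def chat_with_prompt(prompt_idx):
    # gr.Dropdown returns the chosen string, so coerce before indexing
    prompt = fast_prompts[int(prompt_idx)]
    response = generate_response(prompt)
    return f"Prompt:\n{prompt}\n\nResponse:\n{response}"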
  import asyncio
+ import random
  import gradio as gr
+ from datasets import load_dataset
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

+ print("===== Application Startup =====")

+ # -----------------------
+ # Load model
+ # -----------------------
+ print("Loading model...")
+ model_name = "gpt2" # you can swap this for a larger model if you have GPU
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
+ print("Model loaded successfully.")

+ # -----------------------
+ # Load dataset
+ # -----------------------
+ print("Fetching dataset...")
+ dataset = load_dataset("lvwerra/stack-exchange-paired", split="train[:200]")
+ # limit to 200 for speed – you can increase if you want
+ print(f"Total prompts available: {len(dataset)}")

+ # Split dataset
+ initial_prompts = dataset[:20] # first 20 for fast startup
+ remaining_prompts = dataset[20:] # remaining ~180

+ # Storage for loaded prompts
+ prompts = []
+ for item in initial_prompts:
+     prompts.append(item["question"])

+ print(f"Loaded {len(prompts)} initial prompts for fast startup.")

+ # -----------------------
+ # Async loading of remaining prompts
+ # -----------------------
+ async def load_remaining_prompts():
+     print("Background: Loading remaining prompts...")
+     await asyncio.sleep(2) # simulate delay
+     for item in remaining_prompts:
+         prompts.append(item["question"])
+     print(f"Background: Finished loading. Total prompts now = {len(prompts)}")

+ # -----------------------
  # Gradio interface
+ # -----------------------
+ def chat_with_model(user_input):
+     """Respond to user with a random dataset prompt + model output."""
+     if not prompts:
+         return "Prompts not ready yet. Please wait..."
+     prompt = random.choice(prompts)
+     response = generator(f"{prompt}\n\nUser: {user_input}\nAI:",
+                          max_length=100,
+                          num_return_sequences=1,
+                          do_sample=True)[0]["generated_text"]
+     return response

+ demo = gr.Interface(
+     fn=chat_with_model,
+     inputs=gr.Textbox(lines=2, placeholder="Ask me something..."),
+     outputs="text",
+     title="Fast Prompt Loader Chatbot",
+     description="Loads 20 prompts fast, then background loads 200+ prompts"
+ )

+ # -----------------------
+ # App runner
+ # -----------------------
+ if __name__ == "__main__":
+     loop = asyncio.get_event_loop()
+     loop.create_task(load_remaining_prompts()) # schedule async loading
+     demo.launch(server_name="0.0.0.0", server_port=7860)
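
Two caveats on the rewritten version. Slicing a datasets.Dataset, as in dataset[:20], returns a dict of columns rather than a list of rows, so the for item in initial_prompts loop iterates over column names and item["question"] fails; Dataset.select keeps row-wise access. And loop.create_task only schedules the coroutine: demo.launch() then blocks the main thread without ever running that loop, so the remaining prompts never load. A minimal sketch of both fixes, reusing the commit's own names (dataset, prompts, demo) and not part of the commit itself:

import threading
import time

# .select() returns a Dataset that still yields one dict per row
initial_rows = dataset.select(range(20))
remaining_rows = dataset.select(range(20, len(dataset)))
prompts = [row["question"] for row in initial_rows]

def load_remaining_prompts_blocking():
    # plain blocking function on a daemon thread, replacing the asyncio
    # task that never ran because the event loop was never started
    time.sleep(2)  # keep the simulated delay from the commit
    prompts.extend(row["question"] for row in remaining_rows)
    print(f"Background: Finished loading. Total prompts now = {len(prompts)}")

if __name__ == "__main__":
    threading.Thread(target=load_remaining_prompts_blocking, daemon=True).start()
    demo.launch(server_name="0.0.0.0", server_port=7860)

Separately, max_length=100 in the generator call counts the prompt tokens as well as the newly generated ones, and Stack Exchange questions routinely exceed 100 tokens on their own; max_new_tokens=100 would be the safer bound.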