DSDUDEd committed (verified)
Commit ac2910f
1 Parent(s): 874a433

Update app.py

Files changed (1)
  1. app.py +36 -71
app.py CHANGED
@@ -1,85 +1,50 @@
 import asyncio
-from datasets import load_dataset
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
+from datasets import load_dataset
+
+# -------------------------------
+# Model loading
+# -------------------------------
+MODEL_NAME = "LiquidAI/LFM2-2.6B"
 
-# ---------- CONFIG ----------
-MODEL_NAME = "LiquidAI/LFM2-2.6B"  # Replace with your model
-MAX_PROMPTS_INITIAL = 20  # Initial prompts to load fast
-MAX_PROMPTS_TOTAL = 200  # Load all prompts later
-MAX_NEW_TOKENS = 50
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+print("Loading model...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+print("Model loaded.")
 
-# ---------- FUNCTION TO LOAD PROMPTS ----------
+# -------------------------------
+# Fetch prompts from Hugging Face
+# -------------------------------
 async def fetch_prompts():
-    """
-    Load prompts using Hugging Face datasets library.
-    Returns a list of prompts.
-    """
+    print("Fetching prompts from Hugging Face dataset...")
     dataset = load_dataset("fka/awesome-chatgpt-prompts", split="train")
-    # Extract prompts as a simple list
-    prompts = [item["prompt"] for item in dataset]
-    return prompts
-
-# ---------- FUNCTION TO LOAD MODEL ----------
-def load_model():
-    """
-    Load tokenizer and model to device.
-    """
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-    model.to(DEVICE)
-    return tokenizer, model
-
-# ---------- FUNCTION TO GENERATE RESPONSE ----------
-def generate_response(tokenizer, model, prompt, max_new_tokens=MAX_NEW_TOKENS):
-    """
-    Generate a response from the model for a given prompt.
-    """
-    messages = [{"role": "user", "content": prompt}]
-    inputs = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        tokenize=True,
-        return_dict=True,
-        return_tensors="pt"
-    ).to(DEVICE)
-
-    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
-    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])
-    return response
+    all_prompts = dataset['prompt']  # List of 200+ prompts
+    print(f"Total prompts available: {len(all_prompts)}")
+    return all_prompts
 
-# ---------- MAIN FUNCTION ----------
+# -------------------------------
+# Main async function
+# -------------------------------
 async def main():
-    print("Loading model...")
-    tokenizer, model = load_model()
-    print("Model loaded on", DEVICE)
-
-    print("Fetching initial prompts...")
     all_prompts = await fetch_prompts()
-    print(f"Total prompts available: {len(all_prompts)}")
 
-    # Load initial batch
-    initial_prompts = all_prompts[:MAX_PROMPTS_INITIAL]
-    print(f"Using first {MAX_PROMPTS_INITIAL} prompts for fast startup...")
+    # Use first 20 for fast startup
+    fast_prompts = all_prompts[:20]
+    print("Using first 20 prompts for fast startup...")
+    for i, p in enumerate(fast_prompts, 1):
+        print(f"[Prompt {i}] {p}")
 
-    # Generate responses for initial prompts
-    for i, prompt in enumerate(initial_prompts, 1):
-        response = generate_response(tokenizer, model, prompt)
-        print(f"[Prompt {i}] {prompt}")
-        print(f"[Response {i}] {response}\n")
+    # Load remaining prompts asynchronously
+    remaining_prompts = all_prompts[20:]
+    print(f"Loading remaining {len(remaining_prompts)} prompts asynchronously...")
 
-    # Optionally, load all remaining prompts asynchronously after initial batch
-    remaining_prompts = all_prompts[MAX_PROMPTS_INITIAL:MAX_PROMPTS_TOTAL]
-    if remaining_prompts:
-        print(f"Loading remaining {len(remaining_prompts)} prompts asynchronously...")
-        # Here you could generate responses asynchronously if desired
-        # Example: loop over remaining prompts and generate later
-        await asyncio.sleep(0.1)  # placeholder for async tasks
+    # Example: You could process remaining prompts in background
+    # For demonstration, just print count after "async load"
+    await asyncio.sleep(1)
+    print("Remaining prompts loaded.")
 
-# ---------- ENTRY POINT ----------
+# -------------------------------
+# Entry point
+# -------------------------------
 if __name__ == "__main__":
-    try:
-        asyncio.run(main())
-    except Exception as e:
-        print("Fatal error:", str(e))
+    asyncio.run(main())
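
The updated app.py still leaves the background handling of the remaining prompts as a placeholder (await asyncio.sleep(1)). A minimal sketch of one way to fill that in, assuming a blocking per-prompt handler (the process_prompt function below is hypothetical, standing in for whatever the Space eventually does with each prompt), is to offload the blocking call with asyncio.to_thread and schedule the batch with asyncio.create_task so the first fast-startup prompts print without waiting:

import asyncio

def process_prompt(prompt: str) -> str:
    # Hypothetical blocking handler; in the Space this is where model.generate() could run.
    return prompt.upper()

async def handle_remaining(prompts):
    # Offload each blocking call to a worker thread so the event loop stays responsive.
    for prompt in prompts:
        result = await asyncio.to_thread(process_prompt, prompt)
        print(f"[background] {result}")

async def main():
    all_prompts = ["prompt one", "prompt two", "prompt three"]  # stand-in for fetch_prompts()
    fast_prompts, remaining_prompts = all_prompts[:2], all_prompts[2:]

    # Start the background batch first, then print the fast-startup prompts immediately.
    background = asyncio.create_task(handle_remaining(remaining_prompts))
    for i, p in enumerate(fast_prompts, 1):
        print(f"[Prompt {i}] {p}")

    await background  # make sure the background work finishes before exiting

if __name__ == "__main__":
    asyncio.run(main())

asyncio.to_thread requires Python 3.9 or newer; on older interpreters, loop.run_in_executor(None, process_prompt, prompt) is the equivalent way to keep the blocking work off the event loop.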