Shago committed on
Commit
e226b59
·
verified ·
1 Parent(s): 715205b

Update llm.py

Browse files
Files changed (1) hide show
  1. llm.py +21 -13
llm.py CHANGED
@@ -7,8 +7,8 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
7
  # Initialize HF pipeline for text generation
8
  text_generator = pipeline(
9
  "text-generation", # Task type
10
- # model="google/gemma-3n-e4b-it",
11
- model="Qwen/Qwen3-Embedding-0.6B",
12
  # device="cuda" if torch.cuda.is_available() else "cpu",
13
  device= "cpu",
14
  torch_dtype=torch.bfloat16,
@@ -20,17 +20,25 @@ model = HuggingFacePipeline(pipeline=text_generator)
20
 
21
def generate_sentences(topic, n=1):
    """Generate up to *n* simple English sentences about *topic*.

    Builds a single, coherent instruction prompt, runs it through the
    LCEL chain (prompt | model | parser), and returns the non-empty
    output lines.

    Args:
        topic: Subject the sentences should be about.
        n: Number of sentences requested (default 1).

    Returns:
        A list of at most *n* stripped sentence strings.
    """
    # NOTE(review): the previous template accidentally concatenated two
    # conflicting instruction sets, and the adjacent literals
    # "...bullets" "Generate exactly..." fused into "bulletsGenerate"
    # (implicit concatenation with no separator).  Keep one coherent
    # instruction block instead.
    prompt = ChatPromptTemplate.from_template(
        "You are a helpful assistant. "
        "Generate exactly {n} simple English sentences about {topic}. "
        "Each sentence must:\n"
        "- Be appropriate for all audiences\n"
        "- Be 5-20 words long\n"
        "- Avoid complex vocabulary\n\n"
        "### Format\n"
        "Return ONLY the sentences, one per line with no numbering."
    )
    chain = prompt | model | StrOutputParser()
    response = chain.invoke({"topic": topic, "n": n})
    # Drop blank lines and cap the result at n sentences.
    return [s.strip() for s in response.splitlines() if s.strip()][:n]
 
 
 
 
 
 
 
 
 
7
  # Initialize HF pipeline for text generation
8
  text_generator = pipeline(
9
  "text-generation", # Task type
10
+ model="google/gemma-3n-e4b-it",
11
+ # model="Qwen/Qwen3-Embedding-0.6B",
12
  # device="cuda" if torch.cuda.is_available() else "cpu",
13
  device= "cpu",
14
  torch_dtype=torch.bfloat16,
 
20
 
21
def generate_sentences(topic, n=1):
    """Generate up to *n* simple English sentences about *topic*.

    Prompts the model to emit one sentence per line, invokes the LCEL
    chain (prompt | model | parser), then filters the raw output down
    to clean sentence lines.

    Args:
        topic: Subject the sentences should be about.
        n: Number of sentences requested (default 1).

    Returns:
        A list of at most *n* stripped sentence strings.
    """
    prompt = ChatPromptTemplate.from_template(
        "### Instruction\n"
        "Create exactly {n} English sentences about {topic} with these rules:\n"
        "- Maximum 10 words per sentence\n"
        "- Use simple vocabulary\n"
        "- Family-friendly content\n\n"
        "### Output Format\n"
        "Return ONLY the sentences, one per line with:\n"
        "- No bullet points\n"
        "- No numbering\n"
        "- No extra text or explanations"
    )
    chain = prompt | model | StrOutputParser()
    response = chain.invoke({"topic": topic, "n": n})

    # Filter out model chatter: blank lines, echoed prompt headers, and
    # over-long lines.  Strip once per line — the previous version
    # stripped twice and tested startswith() on the *unstripped* line,
    # so an indented echoed "### ..." header leaked through the filter.
    sentences = []
    for raw in response.splitlines():
        line = raw.strip()
        if not line:
            continue
        if line.startswith(("###", "Instruction", "Output Format")):
            continue
        # Word-limit enforcement (tolerates a bit over the 10-word ask).
        if len(line.split()) > 15:
            continue
        sentences.append(line)
    return sentences[:n]