Kevin Hu
committed on
Commit
·
d719333
1
Parent(s):
286159b
Enable fine-grained tokenization for 3-character words (#2210)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Performance Improvement
- rag/nlp/query.py +1 -1
rag/nlp/query.py
CHANGED
|
@@ -83,7 +83,7 @@ class EsQueryer:
|
|
| 83 |
), tks
|
| 84 |
|
| 85 |
def need_fine_grained_tokenize(tk):
|
| 86 |
-
if len(tk) <
|
| 87 |
return False
|
| 88 |
if re.match(r"[0-9a-z\.\+#_\*-]+$", tk):
|
| 89 |
return False
|
|
|
|
| 83 |
), tks
|
| 84 |
|
| 85 |
def need_fine_grained_tokenize(tk):
|
| 86 |
+
if len(tk) < 3:
|
| 87 |
return False
|
| 88 |
if re.match(r"[0-9a-z\.\+#_\*-]+$", tk):
|
| 89 |
return False
|