seanpedrickcase committed on
Commit
38198b1
·
1 Parent(s): f957de1

ZeroGPU Spaces duration can now be defined from an environment variable

Browse files
funcs/embeddings.py CHANGED
@@ -1,19 +1,23 @@
 
1
  import time
2
  import numpy as np
3
  import os
4
- import spaces
5
 
6
  from sentence_transformers import SentenceTransformer
7
  from sklearn.pipeline import make_pipeline
8
  from sklearn.decomposition import TruncatedSVD
9
  from sklearn.feature_extraction.text import TfidfVectorizer
 
 
 
10
 
11
 
12
  # If you want to disable cuda for testing purposes
13
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
14
 
15
 
16
- @spaces.GPU(duration=120)
17
  def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1", random_seed:int=42) -> np.ndarray:
18
  """
19
  Create or load embeddings for the given documents.
 
1
+ import spaces
2
  import time
3
  import numpy as np
4
  import os
5
+
6
 
7
  from sentence_transformers import SentenceTransformer
8
  from sklearn.pipeline import make_pipeline
9
  from sklearn.decomposition import TruncatedSVD
10
  from sklearn.feature_extraction.text import TfidfVectorizer
11
+ from funcs.helper_functions import GPU_SPACE_DURATION
12
+
13
+
14
 
15
 
16
  # If you want to disable cuda for testing purposes
17
  #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
18
 
19
 
20
+ @spaces.GPU(duration=GPU_SPACE_DURATION)
21
  def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndarray, embeddings_super_compress: str, high_quality_mode_opt: str, embeddings_name:str="mixedbread-ai/mxbai-embed-xsmall-v1", random_seed:int=42) -> np.ndarray:
22
  """
23
  Create or load embeddings for the given documents.
funcs/helper_functions.py CHANGED
@@ -33,6 +33,9 @@ default_value = 'output/'
33
  output_folder = get_or_create_env_var(env_var_name, default_value)
34
  print(f'The value of {env_var_name} is {output_folder}')
35
 
 
 
 
36
  def ensure_output_folder_exists():
37
  """Checks if the 'output/' folder exists, creates it if not."""
38
 
 
33
  output_folder = get_or_create_env_var(env_var_name, default_value)
34
  print(f'The value of {env_var_name} is {output_folder}')
35
 
36
+ GPU_SPACE_DURATION = int(get_or_create_env_var('GPU_SPACE_DURATION', '60'))
37
+ print(f'The value of GPU_SPACE_DURATION is {GPU_SPACE_DURATION}')
38
+
39
  def ensure_output_folder_exists():
40
  """Checks if the 'output/' folder exists, creates it if not."""
41
 
funcs/topic_core_funcs.py CHANGED
@@ -13,7 +13,7 @@ PandasDataFrame = Type[pd.DataFrame]
13
 
14
  from funcs.clean_funcs import initial_clean, regex_clean
15
  from funcs.anonymiser import expand_sentences_spacy
16
- from funcs.helper_functions import read_file, zip_folder, delete_files_in_folder, save_topic_outputs, output_folder, get_or_create_env_var, custom_regex_load
17
  from funcs.embeddings import make_or_load_embeddings
18
  from funcs.bertopic_vis_documents import visualize_documents_custom, visualize_hierarchical_documents_custom, hierarchical_topics_custom, visualize_hierarchy_custom
19
 
@@ -28,6 +28,7 @@ umap_min_dist = 0.0
28
  umap_metric = 'cosine'
29
  random_seed = 42
30
 
 
31
  today = datetime.now().strftime("%d%m%Y")
32
  today_rev = datetime.now().strftime("%Y%m%d")
33
 
@@ -546,7 +547,7 @@ def reduce_outliers(topic_model: BERTopic, docs: List[str], embeddings_out: np.n
546
 
547
  return output_text, output_list, topic_model
548
 
549
- @spaces.GPU(duration=120)
550
  def represent_topics(topic_model: BERTopic, docs: List[str], data_file_name_no_ext: str, high_quality_mode: str, save_topic_model: str, representation_type: str, vectoriser_model: CountVectorizer, split_sentence_drop: str, data: PandasDataFrame, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> tuple:
551
  """
552
  Represents topics using the specified representation model and updates the topic labels accordingly.
 
13
 
14
  from funcs.clean_funcs import initial_clean, regex_clean
15
  from funcs.anonymiser import expand_sentences_spacy
16
+ from funcs.helper_functions import read_file, zip_folder, delete_files_in_folder, save_topic_outputs, output_folder, get_or_create_env_var, custom_regex_load, GPU_SPACE_DURATION
17
  from funcs.embeddings import make_or_load_embeddings
18
  from funcs.bertopic_vis_documents import visualize_documents_custom, visualize_hierarchical_documents_custom, hierarchical_topics_custom, visualize_hierarchy_custom
19
 
 
28
  umap_metric = 'cosine'
29
  random_seed = 42
30
 
31
+
32
  today = datetime.now().strftime("%d%m%Y")
33
  today_rev = datetime.now().strftime("%Y%m%d")
34
 
 
547
 
548
  return output_text, output_list, topic_model
549
 
550
+ @spaces.GPU(duration=GPU_SPACE_DURATION)
551
  def represent_topics(topic_model: BERTopic, docs: List[str], data_file_name_no_ext: str, high_quality_mode: str, save_topic_model: str, representation_type: str, vectoriser_model: CountVectorizer, split_sentence_drop: str, data: PandasDataFrame, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> tuple:
552
  """
553
  Represents topics using the specified representation model and updates the topic labels accordingly.