Spaces:

rphrp1985
/

zerogpu

Running on Zero

rphrp1985 committed on Jun 12, 2024

Commit

17749ab

verified ·

1 Parent(s): 8e94850

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,13 +5,22 @@ from torch.cuda.amp import autocast
 import subprocess
 from huggingface_hub import InferenceClient
 import os
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
@@ -78,7 +87,6 @@ def respond(
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
 ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
     # with autocast():
-    model= model.to('cuda')
     gen_tokens = model.generate(
     input_ids,
     max_new_tokens=100,

 import subprocess
 from huggingface_hub import InferenceClient
 import os
+import psutil
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
+subprocess.run(
+    "pip install psutil",
+    shell=True,
+)
+ram_info = psutil.virtual_memory()
+print(f"Total RAM: {ram_info.total / (1024.0 ** 3)} GB")
+print(f"Available RAM: {ram_info.available / (1024.0 ** 3)} GB")
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
 ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
     # with autocast():
     gen_tokens = model.generate(
     input_ids,
     max_new_tokens=100,