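# Gradio demo app for AiOS: All-in-One-Stage Expressive Human Pose and Shape
# Estimation (project: https://ttxskk.github.io/AiOS/). On startup it builds
# any missing native dependencies, downloads the pretrained checkpoint from
# the Hugging Face Hub, and serves a video inference UI.
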
import os
import sys
import subprocess
import pkg_resources


def is_package_installed(package_name):
    """Return True if the given distribution is installed, False otherwise."""
    try:
        pkg_resources.get_distribution(package_name)
        return True
    except pkg_resources.DistributionNotFound:
        return False
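
# A forward-compatible variant of the check above, since pkg_resources is
# deprecated in recent setuptools (a sketch using importlib.metadata,
# available on Python >= 3.8):
#
#   from importlib.metadata import distribution, PackageNotFoundError
#
#   def is_package_installed(package_name):
#       try:
#           distribution(package_name)
#           return True
#       except PackageNotFoundError:
#           return False

# Build the native extensions from the vendored sources when missing.
# MMCV_WITH_OPS=1 tells mmcv's setup to compile its CUDA/C++ ops; FORCE_MLU=1
# (mmcv's Cambricon MLU build flag) is kept from the original setup.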
| if is_package_installed("mmcv"): | |
| print("MMCV is installed.") | |
| else: | |
| print("MMCV is not installed. Build it from the source.") | |
| os.environ["MMCV_WITH_OPS"] = "1" | |
| os.environ["FORCE_MLU"] = "1" | |
| subprocess.run(["pip", "install", "-e", "./mmcv"], check=True) | |
| subprocess.run(["pip", "list"], check=True) | |
| if is_package_installed("pytorch3d"): | |
| print("pytorch3d is installed.") | |
| else: | |
| print("pytorch3d is not installed. Build it from the source.") | |
| subprocess.run(["pip", "install", "-e", "./pytorch3d"], check=True) | |
| if is_package_installed("MultiScaleDeformableAttention"): | |
| print("MultiScaleDeformableAttention is installed.") | |
| else: | |
| print("MultiScaleDeformableAttention is not installed. Build it from the source.") | |
| subprocess.run(["pip", "install", "-e", "./models/aios/ops"], check=True) | |
import os.path as osp
import math
from pathlib import Path

import cv2
import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download

# Fetch the pretrained checkpoint from the Hub. Absolute paths follow the
# Hugging Face Spaces layout, where the app runs from /home/user/app.
hf_hub_download(
    repo_id="ttxskk/AiOS",
    filename="aios_checkpoint.pth",
    local_dir="/home/user/app/pretrained_models",
)

OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)
DEMO_CONFIG = '/home/user/app/config/aios_smplx_demo.py'
MODEL_PATH = '/home/user/app/pretrained_models/aios_checkpoint.pth'
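

# infer() shells out to the AiOS entry point through torchrun rather than
# calling the model in-process, then packages the rendered video and exported
# meshes for the UI.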
def infer(video_input, batch_size, threshold=0.3, num_person=1):
    """Run AiOS on `video_input`; yield (result video path, zipped meshes path)."""
    os.system(f'rm -rf {OUT_FOLDER}/*')  # clear results from the previous run
    os.system(f'torchrun --nproc_per_node 1 \
        main.py \
        -c {DEMO_CONFIG} \
        --options batch_size={batch_size} backbone="resnet50" num_person={num_person} threshold={threshold} \
        --resume {MODEL_PATH} \
        --eval \
        --inference \
        --inference_input {video_input} \
        --to_vid \
        --output_dir {OUT_FOLDER}')
    video_path = os.path.join(OUT_FOLDER, 'demo_vid.mp4')
    save_path_img = os.path.join(OUT_FOLDER, 'res_img')  # per-frame renders; not surfaced in the UI
    save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
    save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
    os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
    yield video_path, save_mesh_file
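
# Caveat for infer() above: the os.system calls interpolate user-supplied
# values unquoted; wrapping them with shlex.quote() would harden the shell
# commands against filenames containing spaces or metacharacters.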
| TITLE = """ | |
| <div style="display: flex; justify-content: center; align-items: center; text-align: center;"> | |
| <div> | |
| <h1 align="center">AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation</h1> | |
| </div> | |
| </div> | |
| <div style="display: flex; justify-content: center; align-items: center; text-align: center;"> | |
| <div style="display:flex; gap: 0.25rem;" align="center"> | |
| <a href="https://ttxskk.github.io/AiOS/" target="_blank"><img src='https://img.shields.io/badge/Project-Page-Green'></a> | |
| <a href="https://github.com/ttxskk/AiOS" target="_blank"><img src='https://img.shields.io/badge/Github-Code-blue'></a> | |
| <a href="https://ttxskk.github.io/AiOS/assets/aios_cvpr24.pdf" target="_blank"><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a> | |
| </div> | |
| </div> | |
| <div style="font-size: 1.1rem; color: #555; max-width: 800px; margin: 1rem auto; line-height: 1.5; justify-content: center; align-items: center; text-align: center;"> | |
| <div> | |
| <p>Recover multiple expressive human pose and shape from an RGB image without any additional requirements, such as an off-the-shelf detection model.</h1> | |
| </div> | |
| </div> | |
| """ | |

VIDEO = '''
<center>
<iframe width="960" height="540"
    src="https://www.youtube.com/embed/yzCL7TYpzvc?si=EoxWNE6VPBxsy7Go"
    title="AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation"
    frameborder="0"
    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
    allowfullscreen>
</iframe>
</center><br/>
'''

DESCRIPTION = '''
<p>
Note: the score threshold sets the minimum confidence for person detection (default 0.3).
Detections whose confidence score falls below the threshold are discarded.
</p>
'''
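
# Gradio UI: a video input plus batch size, person count, and score threshold
# controls, wired to infer(); outputs are the rendered result video and the
# zipped meshes.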
with gr.Blocks(title="AiOS", theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray")) as demo:
    gr.Markdown(TITLE)
    gr.HTML(VIDEO)
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Input video", elem_classes="video")
        with gr.Column(scale=1):
            batch_size = gr.Textbox(label="Batch Size", type="text", value="16")
            num_person = gr.Textbox(label="Number of Persons", type="text", value="1")
            threshold = gr.Slider(0, 1.0, value=0.3, label='Score Threshold')
            send_button = gr.Button("Infer")
    gr.HTML("""<br/>""")
    with gr.Row():
        with gr.Column():
            # processed_frames = gr.Image(label="Last processed frame")
            video_output = gr.Video(elem_classes="video")
        with gr.Column():
            meshes_output = gr.File(label="3D meshes")
    send_button.click(fn=infer, inputs=[video_input, batch_size, threshold, num_person], outputs=[video_output, meshes_output])
    # example_videos = gr.Examples([
    #     ['./assets/01.mp4'],
    #     ['./assets/02.mp4'],
    #     ['./assets/03.mp4'],
    #     ['./assets/04.mp4'],
    #     ['./assets/05.mp4'],
    #     ['./assets/06.mp4'],
    #     ['./assets/07.mp4'],
    #     ['./assets/08.mp4'],
    #     ['./assets/09.mp4'],
    # ], inputs=[video_input, 0.5])

demo.queue().launch(debug=True)
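# launch() serves the app on Gradio's default port (7860); queue() enables
# request queuing so long-running inference jobs are processed in order.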