"""Summarise the .mp4 files stored in a Hugging Face dataset repository.

Prints the number of .mp4 files under each top-level path component, then
the number of .mp4 files under each of a fixed list of folders.
"""

import re
from collections import Counter
from typing import Optional

from huggingface_hub import list_repo_files

REPO = "SGTLIM/ucf101_eval_unified"

# Folders whose .mp4 counts are reported individually at the end.
FOLDERS = ["Wan2.2", "RunwayGen4", "Hunyuan_videos", "Opensora_768", "wan21_videos"]

# A file-name token made up of exactly two or three digits.
_INDEX_TOKEN = re.compile(r"\d{2,3}")

files = list_repo_files(repo_id=REPO, repo_type="dataset")
mp4s = [f for f in files if f.lower().endswith(".mp4")]

# Number of .mp4 files per top-level folder (a file at the repository root
# is counted under its own name, since it has no "/" to split on).
by_top = Counter(f.split("/", 1)[0] for f in mp4s)
print(by_top)


def act(p: str) -> Optional[str]:
    """Return the token that precedes the first numeric index in a file name.

    The last path component of *p* is taken, its extension (text after the
    final ".") is dropped, and the remainder is split on "_". The first token
    consisting solely of two or three digits is treated as the index, and the
    token immediately before it is returned.

    Returns ``None`` when no such token exists, or when the first such token
    is the very first token (there is nothing before it).
    """
    tokens = p.rsplit("/", 1)[-1].rsplit(".", 1)[0].split("_")
    for i, token in enumerate(tokens):
        if _INDEX_TOKEN.fullmatch(token):
            # Only the first matching token is considered; later matches are
            # never examined, even if this one has no predecessor.
            return tokens[i - 1] if i > 0 else None
    return None


for folder in FOLDERS:
    xs = [f for f in mp4s if f.startswith(folder + "/")]
    print(folder, len(xs), "files")