File size: 773 Bytes
f543cdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from huggingface_hub import list_repo_files
from collections import Counter

# Dataset repository on the Hugging Face Hub whose file listing is inspected.
REPO = "SGTLIM/ucf101_eval_unified"

# Full list of file paths in the dataset repo (one network request to the Hub).
files = list_repo_files(repo_id=REPO, repo_type="dataset")

# Keep only the video files; the extension comparison is case-insensitive.
mp4s = [path for path in files if path.lower().endswith(".mp4")]

# Tally the videos by their first path component (the text before the first "/").
by_top = Counter(path.partition("/")[0] for path in mp4s)
print(by_top)  # number of mp4 files per top-level folder

# Action parsing: the action is the token right before the 2-3 digit index token.
import re
def act(p):
    """Return the filename token that precedes the first numeric index token.

    The last path component of ``p`` (text after the final "/") has its
    extension (text after the final ".") removed and is split on "_".  The
    first token made up entirely of two or three digits is treated as the
    index; the token immediately before it is returned.

    Returns ``None`` when no such numeric token exists, or when the first
    numeric token is the very first token (nothing precedes it).  In the
    latter case the scan does not continue to later numeric tokens.
    """
    filename = p.rsplit("/", 1)[-1]
    tokens = filename.rsplit(".", 1)[0].split("_")

    # Position of the first all-digit (2-3 characters) token, if any.
    index_pos = next(
        (pos for pos, tok in enumerate(tokens) if re.fullmatch(r"\d{2,3}", tok)),
        None,
    )

    # No index token, or nothing in front of it: there is no action to report.
    if index_pos is None or index_pos == 0:
        return None
    return tokens[index_pos - 1]

# Report how many mp4 paths sit under each of the expected top-level folders.
for folder in ("Wan2.2", "RunwayGen4", "Hunyuan_videos", "Opensora_768", "wan21_videos"):
    # Include the trailing "/" so a folder name never matches a longer sibling name.
    prefix = f"{folder}/"
    xs = [name for name in mp4s if name.startswith(prefix)]
    print(folder, len(xs), "files")