Spaces:
Running
Running
File size: 6,392 Bytes
579f772 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
cleanup_for_submission.py - Prepare repository for submission
This script cleans up unnecessary files while preserving the best trained model.
Usage:
# Dry run (shows what would be deleted)
python cleanup_for_submission.py --dry-run
# Actually clean up
python cleanup_for_submission.py --execute
# Keep only the best model checkpoint
python cleanup_for_submission.py --execute --keep-best
Author: R V Abhishek
"""
import os
import shutil
import argparse
import glob
# Directories (relative to this script's folder) that are removed wholesale.
CLEANUP_DIRS = [
    "temp_dataset",
    "temp",
    "temp_eval",
    "temp_hls",
    "__pycache__",
    ".history",
    "data/work",
]

# Glob patterns for individual files that are removed wherever they are found.
CLEANUP_PATTERNS = [
    "*.pyc",
    "*.pyo",
    "*.tmp",
    "*.temp",
    "*_audio.wav",  # temporary audio files
]

# Directories whose ``*.pth`` checkpoint files are candidates for removal.
CHECKPOINT_DIRS = [
    "checkpoints",
    "checkpoints_attention",
    "checkpoints_regression",
]

# File names that are preserved when the "keep best" option is active.
KEEP_FILES = [
    "syncnet_fcn_best.pth",  # best trained model
    "syncnet_v2.model",  # pretrained base model
]
def get_size_mb(path):
    """Return the size of *path* in megabytes (1 MB = 1024 * 1024 bytes).

    A regular file reports its own size.  A directory reports the combined
    size of every regular file found beneath it by ``os.walk``.  Any other
    path (for example one that does not exist) yields ``0``.
    """
    bytes_per_mb = 1024 * 1024

    if os.path.isfile(path):
        return os.path.getsize(path) / bytes_per_mb

    if not os.path.isdir(path):
        return 0

    byte_count = 0
    for root, _subdirs, names in os.walk(path):
        candidates = (os.path.join(root, name) for name in names)
        # Only regular files contribute; anything else in the listing is skipped.
        byte_count += sum(
            os.path.getsize(candidate)
            for candidate in candidates
            if os.path.isfile(candidate)
        )
    return byte_count / bytes_per_mb
def _is_inside_any(path, directories):
    """Return True when *path* equals or lies beneath one of *directories*."""
    candidate = os.path.normcase(os.path.normpath(path))
    for directory in directories:
        parent = os.path.normcase(os.path.normpath(directory))
        if candidate == parent or candidate.startswith(parent + os.sep):
            return True
    return False


def cleanup(dry_run=True, keep_best=True, verbose=True):
    """
    Clean up unnecessary files.

    Builds a removal plan from three sources, all resolved relative to the
    directory containing this script: the directories in ``CLEANUP_DIRS``,
    files matching ``CLEANUP_PATTERNS`` (searched recursively), and ``*.pth``
    files inside ``CHECKPOINT_DIRS``.  The plan and its total size are
    printed; unless ``dry_run`` is set the user is asked to confirm and the
    planned items are then deleted.

    Args:
        dry_run: If True, only show what would be deleted
        keep_best: If True, keep checkpoint files whose name is in KEEP_FILES
        verbose: Print detailed info (skipped directories, each deletion)

    Returns:
        None.  Results are reported on stdout.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))

    print("=" * 60)
    print("FCN-SyncNet Cleanup Script")
    print("=" * 60)
    print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
    print(f"Keep best model: {keep_best}")
    print()

    total_size = 0
    items_to_remove = []  # (kind, path, size_mb) tuples
    doomed_dirs = []      # directories already scheduled for removal
    scheduled = set()     # normalised paths of files already scheduled

    # 1. Clean temp directories
    print("📁 Temporary Directories:")
    for dir_name in CLEANUP_DIRS:
        dir_path = os.path.join(base_dir, dir_name)
        if os.path.exists(dir_path):
            size = get_size_mb(dir_path)
            total_size += size
            items_to_remove.append(('dir', dir_path, size))
            doomed_dirs.append(dir_path)
            print(f" [DELETE] {dir_name}/ ({size:.2f} MB)")
        elif verbose:
            print(f" [SKIP] {dir_name}/ (not found)")
    print()

    # 2. Clean file patterns
    print("📄 Temporary Files:")
    for pattern in CLEANUP_PATTERNS:
        matches = glob.glob(os.path.join(base_dir, '**', pattern), recursive=True)
        for match in matches:
            match_key = os.path.normcase(os.path.normpath(match))
            # Files inside a directory from step 1 disappear with that
            # directory: listing them again would count their size twice and
            # make the later os.remove() fail on an already-deleted path.
            if match_key in scheduled or _is_inside_any(match, doomed_dirs):
                continue
            scheduled.add(match_key)
            size = get_size_mb(match)
            total_size += size
            items_to_remove.append(('file', match, size))
            rel_path = os.path.relpath(match, base_dir)
            print(f" [DELETE] {rel_path} ({size:.2f} MB)")
    print()

    # 3. Handle checkpoints
    print("🔧 Checkpoint Directories:")
    for ckpt_dir in CHECKPOINT_DIRS:
        ckpt_path = os.path.join(base_dir, ckpt_dir)
        if not os.path.exists(ckpt_path):
            continue
        for ckpt_file in glob.glob(os.path.join(ckpt_path, '*.pth')):
            filename = os.path.basename(ckpt_file)
            size = get_size_mb(ckpt_file)
            # Keep best model if requested
            if keep_best and filename in KEEP_FILES:
                print(f" [KEEP] {ckpt_dir}/{filename} ({size:.2f} MB)")
            else:
                total_size += size
                items_to_remove.append(('file', ckpt_file, size))
                print(f" [DELETE] {ckpt_dir}/{filename} ({size:.2f} MB)")
    print()

    # Summary
    print("=" * 60)
    print(f"Total space to free: {total_size:.2f} MB")
    print(f"Items to remove: {len(items_to_remove)}")
    print("=" * 60)

    if dry_run:
        print("\n⚠️ DRY RUN - No files were deleted.")
        print(" Run with --execute to actually delete files.")
        return

    # Confirm.  A closed stdin (EOF) is treated as "no" rather than crashing.
    try:
        confirm = input("\n⚠️ Are you sure you want to delete these files? (yes/no): ")
    except EOFError:
        confirm = ''
    if confirm.strip().lower() != 'yes':
        print("Cancelled.")
        return

    # Execute cleanup
    print("\n🧹 Cleaning up...")
    deleted_count = 0
    error_count = 0
    freed_size = 0  # only counts items that were actually removed
    for item_type, item_path, item_size in items_to_remove:
        try:
            if item_type == 'dir':
                shutil.rmtree(item_path)
            else:
                os.remove(item_path)
        except OSError as e:
            # Best effort: report the failure and carry on with the rest.
            error_count += 1
            print(f" ✗ Error deleting {item_path}: {e}")
        else:
            deleted_count += 1
            freed_size += item_size
            if verbose:
                print(f" ✓ Deleted: {os.path.relpath(item_path, base_dir)}")

    print()
    print("=" * 60)
    print("✅ Cleanup complete!")
    print(f" Deleted: {deleted_count} items")
    print(f" Errors: {error_count}")
    print(f" Space freed: ~{freed_size:.2f} MB")
    print("=" * 60)
def main(argv=None):
    """Parse command-line options and run ``cleanup``.

    ``--dry-run`` / ``--execute`` and ``--keep-best`` /
    ``--delete-all-checkpoints`` are each mutually exclusive pairs, so a
    contradictory command line is rejected by argparse instead of silently
    resolving to the destructive option.  With no flags the script performs
    a dry run and keeps the best checkpoint.

    Args:
        argv: Optional list of argument strings.  When None, argparse reads
            the arguments from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Cleanup script for submission')

    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('--dry-run', action='store_true',
                      help='Show what would be deleted without deleting (default)')
    mode.add_argument('--execute', action='store_true',
                      help='Actually delete files')

    checkpoints = parser.add_mutually_exclusive_group()
    checkpoints.add_argument('--keep-best', action='store_true',
                             help='Keep the best model checkpoint (default: True)')
    checkpoints.add_argument('--delete-all-checkpoints', action='store_true',
                             help='Delete ALL checkpoints including best model')

    parser.add_argument('--quiet', action='store_true',
                        help='Less verbose output')

    args = parser.parse_args(argv)

    # Only the "destructive" flag of each pair changes behaviour; its
    # counterpart merely states the default explicitly.
    cleanup(
        dry_run=not args.execute,
        keep_best=not args.delete_all_checkpoints,
        verbose=not args.quiet,
    )


if __name__ == '__main__':
    main()
|