File size: 6,392 Bytes
579f772
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
cleanup_for_submission.py - Prepare repository for submission

This script cleans up unnecessary files while preserving the best trained model.

Usage:
    # Dry run (shows what would be deleted)
    python cleanup_for_submission.py --dry-run
    
    # Actually clean up
    python cleanup_for_submission.py --execute
    
    # Keep only the best model checkpoint
    python cleanup_for_submission.py --execute --keep-best

Author: R V Abhishek
"""

import os
import shutil
import argparse
import glob

# Directories to clean.
# Each entry is resolved relative to the directory containing this script and,
# if it exists, is removed recursively. Only that exact location is checked:
# same-named directories nested elsewhere in the tree are not matched.
CLEANUP_DIRS = [
    'temp_dataset',
    'temp',
    'temp_eval',
    'temp_hls',
    '__pycache__',
    '.history',
    'data/work',
]

# File patterns to remove.
# Each glob pattern is searched recursively beneath the script's directory and
# every match is scheduled for deletion.
CLEANUP_PATTERNS = [
    '*.pyc',
    '*.pyo',
    '*.tmp',
    '*.temp',
    '*_audio.wav',  # Temp audio files
]

# Checkpoint directories (relative to the script's directory).
# Only the '*.pth' files directly inside each directory are treated as
# checkpoints; subdirectories are not scanned.
CHECKPOINT_DIRS = [
    'checkpoints',
    'checkpoints_attention',
    'checkpoints_regression',
]

# Files to keep (important).
# Compared by base name against the '*.pth' files found in CHECKPOINT_DIRS;
# a match is preserved when cleanup() runs with keep_best=True.
# NOTE(review): 'syncnet_v2.model' does not end in '.pth', so the checkpoint
# scan never encounters it; it is listed here for documentation only.
KEEP_FILES = [
    'syncnet_fcn_best.pth',  # Best trained model
    'syncnet_v2.model',       # Pretrained base model
]


def get_size_mb(path):
    """Return the on-disk size of ``path`` in megabytes (1 MB = 1024 * 1024 bytes).

    Args:
        path: Path to a file or a directory.

    Returns:
        The file's size, or for a directory the combined size of every
        regular file found beneath it, as a float. Returns ``0`` when the
        path is neither an existing file nor an existing directory.
    """
    bytes_per_mb = 1024 * 1024

    if os.path.isfile(path):
        return os.path.getsize(path) / bytes_per_mb

    if not os.path.isdir(path):
        return 0

    # Walk the whole tree; entries that are not regular files (for example
    # broken symlinks) are skipped so getsize() is never called on them.
    byte_count = sum(
        os.path.getsize(candidate)
        for root, _subdirs, names in os.walk(path)
        for candidate in (os.path.join(root, name) for name in names)
        if os.path.isfile(candidate)
    )
    return byte_count / bytes_per_mb


def _is_inside_any(path, directories):
    """Return True if ``path`` equals, or lies beneath, one of ``directories``."""
    normalized = os.path.normcase(os.path.normpath(path))
    for directory in directories:
        root = os.path.normcase(os.path.normpath(directory))
        if normalized == root or normalized.startswith(root + os.sep):
            return True
    return False


def cleanup(dry_run=True, keep_best=True, verbose=True):
    """
    Clean up unnecessary files.

    Builds a deletion plan from three sources, all resolved relative to the
    directory containing this script: the directories in CLEANUP_DIRS, the
    recursive glob matches of CLEANUP_PATTERNS, and the '*.pth' files found
    directly inside CHECKPOINT_DIRS. The plan and its total size are printed.
    Unless ``dry_run`` is set, the user is then asked to type 'yes' before
    anything is deleted.

    A path is scheduled at most once: files that live inside a directory that
    is already scheduled are skipped, because removing the directory removes
    them too. This keeps the reported size accurate and avoids spurious
    "file not found" errors during deletion.

    Args:
        dry_run: If True, only show what would be deleted
        keep_best: If True, keep checkpoints whose file name is in KEEP_FILES
        verbose: Print detailed info (skipped directories, each deleted item)

    Returns:
        None. All results are reported on stdout.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))

    print("="*60)
    print("FCN-SyncNet Cleanup Script")
    print("="*60)
    print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
    print(f"Keep best model: {keep_best}")
    print()

    total_size = 0
    items_to_remove = []    # (item_type, path, size_mb) in deletion order
    scheduled_dirs = []     # directories already in the plan
    scheduled_files = set()  # files already in the plan

    # 1. Clean temp directories
    print("📁 Temporary Directories:")
    for dir_name in CLEANUP_DIRS:
        dir_path = os.path.join(base_dir, dir_name)
        if os.path.exists(dir_path):
            if _is_inside_any(dir_path, scheduled_dirs):
                # Already covered by a parent directory scheduled earlier.
                continue
            size = get_size_mb(dir_path)
            total_size += size
            items_to_remove.append(('dir', dir_path, size))
            scheduled_dirs.append(dir_path)
            print(f"   [DELETE] {dir_name}/ ({size:.2f} MB)")
        else:
            if verbose:
                print(f"   [SKIP] {dir_name}/ (not found)")
    print()

    # 2. Clean file patterns
    print("📄 Temporary Files:")
    for pattern in CLEANUP_PATTERNS:
        matches = glob.glob(os.path.join(base_dir, '**', pattern), recursive=True)
        for match in matches:
            # Skip files that go away with a scheduled directory (for example
            # __pycache__/*.pyc) and files another pattern already claimed.
            if match in scheduled_files or _is_inside_any(match, scheduled_dirs):
                continue
            size = get_size_mb(match)
            total_size += size
            items_to_remove.append(('file', match, size))
            scheduled_files.add(match)
            rel_path = os.path.relpath(match, base_dir)
            print(f"   [DELETE] {rel_path} ({size:.2f} MB)")
    print()

    # 3. Handle checkpoints
    print("🔧 Checkpoint Directories:")
    for ckpt_dir in CHECKPOINT_DIRS:
        ckpt_path = os.path.join(base_dir, ckpt_dir)
        if os.path.exists(ckpt_path):
            # Only '*.pth' files directly inside the directory are considered.
            ckpt_files = glob.glob(os.path.join(ckpt_path, '*.pth'))

            for ckpt_file in ckpt_files:
                filename = os.path.basename(ckpt_file)
                size = get_size_mb(ckpt_file)

                # Keep best model if requested
                if keep_best and filename in KEEP_FILES:
                    print(f"   [KEEP] {ckpt_dir}/{filename} ({size:.2f} MB)")
                elif ckpt_file in scheduled_files or _is_inside_any(ckpt_file, scheduled_dirs):
                    # Already part of the plan; do not count it twice.
                    continue
                else:
                    total_size += size
                    items_to_remove.append(('file', ckpt_file, size))
                    scheduled_files.add(ckpt_file)
                    print(f"   [DELETE] {ckpt_dir}/{filename} ({size:.2f} MB)")
    print()

    # Summary
    print("="*60)
    print(f"Total space to free: {total_size:.2f} MB")
    print(f"Items to remove: {len(items_to_remove)}")
    print("="*60)

    if dry_run:
        print("\n⚠️  DRY RUN - No files were deleted.")
        print("    Run with --execute to actually delete files.")
        return

    # Confirm. A closed stdin (non-interactive run) counts as "no" rather
    # than crashing with a traceback.
    try:
        confirm = input("\n⚠️  Are you sure you want to delete these files? (yes/no): ")
    except EOFError:
        confirm = ''
    if confirm.strip().lower() != 'yes':
        print("Cancelled.")
        return

    # Execute cleanup
    print("\n🧹 Cleaning up...")
    deleted_count = 0
    error_count = 0
    freed_size = 0  # only counts items that were actually removed

    for item_type, item_path, item_size in items_to_remove:
        try:
            if item_type == 'dir':
                shutil.rmtree(item_path)
            else:
                os.remove(item_path)
        except OSError as e:
            # Best effort: report the failure and continue with the rest.
            error_count += 1
            print(f"   ✗ Error deleting {item_path}: {e}")
        else:
            deleted_count += 1
            freed_size += item_size
            if verbose:
                print(f"   ✓ Deleted: {os.path.relpath(item_path, base_dir)}")

    print()
    print("="*60)
    print("✅ Cleanup complete!")
    print(f"   Deleted: {deleted_count} items")
    print(f"   Errors: {error_count}")
    print(f"   Space freed: ~{freed_size:.2f} MB")
    print("="*60)


def main(argv=None):
    """Parse command-line options and run :func:`cleanup`.

    The script defaults to a dry run that keeps the best checkpoint.
    ``--execute`` switches to real deletion and ``--delete-all-checkpoints``
    disables checkpoint preservation. ``--dry-run`` and ``--keep-best`` only
    restate the defaults, so each is mutually exclusive with its opposite:
    a contradictory command line is rejected instead of silently deleting.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse on ``--help`` or invalid/conflicting
            options (exit status 2 for errors).
    """
    parser = argparse.ArgumentParser(description='Cleanup script for submission')

    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('--dry-run', action='store_true',
                      help='Show what would be deleted without deleting (default)')
    mode.add_argument('--execute', action='store_true',
                      help='Actually delete files')

    checkpoints = parser.add_mutually_exclusive_group()
    checkpoints.add_argument('--keep-best', action='store_true',
                             help='Keep the best model checkpoint (default: True)')
    checkpoints.add_argument('--delete-all-checkpoints', action='store_true',
                             help='Delete ALL checkpoints including best model')

    parser.add_argument('--quiet', action='store_true',
                        help='Less verbose output')

    args = parser.parse_args(argv)

    # Only the "opposite" flags change behavior; the defaults are the safe ones.
    dry_run = not args.execute
    keep_best = not args.delete_all_checkpoints
    verbose = not args.quiet

    cleanup(dry_run=dry_run, keep_best=keep_best, verbose=verbose)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()