Spaces:

Abhi2504
/

Syncnet_FCN

Running

File size: 9,204 Bytes

579f772

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Generate Demo Video for FCN-SyncNet

Creates demonstration videos showing sync detection with different offsets.
Outputs a comparison video and terminal recording for presentation.

Usage:
    python generate_demo.py
    python generate_demo.py --output demo_output/

Author: R-V-Abhishek
"""

import argparse
import os
import subprocess
import sys
import time

import torch


def create_offset_videos(source_video, output_dir, offsets=[0, 5, 12]):
    """Create test videos with known audio offsets."""
    os.makedirs(output_dir, exist_ok=True)
    
    created = []
    for offset in offsets:
        if offset == 0:
            # Copy original
            output_path = os.path.join(output_dir, 'test_offset_0.avi')
            cmd = ['ffmpeg', '-y', '-i', source_video, '-c', 'copy', output_path]
        else:
            # Add audio delay (offset in frames, 40ms per frame at 25fps)
            delay_ms = offset * 40
            output_path = os.path.join(output_dir, f'test_offset_{offset}.avi')
            cmd = ['ffmpeg', '-y', '-i', source_video, 
                   '-af', f'adelay={delay_ms}|{delay_ms}',
                   '-c:v', 'copy', output_path]
        
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        created.append((output_path, offset))
        print(f"  Created: test_offset_{offset}.avi (+{offset} frames)")
    
    return created


def run_demo(model, test_videos, baseline_offset=3):
    """Run detection on test videos and print results."""
    results = []
    
    print()
    print("=" * 70)
    print("  FCN-SyncNet Demo - Audio-Video Sync Detection")
    print("=" * 70)
    print()
    
    for video_path, added_offset in test_videos:
        expected = baseline_offset - added_offset  # Original has +3, adding offset shifts it
        
        offset, conf, raw = model.detect_offset_correlation(
            video_path,
            calibration_offset=3,
            calibration_scale=-0.5,
            calibration_baseline=-15,
            verbose=False
        )
        
        error = abs(offset - expected)
        status = "✓" if error <= 3 else "✗"
        
        result = {
            'video': os.path.basename(video_path),
            'added_offset': added_offset,
            'expected': expected,
            'detected': offset,
            'error': error,
            'status': status
        }
        results.append(result)
        
        print(f"  {status} {result['video']}")
        print(f"      Added offset: +{added_offset} frames")
        print(f"      Expected:     {expected:+d} frames")
        print(f"      Detected:     {offset:+d} frames")
        print(f"      Error:        {error} frames")
        print()
    
    # Summary
    total_error = sum(r['error'] for r in results)
    correct = sum(1 for r in results if r['error'] <= 3)
    
    print("-" * 70)
    print(f"  Summary: {correct}/{len(results)} correct (within 3 frames)")
    print(f"  Total error: {total_error} frames")
    print("=" * 70)
    
    return results


def compare_with_original_syncnet(test_videos, baseline_offset=3):
    """Run original SyncNet for comparison."""
    print()
    print("=" * 70)
    print("  Original SyncNet Comparison")
    print("=" * 70)
    print()
    
    original_results = []
    for video_path, added_offset in test_videos:
        expected = baseline_offset - added_offset
        
        # Run original demo_syncnet.py (use same Python interpreter)
        result = subprocess.run(
            [sys.executable, 'demo_syncnet.py', '--videofile', video_path, 
             '--tmp_dir', 'data/work/pytmp'],
            capture_output=True, text=True
        )
        
        # Parse output
        detected = None
        for line in result.stdout.split('\n'):
            if 'AV offset' in line:
                detected = int(line.split(':')[1].strip())
                break
        
        if detected is not None:
            error = abs(detected - expected)
            status = "✓" if error <= 3 else "✗"
            print(f"  {status} {os.path.basename(video_path)}: detected={detected:+d}, expected={expected:+d}, error={error}")
            original_results.append({'error': error})
        else:
            print(f"  ? {os.path.basename(video_path)}: detection failed")
            original_results.append({'error': None})
    
    print("=" * 70)
    return original_results


def main():
    parser = argparse.ArgumentParser(description='Generate FCN-SyncNet demo')
    parser.add_argument('--output', '-o', default='demo_output',
                       help='Output directory for test videos')
    parser.add_argument('--source', '-s', default='data/example.avi',
                       help='Source video file')
    parser.add_argument('--compare', '-c', action='store_true',
                       help='Also run original SyncNet for comparison')
    parser.add_argument('--cleanup', action='store_true',
                       help='Clean up test videos after demo')
    
    args = parser.parse_args()
    
    print()
    print("╔══════════════════════════════════════════════════════════════════╗")
    print("║         FCN-SyncNet Demo - Audio-Video Sync Detection            ║")
    print("╚══════════════════════════════════════════════════════════════════╝")
    print()
    
    # Check source video
    if not os.path.exists(args.source):
        print(f"Error: Source video not found: {args.source}")
        sys.exit(1)
    
    # Create test videos
    print("Creating test videos with different offsets...")
    test_videos = create_offset_videos(args.source, args.output, offsets=[0, 5, 12])
    
    # Load FCN model
    print()
    print("Loading FCN-SyncNet model...")
    from SyncNetModel_FCN import StreamSyncFCN
    
    model = StreamSyncFCN(max_offset=15, pretrained_syncnet_path=None, auto_load_pretrained=False)
    checkpoint = torch.load('checkpoints/syncnet_fcn_epoch2.pth', map_location='cpu')
    encoder_state = {k: v for k, v in checkpoint['model_state_dict'].items()
                    if 'audio_encoder' in k or 'video_encoder' in k}
    model.load_state_dict(encoder_state, strict=False)
    model.eval()
    print(f"  ✓ Loaded checkpoint (epoch {checkpoint.get('epoch', '?')})")
    
    # Run FCN demo
    fcn_results = run_demo(model, test_videos, baseline_offset=3)
    
    # Optionally compare with original
    original_results = None
    if args.compare:
        original_results = compare_with_original_syncnet(test_videos, baseline_offset=3)
        
        # Print comparison summary
        fcn_errors = [r['error'] for r in fcn_results]
        orig_errors = [r['error'] for r in original_results if r['error'] is not None]
        
        print()
        print("╔══════════════════════════════════════════════════════════════════╗")
        print("║                     Comparison Summary                            ║")
        print("╠══════════════════════════════════════════════════════════════════╣")
        fcn_total = sum(fcn_errors)
        fcn_correct = sum(1 for e in fcn_errors if e <= 3)
        print(f"║  FCN-SyncNet:      {fcn_correct}/{len(fcn_results)} correct, {fcn_total} frames total error        ║")
        if orig_errors:
            orig_total = sum(orig_errors)
            orig_correct = sum(1 for e in orig_errors if e <= 3)
            print(f"║  Original SyncNet: {orig_correct}/{len(orig_errors)} correct, {orig_total} frames total error        ║")
        print("╠══════════════════════════════════════════════════════════════════╣")
        print("║  FCN-SyncNet: Research prototype with real-time capability       ║")
        print("║  Status: Working but needs more training data/epochs             ║")
        print("╚══════════════════════════════════════════════════════════════════╝")
    
    # Cleanup
    if args.cleanup:
        print()
        print("Cleaning up test videos...")
        for video_path, _ in test_videos:
            if os.path.exists(video_path):
                os.remove(video_path)
        if os.path.exists(args.output) and not os.listdir(args.output):
            os.rmdir(args.output)
        print("  Done.")
    
    print()
    print("Demo complete!")
    print()
    
    return 0


if __name__ == '__main__':
    sys.exit(main())