#!/usr/bin/env python3
"""
Diagnostic script to check which sequences are in input vs output.

A "sequence" is any folder that directly contains at least one ``*.png``
file. The script prints a plain-text report with three sections: sequences
missing from the output tree, a per-sequence PNG count comparison, and an
overview of the top-level folders of the input tree.
"""

from pathlib import Path
from collections import defaultdict

# Horizontal rule used to frame every section title of the report.
_SEPARATOR = "=" * 80


def get_sequences(directory):
    """Get all sequence folders (directories containing PNG files).

    Args:
        directory: Root folder to scan recursively (str or path-like).

    Returns:
        A ``defaultdict(list)`` mapping each sequence folder, expressed as a
        string path relative to *directory*, to the list of ``Path`` objects
        of the ``*.png`` files found directly inside it. PNG files sitting
        directly in *directory* are grouped under the key ``'.'``. The
        mapping is empty when *directory* is missing or is not a directory.
    """
    sequences = defaultdict(list)
    root = Path(directory)

    # is_dir() also covers the "does not exist" case and additionally keeps
    # us from trying to walk a regular file.
    if not root.is_dir():
        return sequences

    for png_file in root.rglob('*.png'):
        # The sequence is identified by the PNG's parent folder.
        relative_seq = png_file.parent.relative_to(root)
        sequences[str(relative_seq)].append(png_file)

    return sequences


def _print_banner(title):
    """Print *title* framed by separator rules, preceded by a blank line."""
    print(f"\n{_SEPARATOR}")
    print(title)
    print(_SEPARATOR)


def _report_missing(input_sequences, output_sequences, output_dir):
    """Print the input sequences that have no PNG files in the output tree."""
    missing = sorted(set(input_sequences) - set(output_sequences))
    if not missing:
        print("\nAll input sequences have corresponding output sequences.")
        return

    _print_banner(f"MISSING SEQUENCES ({len(missing)}):")
    for seq in missing:
        print(f" - {seq} ({len(input_sequences[seq])} PNG files)")

        # A sequence can be "missing" while its folder exists: the folder
        # just holds no PNG files. Show what it holds instead.
        output_seq_path = output_dir / seq
        if not output_seq_path.is_dir():
            print(" Output folder does not exist")
            continue

        # Sorted so the five-name preview is stable between runs.
        files_in_output = sorted(output_seq_path.iterdir())
        print(f" Output folder exists but has {len(files_in_output)} files")
        if files_in_output:
            print(f" Files: {[f.name for f in files_in_output[:5]]}")


def _report_counts(input_sequences, output_sequences):
    """Print the input vs output PNG count for every input sequence."""
    _print_banner("SEQUENCE FILE COUNT COMPARISON:")
    for seq in sorted(input_sequences):
        input_count = len(input_sequences[seq])
        output_count = len(output_sequences.get(seq, []))
        status = "OK" if input_count == output_count else "DIFF"
        print(f"{status:4s} {seq}: {input_count} input -> {output_count} output")

        # Sequences absent from the output are already listed as missing.
        if seq not in output_sequences:
            continue
        if input_count > output_count:
            print(f" Difference: {input_count - output_count} files missing")
        elif output_count > input_count:
            print(f" Difference: {output_count - input_count} extra files in output")


def _report_top_level_folders(input_dir, input_sequences):
    """Print every top-level input folder with its recursive PNG count."""
    _print_banner("ALL FOLDERS IN INPUT DIRECTORY:")
    if not input_dir.is_dir():
        return

    top_level_dirs = sorted(d for d in input_dir.iterdir() if d.is_dir())
    print(f"Total top-level folders: {len(top_level_dirs)}")
    print()

    # Sum the PNGs of every sequence under each top-level folder. Deriving
    # both the status and the count from this single total keeps them from
    # contradicting each other when the PNGs live in nested sub-folders.
    pngs_per_folder = defaultdict(int)
    for seq_name, png_files in input_sequences.items():
        parts = Path(seq_name).parts
        if parts:  # '.' (PNGs directly in the root) has no top-level folder
            pngs_per_folder[parts[0]] += len(png_files)

    for folder in top_level_dirs:
        png_count = pngs_per_folder.get(folder.name, 0)
        status = "HAS PNGs" if png_count else "NO PNGs"
        print(f"{status:10s} {folder.name} ({png_count} PNG files)")


def main(input_dir='input', output_dir='output'):
    """Print the sequence diagnostic report to standard output.

    Args:
        input_dir: Root of the input tree; defaults to ``'input'`` relative
            to the current working directory.
        output_dir: Root of the output tree; defaults to ``'output'``
            relative to the current working directory.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    print(_SEPARATOR)
    print("SEQUENCE DIAGNOSTIC REPORT")
    print(_SEPARATOR)

    input_sequences = get_sequences(input_dir)
    output_sequences = get_sequences(output_dir)

    print(f"\nInput sequences: {len(input_sequences)}")
    print(f"Output sequences: {len(output_sequences)}")

    _report_missing(input_sequences, output_sequences, output_dir)
    _report_counts(input_sequences, output_sequences)
    _report_top_level_folders(input_dir, input_sequences)


if __name__ == '__main__':
    main()