init
This commit is contained in:
101
check_sequences.py
Normal file
101
check_sequences.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Diagnostic script to check which sequences are in input vs output.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
def get_sequences(directory):
    """Group the PNG files found under *directory* by their containing folder.

    The tree is walked recursively. A file counts as a PNG when its name ends
    in ``.png`` regardless of letter case, so ``frame.PNG`` is included.
    Directories that merely happen to be named ``*.png`` are ignored.

    Args:
        directory: Root folder to scan (a ``str`` or ``Path``).

    Returns:
        A ``defaultdict(list)`` mapping each folder path (relative to
        *directory*, rendered with ``str``) to the sorted list of PNG file
        paths located directly inside that folder. PNG files sitting directly
        in *directory* are keyed as ``'.'``. The mapping is empty when
        *directory* is not an existing directory.
    """
    sequences = defaultdict(list)
    root = Path(directory)

    if not root.is_dir():
        return sequences

    # Walk in sorted order so the per-folder lists are deterministic instead
    # of depending on filesystem enumeration order.
    for entry in sorted(root.rglob('*')):
        # A glob of '*.png' is case-sensitive on POSIX and would also match
        # directories, so filter explicitly on the lowered name and file type.
        if not entry.name.lower().endswith('.png') or not entry.is_file():
            continue
        # The sequence is identified by the file's parent folder.
        sequences[str(entry.parent.relative_to(root))].append(entry)

    return sequences
|
||||
|
||||
def main():
    """Print a diagnostic report comparing PNG sequences in ``input`` and ``output``.

    Both directories are resolved relative to the current working directory
    and scanned with ``get_sequences``. The report has three parts:

    1. Sequences present in ``input`` but absent from ``output``, with a note
       on whether the matching output folder exists and what it contains.
    2. A per-sequence comparison of input and output PNG counts.
    3. Every top-level folder of ``input`` with the number of PNG files found
       anywhere beneath it.

    Everything is written to standard output; nothing is returned.
    """
    input_dir = Path('input')
    output_dir = Path('output')
    banner = "=" * 80

    print(banner)
    print("SEQUENCE DIAGNOSTIC REPORT")
    print(banner)

    input_sequences = get_sequences(input_dir)
    output_sequences = get_sequences(output_dir)

    print(f"\nInput sequences: {len(input_sequences)}")
    print(f"Output sequences: {len(output_sequences)}")

    # Find missing sequences
    missing = set(input_sequences) - set(output_sequences)

    if missing:
        print(f"\n{banner}")
        print(f"MISSING SEQUENCES ({len(missing)}):")
        print(banner)
        for seq in sorted(missing):
            png_count = len(input_sequences[seq])
            print(f" - {seq} ({png_count} PNG files)")

            # Check if folder exists in output. is_dir() rather than exists()
            # so a stray regular file with the same name cannot make
            # iterdir() raise NotADirectoryError.
            output_seq_path = output_dir / seq
            if output_seq_path.is_dir():
                # Sorted so the five sample names shown are reproducible.
                files_in_output = sorted(output_seq_path.iterdir())
                print(f" Output folder exists but has {len(files_in_output)} files")
                if files_in_output:
                    print(f" Files: {[f.name for f in files_in_output[:5]]}")
            else:
                print(" Output folder does not exist")
    else:
        print("\nAll input sequences have corresponding output sequences.")

    # Find sequences with different file counts
    print(f"\n{banner}")
    print("SEQUENCE FILE COUNT COMPARISON:")
    print(banner)
    for seq in sorted(input_sequences):
        input_count = len(input_sequences[seq])
        # .get() avoids inserting empty entries into the output defaultdict.
        output_count = len(output_sequences.get(seq, []))
        status = "OK" if input_count == output_count else "DIFF"
        print(f"{status:4s} {seq}: {input_count} input -> {output_count} output")
        # Sequences missing entirely were already reported above.
        if seq in output_sequences and input_count != output_count:
            delta = input_count - output_count
            if delta > 0:
                print(f" Difference: {delta} files missing")
            else:
                # The output has more files than the input; do not report a
                # negative number of "missing" files.
                print(f" Difference: {-delta} extra files in output")

    # Check for folders without PNG files
    print(f"\n{banner}")
    print("ALL FOLDERS IN INPUT DIRECTORY:")
    print(banner)
    if input_dir.is_dir():
        # Get all top-level directories
        top_level_dirs = sorted(d for d in input_dir.iterdir() if d.is_dir())
        print(f"Total top-level folders: {len(top_level_dirs)}")
        print()

        # Total PNG count per top-level folder, derived from the sequence map
        # so the status flag and the count can never contradict each other
        # (e.g. "NO PNGs" next to a non-zero count for nested sequences).
        pngs_per_folder = {}
        for seq_name, png_files in input_sequences.items():
            parts = Path(seq_name).parts
            if not parts:
                # Key '.' means PNGs directly in the input root: no folder.
                continue
            pngs_per_folder[parts[0]] = pngs_per_folder.get(parts[0], 0) + len(png_files)

        for folder in top_level_dirs:
            rel_folder = folder.relative_to(input_dir)
            png_count = pngs_per_folder.get(folder.name, 0)
            status = "HAS PNGs" if png_count else "NO PNGs"
            print(f"{status:10s} {rel_folder} ({png_count} PNG files)")
|
||||
|
||||
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user