detect and quarantine corrupt pngs
This commit is contained in:
123
compress_pngs.py
123
compress_pngs.py
@@ -31,13 +31,55 @@ def compress_png(input_path, output_path, force_bitdepth=None):
|
||||
new_size = os.path.getsize(output_path)
|
||||
savings = original_size - new_size
|
||||
savings_pct = (savings / original_size * 100) if original_size > 0 else 0
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, True)
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, True, False)
|
||||
|
||||
# Ensure output directory exists
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
original_size = os.path.getsize(input_path)
|
||||
img = Image.open(input_path)
|
||||
|
||||
# Check for corrupted/empty files
|
||||
if original_size == 0:
|
||||
return (str(input_path), False, "CORRUPTED: File is 0 bytes (empty/placeholder)", 0, 0, 0, False, True)
|
||||
|
||||
# Try to open and validate the image
|
||||
try:
|
||||
img = Image.open(input_path)
|
||||
# Force load to detect corruption early
|
||||
img.load()
|
||||
except Exception as e:
|
||||
return (str(input_path), False, f"CORRUPTED: Cannot open/load image - {str(e)}", original_size, 0, 0, False, True)
|
||||
|
||||
# Validate image dimensions
|
||||
if img.width == 0 or img.height == 0:
|
||||
return (str(input_path), False, f"CORRUPTED: Invalid dimensions ({img.width}x{img.height})", original_size, 0, 0, False, True)
|
||||
|
||||
# Check if image appears completely black (potential corruption indicator)
|
||||
# But be careful - some images might legitimately be black
|
||||
# We'll only flag this if the file is suspiciously small for its dimensions
|
||||
try:
|
||||
# Sample a few pixels to check if all are black
|
||||
sample_size = min(100, img.width * img.height)
|
||||
pixels = list(img.getdata()[:sample_size])
|
||||
if pixels:
|
||||
# Check if all sampled pixels are black (0 or (0,0,0) or (0,0,0,0))
|
||||
all_black = True
|
||||
for pixel in pixels:
|
||||
if isinstance(pixel, (tuple, list)):
|
||||
if any(p > 0 for p in pixel):
|
||||
all_black = False
|
||||
break
|
||||
else:
|
||||
if pixel > 0:
|
||||
all_black = False
|
||||
break
|
||||
|
||||
# If all black AND file is suspiciously small, flag as potentially corrupted
|
||||
if all_black and original_size < (img.width * img.height * 0.1): # Less than 0.1 bytes per pixel
|
||||
return (str(input_path), False, f"CORRUPTED: Image appears all black with suspiciously small file size", original_size, 0, 0, False, True)
|
||||
except:
|
||||
# If we can't check pixels, continue anyway
|
||||
pass
|
||||
|
||||
# Determine target bit depth
|
||||
if force_bitdepth == '8':
|
||||
@@ -131,9 +173,11 @@ def compress_png(input_path, output_path, force_bitdepth=None):
|
||||
new_size = os.path.getsize(output_path)
|
||||
savings = original_size - new_size
|
||||
savings_pct = (savings / original_size * 100) if original_size > 0 else 0
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, False)
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, False, False)
|
||||
except Exception as e:
|
||||
return (str(input_path), False, str(e), 0, 0, 0, False)
|
||||
# Check if error might indicate corruption
|
||||
is_corrupted = "truncated" in str(e).lower() or "cannot identify" in str(e).lower() or "corrupt" in str(e).lower()
|
||||
return (str(input_path), False, str(e), 0, 0, 0, False, is_corrupted)
|
||||
|
||||
def find_image_files(input_dir):
|
||||
"""Find all PNG and JPG files in subdirectories."""
|
||||
@@ -163,6 +207,31 @@ def get_output_path(input_path, input_dir, output_dir):
|
||||
# Create output path
|
||||
return output_dir / relative_path
|
||||
|
||||
def move_to_corrupted(input_path, input_dir, corrupted_dir):
    """Move a corrupted file to the corrupted folder, preserving directory structure.

    The file's path relative to ``input_dir`` is recreated underneath
    ``corrupted_dir``. If the file does not live under ``input_dir``, only its
    file name is used, so it lands directly in ``corrupted_dir``.

    Args:
        input_path: Path (``str`` or ``Path``) of the file to move.
        input_dir: Root directory used to compute the relative path.
        corrupted_dir: Root directory that receives the moved file.

    Returns:
        ``(True, None)`` when the file was moved, otherwise
        ``(False, message)`` where ``message`` is the stringified error.
    """
    # Imported locally so this helper does not depend on the module's
    # top-of-file import block.
    import shutil

    try:
        input_path = Path(input_path)
        input_dir = Path(input_dir)
        corrupted_dir = Path(corrupted_dir)

        # Get relative path from input directory; fall back to the bare file
        # name when the file is not located under input_dir.
        try:
            relative_path = input_path.relative_to(input_dir)
        except ValueError:
            relative_path = input_path.name

        dest_path = corrupted_dir / relative_path

        # Ensure destination directory exists
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        # shutil.move falls back to copy + delete when source and destination
        # are on different filesystems, where a plain rename raises OSError.
        shutil.move(str(input_path), str(dest_path))
    except Exception as e:
        # Failures are reported to the caller as a message rather than raised,
        # so one unmovable file does not abort the whole run.
        return False, str(e)
    return True, None
|
||||
|
||||
def format_size(size_bytes):
|
||||
"""Format file size in human readable format."""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
@@ -235,8 +304,11 @@ def main():
|
||||
|
||||
print(f"Found {len(png_files)} PNG files to process.")
|
||||
|
||||
# Create output directory
|
||||
# Create output and corrupted directories
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
corrupted_dir = Path('corrupted')
|
||||
corrupted_dir.mkdir(exist_ok=True)
|
||||
print(f"Corrupted files will be moved to: {corrupted_dir}")
|
||||
|
||||
# Use all available CPU cores
|
||||
max_workers = multiprocessing.cpu_count()
|
||||
@@ -246,6 +318,8 @@ def main():
|
||||
compressed = 0
|
||||
skipped = 0
|
||||
failed = 0
|
||||
corrupted = 0
|
||||
corrupted_files = []
|
||||
total_original_size = 0
|
||||
total_new_size = 0
|
||||
start_time = time.time()
|
||||
@@ -261,7 +335,7 @@ def main():
|
||||
# Process results as they complete
|
||||
for future in as_completed(future_to_file):
|
||||
result = future.result()
|
||||
file_path, success, error, orig_size, new_size, savings_pct, was_skipped = result
|
||||
file_path, success, error, orig_size, new_size, savings_pct, was_skipped, is_corrupted = result
|
||||
|
||||
if success:
|
||||
if was_skipped:
|
||||
@@ -280,7 +354,7 @@ def main():
|
||||
# Update every file or every 0.5 seconds, whichever comes first
|
||||
if processed == 1 or time_since_update >= 0.5:
|
||||
rate = processed / elapsed if elapsed > 0 else 0
|
||||
remaining = len(png_files) - processed
|
||||
remaining = len(png_files) - processed - failed
|
||||
eta_seconds = remaining / rate if rate > 0 and remaining > 0 else 0
|
||||
|
||||
total_savings = total_original_size - total_new_size
|
||||
@@ -290,14 +364,25 @@ def main():
|
||||
elapsed_str = format_time(elapsed)
|
||||
|
||||
print(f"[{processed:5d}/{len(png_files)}] "
|
||||
f"Compressed: {compressed} | Skipped: {skipped} | "
|
||||
f"Compressed: {compressed} | Skipped: {skipped} | Corrupted: {corrupted} | "
|
||||
f"Speed: {rate:.1f} files/sec | "
|
||||
f"Elapsed: {elapsed_str} | ETA: {eta_str} | "
|
||||
f"Saved: {format_size(total_savings)} ({total_savings_pct:.1f}%)", end='\r')
|
||||
last_update_time = current_time
|
||||
else:
|
||||
failed += 1
|
||||
print(f"\n[ERROR] Failed to compress {file_path}: {error}")
|
||||
if is_corrupted:
|
||||
corrupted += 1
|
||||
# Move corrupted file to corrupted folder
|
||||
move_success, move_error = move_to_corrupted(file_path, input_dir, corrupted_dir)
|
||||
if move_success:
|
||||
corrupted_files.append((file_path, error, f"Moved to {corrupted_dir / Path(file_path).relative_to(input_dir)}"))
|
||||
print(f"\n[CORRUPTED] {file_path}: {error} -> Moved to corrupted folder")
|
||||
else:
|
||||
corrupted_files.append((file_path, error, f"Failed to move: {move_error}"))
|
||||
print(f"\n[CORRUPTED] {file_path}: {error} (Failed to move: {move_error})")
|
||||
else:
|
||||
failed += 1
|
||||
print(f"\n[ERROR] Failed to compress {file_path}: {error}")
|
||||
|
||||
total_time = time.time() - start_time
|
||||
total_savings = total_original_size - total_new_size
|
||||
@@ -311,12 +396,30 @@ def main():
|
||||
print(f"Skipped (already exist): {skipped} files")
|
||||
if jpg_files:
|
||||
print(f"Ignored (JPG/JPEG): {len(jpg_files)} files")
|
||||
if corrupted > 0:
|
||||
print(f"Corrupted (bad PNGs): {corrupted} files")
|
||||
print(f"Failed: {failed} files")
|
||||
print(f"Total time: {format_time(total_time)}")
|
||||
print(f"Average speed: {avg_rate:.2f} files/second")
|
||||
print(f"Original size: {format_size(total_original_size)}")
|
||||
print(f"Compressed size: {format_size(total_new_size)}")
|
||||
print(f"Total savings: {format_size(total_savings)} ({total_savings_pct:.1f}%)")
|
||||
|
||||
# Print list of corrupted files if any
|
||||
if corrupted_files:
|
||||
print("\n" + "=" * 80)
|
||||
print("CORRUPTED FILES LIST:")
|
||||
print("=" * 80)
|
||||
for item in corrupted_files:
|
||||
if len(item) == 3:
|
||||
file_path, error, move_status = item
|
||||
print(f" {file_path}")
|
||||
print(f" Reason: {error}")
|
||||
print(f" Status: {move_status}")
|
||||
else:
|
||||
file_path, error = item
|
||||
print(f" {file_path}")
|
||||
print(f" Reason: {error}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user