Compare commits
4 Commits
8d7750a78f
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd5c8a9ab4 | ||
|
|
f1cf096069 | ||
|
|
1885b1d55d | ||
|
|
4416949d7c |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
input/
|
||||
output/
|
||||
corrupted/
|
||||
|
||||
|
||||
@@ -3012,3 +3012,959 @@ The Threadripper’s higher core count should provide a clear speedup for this C
|
||||
|
||||
---
|
||||
|
||||
_**User (2025-12-11 21:39Z)**_
|
||||
|
||||
How about a Threadripper 1950X?
|
||||
|
||||
---
|
||||
|
||||
_**Agent (model default, mode Agent)**_
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
The Threadripper 1950X will likely be slower than the i9-13900KF, despite having more threads.
|
||||
|
||||
**Threadripper 1950X:**
|
||||
- 16 physical cores, 32 threads
|
||||
- First-gen Threadripper (Zen, 2017)
|
||||
- The script would use 32 worker processes
|
||||
- Older architecture, lower IPC
|
||||
|
||||
**i9-13900KF:**
|
||||
- 8 P-cores + 8 E-cores = 16 physical cores, 24 threads
|
||||
- Raptor Lake (2022)
|
||||
- The script would use 24 worker processes
|
||||
- Much higher single-threaded performance
|
||||
|
||||
**Why the i9-13900KF will likely be faster:**
|
||||
1. Per-core performance: The i9’s P-cores are significantly faster (roughly 50–70% higher IPC).
|
||||
2. Core efficiency: 24 faster workers can outperform 32 slower ones for this workload.
|
||||
3. Memory: The i9’s newer memory controller can help.
|
||||
4. Architecture: 5+ years of improvements in efficiency and performance.
|
||||
|
||||
**Expected performance:**
|
||||
- The i9-13900KF should be roughly 20–40% faster, depending on I/O and the specific compression work.
|
||||
|
||||
**Bottom line:** The 1950X’s extra threads don’t make up for the i9’s much faster cores. The 3970X’s large core count can overcome the per-core gap; the 1950X cannot.
|
||||
|
||||
---
|
||||
|
||||
_**User (2025-12-11 21:39Z)**_
|
||||
|
||||
Sometimes PNGs get corrupted: we get bad results where everything is black, the image cuts off too early, or the file is just 0 KB (a placeholder). Can we have it report bad PNGs?
|
||||
|
||||
---
|
||||
|
||||
_**Agent (model default, mode Agent)**_
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Adding validation to detect and report corrupted PNGs:
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 43, lines removed: 1
|
||||
|
||||
```diff
|
||||
@@ -38,5 +38,47 @@
|
||||
|
||||
original_size = os.path.getsize(input_path)
|
||||
- img = Image.open(input_path)
|
||||
+
|
||||
+ # Check for corrupted/empty files
|
||||
+ if original_size == 0:
|
||||
+ return (str(input_path), False, "CORRUPTED: File is 0 bytes (empty/placeholder)", 0, 0, 0, False, True)
|
||||
+
|
||||
+ # Try to open and validate the image
|
||||
+ try:
|
||||
+ img = Image.open(input_path)
|
||||
+ # Force load to detect corruption early
|
||||
+ img.load()
|
||||
+ except Exception as e:
|
||||
+ return (str(input_path), False, f"CORRUPTED: Cannot open/load image - {str(e)}", original_size, 0, 0, False, True)
|
||||
+
|
||||
+ # Validate image dimensions
|
||||
+ if img.width == 0 or img.height == 0:
|
||||
+ return (str(input_path), False, f"CORRUPTED: Invalid dimensions ({img.width}x{img.height})", original_size, 0, 0, False, True)
|
||||
+
|
||||
+ # Check if image appears completely black (potential corruption indicator)
|
||||
+ # But be careful - some images might legitimately be black
|
||||
+ # We'll only flag this if the file is suspiciously small for its dimensions
|
||||
+ try:
|
||||
+ # Sample a few pixels to check if all are black
|
||||
+ sample_size = min(100, img.width * img.height)
|
||||
+ pixels = list(img.getdata()[:sample_size])
|
||||
+ if pixels:
|
||||
+ # Check if all sampled pixels are black (0 or (0,0,0) or (0,0,0,0))
|
||||
+ all_black = True
|
||||
+ for pixel in pixels:
|
||||
+ if isinstance(pixel, (tuple, list)):
|
||||
+ if any(p > 0 for p in pixel):
|
||||
+ all_black = False
|
||||
+ break
|
||||
+ else:
|
||||
+ if pixel > 0:
|
||||
+ all_black = False
|
||||
+ break
|
||||
+
|
||||
+ # If all black AND file is suspiciously small, flag as potentially corrupted
|
||||
+ if all_black and original_size < (img.width * img.height * 0.1): # Less than 0.1 bytes per pixel
|
||||
+ return (str(input_path), False, f"CORRUPTED: Image appears all black with suspiciously small file size", original_size, 0, 0, False, True)
|
||||
+ except:
|
||||
+ # If we can't check pixels, continue anyway
|
||||
+ pass
|
||||
|
||||
# Determine target bit depth
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 1, lines removed: 1
|
||||
|
||||
```diff
|
||||
@@ -32,5 +32,5 @@
|
||||
savings = original_size - new_size
|
||||
savings_pct = (savings / original_size * 100) if original_size > 0 else 0
|
||||
- return (str(input_path), True, None, original_size, new_size, savings_pct, True)
|
||||
+ return (str(input_path), True, None, original_size, new_size, savings_pct, True, False)
|
||||
|
||||
# Ensure output directory exists
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 4, lines removed: 2
|
||||
|
||||
```diff
|
||||
@@ -174,7 +174,9 @@
|
||||
savings = original_size - new_size
|
||||
savings_pct = (savings / original_size * 100) if original_size > 0 else 0
|
||||
- return (str(input_path), True, None, original_size, new_size, savings_pct, False)
|
||||
+ return (str(input_path), True, None, original_size, new_size, savings_pct, False, False)
|
||||
except Exception as e:
|
||||
- return (str(input_path), False, str(e), 0, 0, 0, False)
|
||||
+ # Check if error might indicate corruption
|
||||
+ is_corrupted = "truncated" in str(e).lower() or "cannot identify" in str(e).lower() or "corrupt" in str(e).lower()
|
||||
+ return (str(input_path), False, str(e), 0, 0, 0, False, is_corrupted)
|
||||
|
||||
def find_image_files(input_dir):
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 2, lines removed: undefined
|
||||
|
||||
```diff
|
||||
@@ -291,4 +291,6 @@
|
||||
skipped = 0
|
||||
failed = 0
|
||||
+ corrupted = 0
|
||||
+ corrupted_files = []
|
||||
total_original_size = 0
|
||||
total_new_size = 0
|
||||
```
|
||||
|
||||
**Chunk 2**
|
||||
Lines added: 1, lines removed: 1
|
||||
|
||||
```diff
|
||||
@@ -306,5 +308,5 @@
|
||||
for future in as_completed(future_to_file):
|
||||
result = future.result()
|
||||
- file_path, success, error, orig_size, new_size, savings_pct, was_skipped = result
|
||||
+ file_path, success, error, orig_size, new_size, savings_pct, was_skipped, is_corrupted = result
|
||||
|
||||
if success:
|
||||
```
|
||||
|
||||
**Chunk 3**
|
||||
Lines added: 1, lines removed: 1
|
||||
|
||||
```diff
|
||||
@@ -325,5 +327,5 @@
|
||||
if processed == 1 or time_since_update >= 0.5:
|
||||
rate = processed / elapsed if elapsed > 0 else 0
|
||||
- remaining = len(png_files) - processed
|
||||
+ remaining = len(png_files) - processed - failed
|
||||
eta_seconds = remaining / rate if rate > 0 and remaining > 0 else 0
|
||||
|
||||
```
|
||||
|
||||
**Chunk 4**
|
||||
Lines added: 9, lines removed: 4
|
||||
|
||||
```diff
|
||||
@@ -335,12 +337,17 @@
|
||||
|
||||
print(f"[{processed:5d}/{len(png_files)}] "
|
||||
- f"Compressed: {compressed} | Skipped: {skipped} | "
|
||||
+ f"Compressed: {compressed} | Skipped: {skipped} | Corrupted: {corrupted} | "
|
||||
f"Speed: {rate:.1f} files/sec | "
|
||||
f"Elapsed: {elapsed_str} | ETA: {eta_str} | "
|
||||
f"Saved: {format_size(total_savings)} ({total_savings_pct:.1f}%)", end='\r')
|
||||
last_update_time = current_time
|
||||
- else:
|
||||
- failed += 1
|
||||
- print(f"\n[ERROR] Failed to compress {file_path}: {error}")
|
||||
+ else:
|
||||
+ if is_corrupted:
|
||||
+ corrupted += 1
|
||||
+ corrupted_files.append((file_path, error))
|
||||
+ print(f"\n[CORRUPTED] {file_path}: {error}")
|
||||
+ else:
|
||||
+ failed += 1
|
||||
+ print(f"\n[ERROR] Failed to compress {file_path}: {error}")
|
||||
|
||||
total_time = time.time() - start_time
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 2, lines removed: undefined
|
||||
|
||||
```diff
|
||||
@@ -363,4 +363,6 @@
|
||||
if jpg_files:
|
||||
print(f"Ignored (JPG/JPEG): {len(jpg_files)} files")
|
||||
+ if corrupted > 0:
|
||||
+ print(f"Corrupted (bad PNGs): {corrupted} files")
|
||||
print(f"Failed: {failed} files")
|
||||
print(f"Total time: {format_time(total_time)}")
|
||||
```
|
||||
|
||||
**Chunk 2**
|
||||
Lines added: 9, lines removed: undefined
|
||||
|
||||
```diff
|
||||
@@ -369,4 +371,13 @@
|
||||
print(f"Compressed size: {format_size(total_new_size)}")
|
||||
print(f"Total savings: {format_size(total_savings)} ({total_savings_pct:.1f}%)")
|
||||
+
|
||||
+ # Print list of corrupted files if any
|
||||
+ if corrupted_files:
|
||||
+ print("\n" + "=" * 80)
|
||||
+ print("CORRUPTED FILES LIST:")
|
||||
+ print("=" * 80)
|
||||
+ for file_path, error in corrupted_files:
|
||||
+ print(f" {file_path}")
|
||||
+ print(f" Reason: {error}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="generic" data-tool-name="read_lints">
|
||||
<details>
|
||||
<summary>Tool use: **read_lints** • Read lints for 1 file</summary>
|
||||
|
||||
Lint paths:
|
||||
|
||||
- `compress_pngs.py`
|
||||
|
||||
Lint results
|
||||
|
||||
|
||||
**No lint errors found**
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Updated the script to detect and report corrupted PNGs. It now:
|
||||
|
||||
1. **Detects 0-byte files** — flags empty/placeholder files
|
||||
2. **Validates image loading** — checks if the PNG can be opened and loaded
|
||||
3. **Checks dimensions** — flags files with 0x0 or invalid dimensions
|
||||
4. **Detects suspicious all-black images** — flags images that are all black with suspiciously small file sizes (potential corruption)
|
||||
5. **Tracks corrupted files separately** — counts them separately from regular failures
|
||||
6. **Reports in real-time** — shows `[CORRUPTED]` messages as they're found
|
||||
7. **Summary report** — includes corrupted file count in the final summary
|
||||
8. **Detailed list** — prints a list of all corrupted files with reasons at the end
|
||||
|
||||
The script will now:
|
||||
- Show corrupted files in the progress output: `Corrupted: X`
|
||||
- Print `[CORRUPTED]` messages when found
|
||||
- List all corrupted files with their specific error reasons at the end
|
||||
|
||||
This should catch the issues you mentioned: black images, cut-off images, and 0-byte placeholder files.
|
||||
|
||||
---
|
||||
|
||||
_**User (2025-12-11 21:39Z)**_
|
||||
|
||||
@pwsh (155-303) nice! can we make it so it moves all of them from input\corrupted to another folder to isolate them?
|
||||
|
||||
---
|
||||
|
||||
_**Agent (model default, mode Agent)**_
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Updating the script to move corrupted files to a `corrupted` folder, preserving the directory structure:
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 25, lines removed: undefined
|
||||
|
||||
```diff
|
||||
@@ -208,4 +208,29 @@
|
||||
return output_dir / relative_path
|
||||
|
||||
+ def move_to_corrupted(input_path, input_dir, corrupted_dir):
|
||||
+ """Move a corrupted file to the corrupted folder, preserving directory structure."""
|
||||
+ try:
|
||||
+ input_path = Path(input_path)
|
||||
+ input_dir = Path(input_dir)
|
||||
+ corrupted_dir = Path(corrupted_dir)
|
||||
+
|
||||
+ # Get relative path from input directory
|
||||
+ try:
|
||||
+ relative_path = input_path.relative_to(input_dir)
|
||||
+ except ValueError:
|
||||
+ relative_path = input_path.name
|
||||
+
|
||||
+ # Create destination path
|
||||
+ dest_path = corrupted_dir / relative_path
|
||||
+
|
||||
+ # Ensure destination directory exists
|
||||
+ dest_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
+
|
||||
+ # Move the file
|
||||
+ input_path.rename(dest_path)
|
||||
+ return True, None
|
||||
+ except Exception as e:
|
||||
+ return False, str(e)
|
||||
+
|
||||
def format_size(size_bytes):
|
||||
"""Format file size in human readable format."""
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 4, lines removed: 1
|
||||
|
||||
```diff
|
||||
@@ -305,6 +305,9 @@
|
||||
print(f"Found {len(png_files)} PNG files to process.")
|
||||
|
||||
- # Create output directory
|
||||
+ # Create output and corrupted directories
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
+ corrupted_dir = Path('corrupted')
|
||||
+ corrupted_dir.mkdir(exist_ok=True)
|
||||
+ print(f"Corrupted files will be moved to: {corrupted_dir}")
|
||||
|
||||
# Use all available CPU cores
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 8, lines removed: 2
|
||||
|
||||
```diff
|
||||
@@ -373,6 +373,12 @@
|
||||
if is_corrupted:
|
||||
corrupted += 1
|
||||
- corrupted_files.append((file_path, error))
|
||||
- print(f"\n[CORRUPTED] {file_path}: {error}")
|
||||
+ # Move corrupted file to corrupted folder
|
||||
+ move_success, move_error = move_to_corrupted(file_path, input_dir, corrupted_dir)
|
||||
+ if move_success:
|
||||
+ corrupted_files.append((file_path, error, f"Moved to {corrupted_dir / Path(file_path).relative_to(input_dir)}"))
|
||||
+ print(f"\n[CORRUPTED] {file_path}: {error} -> Moved to corrupted folder")
|
||||
+ else:
|
||||
+ corrupted_files.append((file_path, error, f"Failed to move: {move_error}"))
|
||||
+ print(f"\n[CORRUPTED] {file_path}: {error} (Failed to move: {move_error})")
|
||||
else:
|
||||
failed += 1
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 10, lines removed: 3
|
||||
|
||||
```diff
|
||||
@@ -411,7 +411,14 @@
|
||||
print("CORRUPTED FILES LIST:")
|
||||
print("=" * 80)
|
||||
- for file_path, error in corrupted_files:
|
||||
- print(f" {file_path}")
|
||||
- print(f" Reason: {error}")
|
||||
+ for item in corrupted_files:
|
||||
+ if len(item) == 3:
|
||||
+ file_path, error, move_status = item
|
||||
+ print(f" {file_path}")
|
||||
+ print(f" Reason: {error}")
|
||||
+ print(f" Status: {move_status}")
|
||||
+ else:
|
||||
+ file_path, error = item
|
||||
+ print(f" {file_path}")
|
||||
+ print(f" Reason: {error}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="generic" data-tool-name="read_lints">
|
||||
<details>
|
||||
<summary>Tool use: **read_lints** • Read lints for 1 file</summary>
|
||||
|
||||
Lint paths:
|
||||
|
||||
- `compress_pngs.py`
|
||||
|
||||
Lint results
|
||||
|
||||
|
||||
**No lint errors found**
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Updating `.gitignore` to ignore the `corrupted` folder:
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: .gitignore</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: .gitignore</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 1, lines removed: undefined
|
||||
|
||||
```diff
|
||||
@@ -1,4 +1,5 @@
|
||||
input/
|
||||
output/
|
||||
+ corrupted/
|
||||
|
||||
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Updated the script to automatically move corrupted PNG files to a `corrupted` folder. It will:
|
||||
|
||||
1. **Create a `corrupted` folder** at the root level
|
||||
2. **Move corrupted files** there when detected, preserving the directory structure (e.g., `input\Pack2BagScan_animation 6c\file.png` → `corrupted\Pack2BagScan_animation 6c\file.png`)
|
||||
3. **Report the move** in real-time, showing where each file was moved
|
||||
4. **Show move status** in the final summary
|
||||
|
||||
The script will now:
|
||||
- Display `[CORRUPTED] file.png: error -> Moved to corrupted folder` when moving files
|
||||
- List all moved files in the summary with their new locations
|
||||
- Keep corrupted files isolated from your input folder
|
||||
|
||||
Also updated `.gitignore` to ignore the `corrupted` folder. When you run the script, corrupted files will be automatically moved out of the input folder and into the corrupted folder, making it easy to identify and handle them separately.
|
||||
|
||||
---
|
||||
|
||||
_**User (2025-12-11 21:39Z)**_
|
||||
|
||||
@pwsh (506-528) JPEGs should be included in the skipped count (it currently shows "Skipped: 0")
|
||||
|
||||
---
|
||||
|
||||
_**Agent (model default, mode Agent)**_
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Updating the script to count JPG/JPEG files as skipped:
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 1, lines removed: 1
|
||||
|
||||
```diff
|
||||
@@ -317,5 +317,5 @@
|
||||
|
||||
compressed = 0
|
||||
- skipped = 0
|
||||
+ skipped = len(jpg_files) # Start with JPG files counted as skipped
|
||||
failed = 0
|
||||
corrupted = 0
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="write" data-tool-name="search_replace">
|
||||
|
||||
<details><summary>Tool use: **search_replace** • Edit file: compress_pngs.py</summary>
|
||||
|
||||
**Chunk 1**
|
||||
Lines added: 4, lines removed: 2
|
||||
|
||||
```diff
|
||||
@@ -394,7 +394,9 @@
|
||||
print(f"Compression complete!")
|
||||
print(f"Successfully compressed: {compressed} files")
|
||||
- print(f"Skipped (already exist): {skipped} files")
|
||||
+ skipped_existing = skipped - len(jpg_files) if skipped >= len(jpg_files) else 0
|
||||
if jpg_files:
|
||||
- print(f"Ignored (JPG/JPEG): {len(jpg_files)} files")
|
||||
+ print(f"Skipped: {skipped} files ({skipped_existing} already exist, {len(jpg_files)} JPG/JPEG)")
|
||||
+ else:
|
||||
+ print(f"Skipped (already exist): {skipped} files")
|
||||
if corrupted > 0:
|
||||
print(f"Corrupted (bad PNGs): {corrupted} files")
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="generic" data-tool-name="read_lints">
|
||||
<details>
|
||||
<summary>Tool use: **read_lints** • Read lints for 1 file</summary>
|
||||
|
||||
Lint paths:
|
||||
|
||||
- `compress_pngs.py`
|
||||
|
||||
Lint results
|
||||
|
||||
|
||||
**No lint errors found**
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Updating the progress display to show the correct total, including JPG files:
|
||||
|
||||
|
||||
---
|
||||
|
||||
<tool-use data-tool-type="read" data-tool-name="read_file">
|
||||
<details><summary>Tool use: **read_file** • Read file: compress_pngs.py</summary>
|
||||
|
||||
</details>
|
||||
</tool-use>
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
Updated the script to count JPG/JPEG files as skipped. Changes:
|
||||
|
||||
1. **Initialize skipped count** with the number of JPG files found
|
||||
2. **Progress display** shows the total skipped count (including JPGs)
|
||||
3. **Final summary** shows skipped files with a breakdown: `Skipped: X files (Y already exist, Z JPG/JPEG)`
|
||||
|
||||
When you run the script, JPG files are included in the skipped count from the start, so you'll see something like:
|
||||
- `Skipped: 803` (if all 803 are JPGs)
|
||||
- Or `Skipped: 805` (if 803 are JPGs and 2 PNGs were already processed)
|
||||
|
||||
The summary will show the breakdown of what was skipped.
|
||||
|
||||
---
|
||||
|
||||
_**User (2025-12-11 21:39Z)**_
|
||||
|
||||
For this script, which wins: a 7800X3D or a Threadripper 1950X?
|
||||
|
||||
---
|
||||
|
||||
_**Agent (model default, mode Agent)**_
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
The **7800X3D should win** for this script.
|
||||
|
||||
**7800X3D:**
|
||||
- 8 cores, 16 threads → script uses 16 worker processes
|
||||
- Zen 4 (2023)
|
||||
- Strong single-threaded performance
|
||||
- Higher IPC
|
||||
|
||||
**Threadripper 1950X:**
|
||||
- 16 cores, 32 threads → script uses 32 worker processes
|
||||
- Zen 1 (2017)
|
||||
- Lower single-threaded performance
|
||||
- Lower IPC
|
||||
|
||||
**Why the 7800X3D wins:**
|
||||
1. Per-core performance: Each worker finishes faster, so 16 fast workers can outperform 32 slower ones.
|
||||
2. Architecture gap: Zen 4 vs Zen 1 is a large IPC improvement.
|
||||
3. PNG compression: PIL operations benefit from faster cores; each 7800X3D core is roughly 2–3x faster than a 1950X core.
|
||||
|
||||
**Expected performance:**
|
||||
- The 7800X3D should be roughly 30–50% faster, depending on I/O.
|
||||
|
||||
**Bottom line:** The 1950X’s extra threads don’t offset the 7800X3D’s much faster cores. The 7800X3D is the better choice for this workload.
|
||||
|
||||
---
|
||||
|
||||
|
||||
40
.specstory/history/2025-12-23_23-54Z-untitled.md
Normal file
40
.specstory/history/2025-12-23_23-54Z-untitled.md
Normal file
@@ -0,0 +1,40 @@
|
||||
<!-- Generated by SpecStory, Markdown v2.1.0 -->
|
||||
|
||||
<!-- cursor Session c6b9ac3c-667a-4529-8923-79fe43eb7838 (2025-12-23 23:54Z) -->
|
||||
|
||||
# Untitled (2025-12-23 23:54Z)
|
||||
|
||||
_**User (2025-12-23 23:54Z)**_
|
||||
|
||||
It didn't work on the Threadripper. Additional context: @.specstory/history/2025-12-11_18-21Z-compress-png-sequences-with-multithreading.md
|
||||
|
||||
Mode: Auto-detect bit depth (preserve 16-bit if present)
|
||||
Input directory: input
|
||||
Output directory: output
|
||||
Scanning for image files...
|
||||
Found 646 JPG/JPEG files - ignoring (skipping)
|
||||
Found 6371 PNG files to process.
|
||||
Corrupted files will be moved to: corrupted
|
||||
Using 64 worker processes for compression...
|
||||
--------------------------------------------------------------------------------
|
||||
Traceback (most recent call last):
|
||||
File "E:\SequenceCompressor\compress_pngs.py", line 427, in <module>
|
||||
main()
|
||||
~~~~^^
|
||||
File "E:\SequenceCompressor\compress_pngs.py", line 328, in main
|
||||
with ProcessPoolExecutor(max_workers=max_workers) as executor:
|
||||
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Python314\Lib\concurrent\futures\process.py", line 671, in __init__
|
||||
raise ValueError(
|
||||
f"max_workers must be <= {_MAX_WINDOWS_WORKERS}")
|
||||
ValueError: max_workers must be <= 61
|
||||
Press any key to continue . . .
|
||||
|
||||
---
|
||||
|
||||
_**Agent (model default, mode Agent)**_
|
||||
|
||||
|
||||
|
||||
---
|
||||
|
||||
146
compress_pngs.py
146
compress_pngs.py
@@ -7,12 +7,21 @@ Compresses all PNG files in subdirectories with maximum parallelism.
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import platform
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from PIL import Image
|
||||
import multiprocessing
|
||||
import time
|
||||
|
||||
# Try to unlock ProcessPoolExecutor on Windows to bypass 61-worker limit
|
||||
try:
|
||||
import unlock_processpool
|
||||
unlock_processpool.please()
|
||||
UNLOCKED = True
|
||||
except ImportError:
|
||||
UNLOCKED = False
|
||||
|
||||
def compress_png(input_path, output_path, force_bitdepth=None):
|
||||
"""Compress a single PNG file.
|
||||
|
||||
@@ -31,13 +40,55 @@ def compress_png(input_path, output_path, force_bitdepth=None):
|
||||
new_size = os.path.getsize(output_path)
|
||||
savings = original_size - new_size
|
||||
savings_pct = (savings / original_size * 100) if original_size > 0 else 0
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, True)
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, True, False)
|
||||
|
||||
# Ensure output directory exists
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
original_size = os.path.getsize(input_path)
|
||||
|
||||
# Check for corrupted/empty files
|
||||
if original_size == 0:
|
||||
return (str(input_path), False, "CORRUPTED: File is 0 bytes (empty/placeholder)", 0, 0, 0, False, True)
|
||||
|
||||
# Try to open and validate the image
|
||||
try:
|
||||
img = Image.open(input_path)
|
||||
# Force load to detect corruption early
|
||||
img.load()
|
||||
except Exception as e:
|
||||
return (str(input_path), False, f"CORRUPTED: Cannot open/load image - {str(e)}", original_size, 0, 0, False, True)
|
||||
|
||||
# Validate image dimensions
|
||||
if img.width == 0 or img.height == 0:
|
||||
return (str(input_path), False, f"CORRUPTED: Invalid dimensions ({img.width}x{img.height})", original_size, 0, 0, False, True)
|
||||
|
||||
# Check if image appears completely black (potential corruption indicator)
|
||||
# But be careful - some images might legitimately be black
|
||||
# We'll only flag this if the file is suspiciously small for its dimensions
|
||||
try:
|
||||
# Sample a few pixels to check if all are black
|
||||
sample_size = min(100, img.width * img.height)
|
||||
pixels = list(img.getdata()[:sample_size])
|
||||
if pixels:
|
||||
# Check if all sampled pixels are black (0 or (0,0,0) or (0,0,0,0))
|
||||
all_black = True
|
||||
for pixel in pixels:
|
||||
if isinstance(pixel, (tuple, list)):
|
||||
if any(p > 0 for p in pixel):
|
||||
all_black = False
|
||||
break
|
||||
else:
|
||||
if pixel > 0:
|
||||
all_black = False
|
||||
break
|
||||
|
||||
# If all black AND file is suspiciously small, flag as potentially corrupted
|
||||
if all_black and original_size < (img.width * img.height * 0.1): # Less than 0.1 bytes per pixel
|
||||
return (str(input_path), False, f"CORRUPTED: Image appears all black with suspiciously small file size", original_size, 0, 0, False, True)
|
||||
except:
|
||||
# If we can't check pixels, continue anyway
|
||||
pass
|
||||
|
||||
# Determine target bit depth
|
||||
if force_bitdepth == '8':
|
||||
@@ -131,9 +182,11 @@ def compress_png(input_path, output_path, force_bitdepth=None):
|
||||
new_size = os.path.getsize(output_path)
|
||||
savings = original_size - new_size
|
||||
savings_pct = (savings / original_size * 100) if original_size > 0 else 0
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, False)
|
||||
return (str(input_path), True, None, original_size, new_size, savings_pct, False, False)
|
||||
except Exception as e:
|
||||
return (str(input_path), False, str(e), 0, 0, 0, False)
|
||||
# Check if error might indicate corruption
|
||||
is_corrupted = "truncated" in str(e).lower() or "cannot identify" in str(e).lower() or "corrupt" in str(e).lower()
|
||||
return (str(input_path), False, str(e), 0, 0, 0, False, is_corrupted)
|
||||
|
||||
def find_image_files(input_dir):
|
||||
"""Find all PNG and JPG files in subdirectories."""
|
||||
@@ -163,6 +216,31 @@ def get_output_path(input_path, input_dir, output_dir):
|
||||
# Create output path
|
||||
return output_dir / relative_path
|
||||
|
||||
def move_to_corrupted(input_path, input_dir, corrupted_dir):
    """Move a corrupted file into the corrupted folder, preserving directory structure.

    Args:
        input_path: Path (str or Path) of the file to move.
        input_dir: Root input directory. The file's location relative to this
            root is recreated under ``corrupted_dir``. If the file is not
            located under ``input_dir``, only its file name is used.
        corrupted_dir: Root directory that collects corrupted files.

    Returns:
        A ``(success, error)`` tuple: ``(True, None)`` when the file was moved,
        otherwise ``(False, message)`` where ``message`` is the string form of
        the exception that was raised. Failures are reported through the
        return value rather than raised, so the caller can log them and
        continue with the remaining files.
    """
    # Imported locally so this function does not rely on the module-level
    # import block already providing shutil.
    import shutil

    try:
        input_path = Path(input_path)
        input_dir = Path(input_dir)
        corrupted_dir = Path(corrupted_dir)

        # Get relative path from input directory; fall back to the bare file
        # name when the file does not live under input_dir.
        try:
            relative_path = input_path.relative_to(input_dir)
        except ValueError:
            relative_path = input_path.name

        # Create destination path
        dest_path = corrupted_dir / relative_path

        # Ensure destination directory exists
        dest_path.parent.mkdir(parents=True, exist_ok=True)

        # Move the file. shutil.move is used instead of Path.rename because
        # rename fails with OSError when source and destination are on
        # different filesystems/drives; shutil.move falls back to copy+delete.
        # Paths are passed as str for compatibility with older Python versions.
        shutil.move(str(input_path), str(dest_path))
        return True, None
    except Exception as e:
        # Deliberate best-effort boundary: report any failure to the caller
        # as a message instead of aborting the whole batch.
        return False, str(e)
|
||||
|
||||
def format_size(size_bytes):
|
||||
"""Format file size in human readable format."""
|
||||
for unit in ['B', 'KB', 'MB', 'GB']:
|
||||
@@ -235,17 +313,32 @@ def main():
|
||||
|
||||
print(f"Found {len(png_files)} PNG files to process.")
|
||||
|
||||
# Create output directory
|
||||
# Create output and corrupted directories
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
corrupted_dir = Path('corrupted')
|
||||
corrupted_dir.mkdir(exist_ok=True)
|
||||
print(f"Corrupted files will be moved to: {corrupted_dir}")
|
||||
|
||||
# Use all available CPU cores
|
||||
max_workers = multiprocessing.cpu_count()
|
||||
cpu_count = multiprocessing.cpu_count()
|
||||
if platform.system() == 'Windows' and not UNLOCKED:
|
||||
# Windows ProcessPoolExecutor has a maximum of 61 workers (unless unlocked)
|
||||
max_workers = min(cpu_count, 61)
|
||||
if cpu_count > 61:
|
||||
print(f"Detected {cpu_count} CPU threads, but Windows limits ProcessPoolExecutor to 61 workers.")
|
||||
print("Install 'unlock-processpool-win' package to use all cores: pip install unlock-processpool-win")
|
||||
else:
|
||||
max_workers = cpu_count
|
||||
if UNLOCKED:
|
||||
print(f"Using unlock-processpool-win to bypass Windows 61-worker limit")
|
||||
print(f"Using {max_workers} worker processes for compression...")
|
||||
print("-" * 80)
|
||||
|
||||
compressed = 0
|
||||
skipped = 0
|
||||
skipped = len(jpg_files) # Start with JPG files counted as skipped
|
||||
failed = 0
|
||||
corrupted = 0
|
||||
corrupted_files = []
|
||||
total_original_size = 0
|
||||
total_new_size = 0
|
||||
start_time = time.time()
|
||||
@@ -261,7 +354,7 @@ def main():
|
||||
# Process results as they complete
|
||||
for future in as_completed(future_to_file):
|
||||
result = future.result()
|
||||
file_path, success, error, orig_size, new_size, savings_pct, was_skipped = result
|
||||
file_path, success, error, orig_size, new_size, savings_pct, was_skipped, is_corrupted = result
|
||||
|
||||
if success:
|
||||
if was_skipped:
|
||||
@@ -280,7 +373,7 @@ def main():
|
||||
# Update every file or every 0.5 seconds, whichever comes first
|
||||
if processed == 1 or time_since_update >= 0.5:
|
||||
rate = processed / elapsed if elapsed > 0 else 0
|
||||
remaining = len(png_files) - processed
|
||||
remaining = len(png_files) - processed - failed
|
||||
eta_seconds = remaining / rate if rate > 0 and remaining > 0 else 0
|
||||
|
||||
total_savings = total_original_size - total_new_size
|
||||
@@ -290,11 +383,22 @@ def main():
|
||||
elapsed_str = format_time(elapsed)
|
||||
|
||||
print(f"[{processed:5d}/{len(png_files)}] "
|
||||
f"Compressed: {compressed} | Skipped: {skipped} | "
|
||||
f"Compressed: {compressed} | Skipped: {skipped} | Corrupted: {corrupted} | "
|
||||
f"Speed: {rate:.1f} files/sec | "
|
||||
f"Elapsed: {elapsed_str} | ETA: {eta_str} | "
|
||||
f"Saved: {format_size(total_savings)} ({total_savings_pct:.1f}%)", end='\r')
|
||||
last_update_time = current_time
|
||||
else:
|
||||
if is_corrupted:
|
||||
corrupted += 1
|
||||
# Move corrupted file to corrupted folder
|
||||
move_success, move_error = move_to_corrupted(file_path, input_dir, corrupted_dir)
|
||||
if move_success:
|
||||
corrupted_files.append((file_path, error, f"Moved to {corrupted_dir / Path(file_path).relative_to(input_dir)}"))
|
||||
print(f"\n[CORRUPTED] {file_path}: {error} -> Moved to corrupted folder")
|
||||
else:
|
||||
corrupted_files.append((file_path, error, f"Failed to move: {move_error}"))
|
||||
print(f"\n[CORRUPTED] {file_path}: {error} (Failed to move: {move_error})")
|
||||
else:
|
||||
failed += 1
|
||||
print(f"\n[ERROR] Failed to compress {file_path}: {error}")
|
||||
@@ -308,9 +412,13 @@ def main():
|
||||
print("\n" + "=" * 80)
|
||||
print(f"Compression complete!")
|
||||
print(f"Successfully compressed: {compressed} files")
|
||||
print(f"Skipped (already exist): {skipped} files")
|
||||
skipped_existing = skipped - len(jpg_files) if skipped >= len(jpg_files) else 0
|
||||
if jpg_files:
|
||||
print(f"Ignored (JPG/JPEG): {len(jpg_files)} files")
|
||||
print(f"Skipped: {skipped} files ({skipped_existing} already exist, {len(jpg_files)} JPG/JPEG)")
|
||||
else:
|
||||
print(f"Skipped (already exist): {skipped} files")
|
||||
if corrupted > 0:
|
||||
print(f"Corrupted (bad PNGs): {corrupted} files")
|
||||
print(f"Failed: {failed} files")
|
||||
print(f"Total time: {format_time(total_time)}")
|
||||
print(f"Average speed: {avg_rate:.2f} files/second")
|
||||
@@ -318,6 +426,22 @@ def main():
|
||||
print(f"Compressed size: {format_size(total_new_size)}")
|
||||
print(f"Total savings: {format_size(total_savings)} ({total_savings_pct:.1f}%)")
|
||||
|
||||
# Print list of corrupted files if any
|
||||
if corrupted_files:
|
||||
print("\n" + "=" * 80)
|
||||
print("CORRUPTED FILES LIST:")
|
||||
print("=" * 80)
|
||||
for item in corrupted_files:
|
||||
if len(item) == 3:
|
||||
file_path, error, move_status = item
|
||||
print(f" {file_path}")
|
||||
print(f" Reason: {error}")
|
||||
print(f" Status: {move_status}")
|
||||
else:
|
||||
file_path, error = item
|
||||
print(f" {file_path}")
|
||||
print(f" Reason: {error}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user