diff --git a/.specstory/history/2025-12-29_17-30Z-zipseq-thumbs-db-exclusion.md b/.specstory/history/2025-12-29_17-30Z-zipseq-thumbs-db-exclusion.md index bc25f15..c3e936e 100644 --- a/.specstory/history/2025-12-29_17-30Z-zipseq-thumbs-db-exclusion.md +++ b/.specstory/history/2025-12-29_17-30Z-zipseq-thumbs-db-exclusion.md @@ -251,3 +251,305 @@ No linting errors were found. The changes are ready to use. --- +_**User (2025-12-29 17:37Z)**_ + +does zipseq currently scan other types in the seq than png? e.g. jpg, exr + +--- + +_**Agent (model default, mode Agent)**_ + + + +--- + + +Checking which file types `zipseq` scans: + + +--- + + +
Tool use: **read_file** • Read file: zip_sequences.py + +
+
+ +--- + + +
Tool use: **read_file** • Read file: zip_sequences.py + +
+
+ +--- + + + +--- + + +Yes. `zipseq` scans multiple file types, not just PNG. + +```45:52:zip_sequences.py +SEQUENCE_EXTENSIONS = { + ".png", + ".jpg", + ".jpeg", + ".tif", + ".tiff", + ".exr", +} +``` + +The `find_sequence_dirs()` function uses this set to identify sequence directories: + +```491:491:zip_sequences.py + has_frames = any(Path(dirpath, f).suffix.lower() in SEQUENCE_EXTENSIONS for f in filenames) +``` + +Supported extensions: +- `.png` +- `.jpg` +- `.jpeg` +- `.tif` +- `.tiff` +- `.exr` + +All of these are included in archives and tracked in manifests. + +--- + +_**User (2025-12-29 17:37Z)**_ + +if there's a sequence with both jpg and png (by mistake) will it scan and compress both? + +--- + +_**Agent (model default, mode Agent)**_ + + + +--- + + +Yes. If a sequence directory contains both JPG and PNG files, both will be scanned and compressed. + +How it works: +1. `find_sequence_dirs()` checks if a directory has any files matching `SEQUENCE_EXTENSIONS` (`.png`, `.jpg`, `.jpeg`, `.tif`, `.tiff`, `.exr`). If it finds any, it treats the directory as a sequence directory. +2. Once identified as a sequence directory, `iter_sequence_files()` includes all files in that directory (except `Thumbs.db` and archive paths), regardless of extension: + +```517:524:zip_sequences.py +def iter_sequence_files(seq_dir: Path) -> Iterator[Path]: + for dirpath, dirnames, filenames in os.walk(seq_dir): + path = Path(dirpath) + dirnames[:] = [d for d in dirnames if d not in ("_archive", "_CURRENT")] + if is_archive_path(path): + continue + for filename in filenames: + # Ignore Thumbs.db files completely + if filename.lower() == "thumbs.db": + continue + yield path / filename +``` + +So if a directory has both `frame_001.jpg` and `frame_001.png`, both will be: +- Included in the state manifest +- Compressed into the archive + +The extension check only determines whether a directory is a sequence directory; it doesn't filter which files get included once it's identified as one. + +--- + +_**User (2025-12-29 17:37Z)**_ + +I had one with jpgs that I removed so only the pngs remain, but the manifest isn't changing. + +--- + +_**Agent (model default, mode Agent)**_ + + + +--- + + +Checking the state comparison logic to see why the manifest isn't updating after removing JPGs: + + +--- + + +
Tool use: **read_file** • Read file: zip_sequences.py + +
+
+ +--- + + +
+ Tool use: **codebase_search** • Searched codebase "How does the quick skip check work when directory mtime is older than archive?" • **14** results + +| File | Lines | +|------|-------| +| `zip_sequences.py` | L981-1156 | +| `zip_sequences.py` | L979-1156 | +| `scripts_old/0MoveToCurrent.bat` | L61 | +| `zip_sequences.py` | L563-650 | +| `zip_sequences.py` | L471-513 | +| `zip_sequences.py` | L516-560 | +| `zip_sequences.py` | L1159-1286 | +| `zip_sequences.py` | L676-969 | +| `zip_sequences.py` | L861-969 | +| `zip_sequences.py` | L984-1286 | +| `UpdateSequences.ps1` | L54-96 | +| `zip_sequences.py` | L584-657 | +| `zip_sequences.py` | L861-976 | +| `zip_sequences.py` | L557-650 | + +
+
+ +--- + + + +--- + + +The quick-skip optimization is likely the cause. It skips state computation if the directory mtime is <= the archive mtime, which can miss deletions when the directory mtime doesn't update. Checking the logic and making it more reliable: + + +--- + + +
Tool use: **read_file** • Read file: zip_sequences.py + +
+
+ +--- + + + +--- + + +The quick-skip optimization can miss file deletions when the directory mtime doesn't update. Adding a pre-check: if any files from the stored state are missing, we'll recompute the state instead of skipping. + + +--- + + + +
Tool use: **search_replace** • Edit file: zip_sequences.py + +**Chunk 1** +Lines added: 29, lines removed: 15 + +```diff +@@ -1030,19 +1030,33 @@ + if zip_path.exists() and stored_state is not None: + # Quick check: if directory mtime is older than archive, likely unchanged +- try: +- dir_mtime = seq_dir.stat().st_mtime_ns +- archive_mtime = zip_path.stat().st_mtime_ns +- # If directory wasn't modified since archive was created, skip state computation +- if dir_mtime <= archive_mtime: +- quick_skipped += 1 +- if quick_skipped <= 5: +- log("scan", f"Skipping {rel} (unchanged since archive)") +- # Still need to check for old .zip cleanup (we have .7z, so .zip is obsolete) +- if old_zip_path and old_zip_path.exists(): +- old_zip_path.unlink(missing_ok=True) +- old_state_path = state_path_for(old_zip_path) +- if old_state_path.exists(): +- old_state_path.unlink(missing_ok=True) +- continue ++ # But first verify that all files in stored state still exist (catches deletions) ++ try: ++ # Quick check: verify stored files still exist (catches file deletions) ++ stored_files_exist = True ++ for entry in stored_state.get("files", []): ++ # Skip Thumbs.db entries (they're filtered out anyway) ++ if Path(entry.get("path", "")).name.lower() == "thumbs.db": ++ continue ++ file_path = seq_dir / entry["path"] ++ if not file_path.exists(): ++ stored_files_exist = False ++ break ++ ++ # Only do mtime check if all stored files still exist ++ if stored_files_exist: ++ dir_mtime = seq_dir.stat().st_mtime_ns ++ archive_mtime = zip_path.stat().st_mtime_ns ++ # If directory wasn't modified since archive was created, skip state computation ++ if dir_mtime <= archive_mtime: ++ quick_skipped += 1 ++ if quick_skipped <= 5: ++ log("scan", f"Skipping {rel} (unchanged since archive)") ++ # Still need to check for old .zip cleanup (we have .7z, so .zip is obsolete) ++ if old_zip_path and old_zip_path.exists(): ++ old_zip_path.unlink(missing_ok=True) ++ old_state_path = state_path_for(old_zip_path) ++ if old_state_path.exists(): ++ old_state_path.unlink(missing_ok=True) ++ continue + except OSError: + # If stat fails, fall through to full state computation +``` + +
+ +
+ +--- + + + +--- + + + + + +--- + + +
+ Tool use: **read_lints** • Read lints for 1 file + +Lint paths: + +- `zip_sequences.py` + +Lint results + + +**No lint errors found** +
+
+ +--- + + + +--- + + +Fixed the issue. The quick-skip optimization was skipping state computation when the directory mtime was <= the archive mtime, which can miss file deletions if the directory mtime doesn't update. + +**The fix:** Before the quick-skip, the code now verifies that all files in the stored state still exist. If any are missing (like the JPGs you removed), it skips the quick-skip and recomputes the full state, which will detect the change and update the manifest. + +Now when you run `zipseq`: +1. It checks if stored files still exist +2. If JPGs are missing, it recomputes the state +3. It detects the difference and updates the manifest +4. It recompresses the archive with only the remaining PNGs + +The manifest should now update correctly when files are removed, even if the directory mtime doesn't change. + +--- + diff --git a/zip_sequences.py b/zip_sequences.py index 7437630..8106ba3 100644 --- a/zip_sequences.py +++ b/zip_sequences.py @@ -1029,21 +1029,35 @@ def run_zip(requested_workers: int | None, *, verbose: bool) -> int: # If .7z archive exists and we have stored state, do quick check before computing full state if zip_path.exists() and stored_state is not None: # Quick check: if directory mtime is older than archive, likely unchanged + # But first verify that all files in stored state still exist (catches deletions) try: - dir_mtime = seq_dir.stat().st_mtime_ns - archive_mtime = zip_path.stat().st_mtime_ns - # If directory wasn't modified since archive was created, skip state computation - if dir_mtime <= archive_mtime: - quick_skipped += 1 - if quick_skipped <= 5: - log("scan", f"Skipping {rel} (unchanged since archive)") - # Still need to check for old .zip cleanup (we have .7z, so .zip is obsolete) - if old_zip_path and old_zip_path.exists(): - old_zip_path.unlink(missing_ok=True) - old_state_path = state_path_for(old_zip_path) - if old_state_path.exists(): - old_state_path.unlink(missing_ok=True) - continue + # Quick check: verify stored files still exist (catches file deletions) + stored_files_exist = True + for entry in stored_state.get("files", []): + # Skip Thumbs.db entries (they're filtered out anyway) + if Path(entry.get("path", "")).name.lower() == "thumbs.db": + continue + file_path = seq_dir / entry["path"] + if not file_path.exists(): + stored_files_exist = False + break + + # Only do mtime check if all stored files still exist + if stored_files_exist: + dir_mtime = seq_dir.stat().st_mtime_ns + archive_mtime = zip_path.stat().st_mtime_ns + # If directory wasn't modified since archive was created, skip state computation + if dir_mtime <= archive_mtime: + quick_skipped += 1 + if quick_skipped <= 5: + log("scan", f"Skipping {rel} (unchanged since archive)") + # Still need to check for old .zip cleanup (we have .7z, so .zip is obsolete) + if old_zip_path and old_zip_path.exists(): + old_zip_path.unlink(missing_ok=True) + old_state_path = state_path_for(old_zip_path) + if old_state_path.exists(): + old_state_path.unlink(missing_ok=True) + continue except OSError: # If stat fails, fall through to full state computation pass