fix zipseq not clearing zips

This commit is contained in:
Nathan
2025-11-25 13:17:46 -07:00
parent 0cff26cb0d
commit 68330de0fb
3 changed files with 1892 additions and 21 deletions

View File

@@ -18,6 +18,7 @@ import subprocess
import sys
import tempfile
import time
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Iterator, Sequence
@@ -888,6 +889,9 @@ def run_zip(requested_workers: int | None, *, verbose: bool) -> int:
f"Summary: scanned {total_scanned}, quick-skipped {quick_skipped}, "
f"state-skipped {state_skipped}, empty {empty_dirs}, queued {queued}",
)
removed = cleanup_orphan_archives(verbose=verbose)
if removed:
log("zip", f"Removed {removed} orphan archive(s).", verbose=verbose)
return 0
# Calculate RAM-aware worker count based on work items
@@ -986,36 +990,83 @@ def run_expand(worker_count: int, *, verbose: bool) -> int:
def _display_path(path: Path) -> Path:
    """Return *path* relative to ARCHIVE_ROOT for log output.

    Falls back to *path* unchanged when it does not lie under ARCHIVE_ROOT,
    so building a log message can never abort a cleanup step.
    """
    try:
        return path.relative_to(ARCHIVE_ROOT)
    except ValueError:
        return path


def _remove_orphan_archive(zip_path: Path, *, verbose: bool) -> Path | None:
    """Delete *zip_path* and its state file if its sequence directory is gone.

    Returns the resolved path of the archive when it was treated as an orphan,
    or ``None`` when a matching sequence directory exists and the archive is kept.
    Exceptions from the filesystem or from the path helpers propagate to the caller.
    """
    zip_path_abs = zip_path.resolve()
    # Derive the state path before the archive is touched.
    state_path = state_path_for(zip_path)
    state_path_abs = state_path.resolve()
    seq_dir_abs = sequence_dir_for(zip_path).resolve()
    rel = _display_path(zip_path)

    # is_dir() is False for missing paths, so this also covers the exists() check.
    if seq_dir_abs.is_dir():
        log("zip", f"Archive {rel} has matching sequence directory; keeping", verbose_only=True, verbose=verbose)
        return None

    # Sequence directory doesn't exist - this is an orphan archive.
    log("zip", f"Removing orphan archive {rel}", verbose_only=False, verbose=verbose)

    # EAFP instead of exists()+unlink(): the file may vanish between the two calls.
    try:
        zip_path_abs.unlink()
    except FileNotFoundError:
        pass
    else:
        log("zip", f"Deleted archive: {rel}", verbose_only=True, verbose=verbose)

    try:
        state_path_abs.unlink()
    except FileNotFoundError:
        pass
    else:
        state_rel = _display_path(state_path)
        log("zip", f"Removed orphan metadata {state_rel}", verbose_only=False, verbose=verbose)

    return zip_path_abs


def _prune_empty_parents(removed: list[Path], *, verbose: bool) -> None:
    """Remove directories left empty by deleting *removed*, deepest first.

    Only directories strictly inside the resolved ARCHIVE_ROOT are considered;
    the root itself and anything outside it are never removed.
    """
    archive_root_abs = ARCHIVE_ROOT.resolve()
    # Entries in `removed` are already resolved, so their parents are too.
    parents = sorted({p.parent for p in removed}, key=lambda p: len(p.parts), reverse=True)
    for directory in parents:
        try:
            while (
                archive_root_abs in directory.parents
                and directory.is_dir()
                and not any(directory.iterdir())
            ):
                directory.rmdir()
                directory = directory.parent
        except OSError as e:
            # Best effort: the directory may be in use or have been repopulated.
            log("zip", f"Could not remove directory {directory}: {e}", verbose_only=True, verbose=verbose)


def cleanup_orphan_archives(*, verbose: bool) -> int:
    """Delete archives under ARCHIVE_ROOT whose sequence directory no longer exists.

    Scans ARCHIVE_ROOT recursively for ``*.zip`` and ``*.7z`` files. An archive
    whose ``sequence_dir_for(...)`` path is not a directory is removed together
    with its ``state_path_for(...)`` file, and directories emptied by the removal
    are pruned. A failure on one archive is logged and does not stop the scan.

    Args:
        verbose: Forwarded to ``log`` to control verbose-only messages.

    Returns:
        The number of archives treated as orphans and removed.
    """
    if not ARCHIVE_ROOT.exists():
        log("zip", "Archive root does not exist; nothing to clean up.", verbose_only=True, verbose=verbose)
        return 0

    removed: list[Path] = []
    log("zip", f"Scanning for orphan archives in {ARCHIVE_ROOT.resolve()}", verbose_only=True, verbose=verbose)

    # Look for both .zip and .7z archives.
    for pattern in ("*.zip", "*.7z"):
        try:
            for zip_path in ARCHIVE_ROOT.rglob(pattern):
                try:
                    removed_path = _remove_orphan_archive(zip_path, verbose=verbose)
                except Exception as e:
                    # Surface the failure even in non-verbose runs, then keep going;
                    # the traceback is only useful when debugging.
                    log("zip", f"Error processing archive {_display_path(zip_path)}: {e}", verbose_only=False, verbose=verbose)
                    log("zip", f"Traceback: {traceback.format_exc()}", verbose_only=True, verbose=verbose)
                    continue
                if removed_path is not None:
                    removed.append(removed_path)
        except Exception as e:
            # The directory walk itself failed; report it and try the next pattern.
            log("zip", f"Error scanning for {pattern} archives: {e}", verbose_only=False, verbose=verbose)
            log("zip", f"Traceback: {traceback.format_exc()}", verbose_only=True, verbose=verbose)

    if not removed:
        log("zip", "No orphan archives found.", verbose_only=True, verbose=verbose)
        return 0

    # Clean up empty parent directories.
    _prune_empty_parents(removed, verbose=verbose)
    return len(removed)