#!/usr/bin/env python3
"""Maintain zipped render sequences for Git hooks.

The default mode scans `Renders/`, produces compressed archives under
`Renders/_zipped/`, and reports which archives changed so they can be staged
(`git add Renders/_zipped`) and commits only track the compact files. Switch
to `--mode expand` to inflate the tracked archives back into the ignored
working directories after checkouts or pulls.
"""

from __future__ import annotations

import argparse
import json
import os
import platform
import shutil
import subprocess
import sys
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Iterator, Sequence

# Try to import psutil for cross-platform RAM detection.
try:
    import psutil

    HAS_PSUTIL = True
except ImportError:
    HAS_PSUTIL = False

# ctypes fallback for RAM detection on Windows.
if platform.system() == "Windows":
    try:
        import ctypes

        HAS_CTYPES = True
    except ImportError:
        HAS_CTYPES = False
else:
    HAS_CTYPES = False

RENDER_ROOT = Path("Renders")
ARCHIVE_ROOT = RENDER_ROOT / "_zipped"
SEQUENCE_EXTENSIONS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".tif",
    ".tiff",
    ".exr",
}
STATE_SUFFIX = ".meta.json"

DEFAULT_CONFIG = {
    "zipper": "7z",
    "compression": 9,
    "dailyFormat": "daily_YYMMDD",
    "Max7zInst": 0,  # Maximum concurrent 7z instances (0 = auto-calculate).
}
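
# A config.json that overrides the defaults might look like this (values are
# illustrative, not recommendations):
#   {
#     "zipper": "7z",
#     "compression": 7,
#     "Max7zInst": 4
#   }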


def load_config() -> dict:
    """Load config.json, preferring the project's copy over the repo's."""
    # First try the project's .config folder (relative to the current working
    # directory), then fall back to the config.json next to this script.
    cwd = Path.cwd()
    project_config = cwd / ".config" / "config.json"
    repo_config = Path(__file__).resolve().with_name("config.json")
    config_paths = [
        ("project", project_config),
        ("repo", repo_config),
    ]
    for _source, config_path in config_paths:
        try:
            if config_path.exists():
                text = config_path.read_text(encoding="utf-8")
                try:
                    data = json.loads(text)
                    if isinstance(data, dict):
                        merged = DEFAULT_CONFIG.copy()
                        merged.update(data)
                        return merged
                except json.JSONDecodeError:
                    continue
        except OSError:
            continue
    # No readable config found; return the defaults.
    return DEFAULT_CONFIG.copy()

CONFIG = load_config()

zipper_val = CONFIG.get("zipper", "7z")
# Handle both the old boolean format (True -> 7z, False -> zip) and the new
# string format ("7z" or "zip").
if isinstance(zipper_val, bool):
    ZIPPER_TYPE = "7z" if zipper_val else "zip"
else:
    ZIPPER_TYPE = str(zipper_val).lower()

COMPRESSION_LEVEL = CONFIG.get("compression", 9)
if isinstance(COMPRESSION_LEVEL, str):
    try:
        COMPRESSION_LEVEL = int(COMPRESSION_LEVEL)
    except ValueError:
        COMPRESSION_LEVEL = 9
if not isinstance(COMPRESSION_LEVEL, int):
    COMPRESSION_LEVEL = 9
COMPRESSION_LEVEL = max(0, min(9, COMPRESSION_LEVEL))

MAX_7Z_INSTANCES = CONFIG.get("Max7zInst", 0)
if MAX_7Z_INSTANCES is not None:
    if isinstance(MAX_7Z_INSTANCES, str):
        try:
            MAX_7Z_INSTANCES = int(MAX_7Z_INSTANCES)
        except ValueError:
            MAX_7Z_INSTANCES = 0
    if not isinstance(MAX_7Z_INSTANCES, int) or MAX_7Z_INSTANCES < 1:
        MAX_7Z_INSTANCES = 0
# Treat 0 as None (auto-calculate).
if MAX_7Z_INSTANCES == 0:
    MAX_7Z_INSTANCES = None

SEVEN_Z_EXE: str | None = None
if ZIPPER_TYPE == "7z":
    SEVEN_Z_EXE = shutil.which("7z") or shutil.which("7za")


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Sync render sequences with zipped archives.")
    parser.add_argument(
        "--mode",
        choices=("zip", "expand"),
        default="zip",
        help="zip sequences for commit (default) or expand tracked archives",
    )
    parser.add_argument("--jobs", type=int, help="max parallel workers")
    parser.add_argument("--verbose", action="store_true", help="print extra progress details")
    return parser.parse_args()
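
# Illustrative invocations (the hook wiring is an assumption; this script only
# implements the two modes):
#   pre-commit hook:    python zip_sequences.py --mode zip --jobs 4
#   post-checkout hook: python zip_sequences.py --mode expand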


def get_available_ram() -> int | None:
    """Get available RAM in bytes, reserving 20% for the system.

    Returns:
        Available RAM in bytes, or None if detection fails.
    """
    try:
        if HAS_PSUTIL:
            # Use psutil for cross-platform RAM detection.
            mem = psutil.virtual_memory()
            # Reserve 20% for the system; use 80% for compression jobs.
            return int(mem.total * 0.8)
        elif HAS_CTYPES and platform.system() == "Windows":
            # Windows fallback using ctypes and GlobalMemoryStatusEx.
            class MEMORYSTATUSEX(ctypes.Structure):
                _fields_ = [
                    ("dwLength", ctypes.c_ulong),
                    ("dwMemoryLoad", ctypes.c_ulong),
                    ("ullTotalPhys", ctypes.c_ulonglong),
                    ("ullAvailPhys", ctypes.c_ulonglong),
                    ("ullTotalPageFile", ctypes.c_ulonglong),
                    ("ullAvailPageFile", ctypes.c_ulonglong),
                    ("ullTotalVirtual", ctypes.c_ulonglong),
                    ("ullAvailVirtual", ctypes.c_ulonglong),
                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
                ]

            kernel32 = ctypes.windll.kernel32
            kernel32.GlobalMemoryStatusEx.argtypes = [ctypes.POINTER(MEMORYSTATUSEX)]
            kernel32.GlobalMemoryStatusEx.restype = ctypes.c_bool
            mem_status = MEMORYSTATUSEX()
            mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
            if kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
                # Reserve 20% for the system; use 80% for compression jobs.
                return int(mem_status.ullTotalPhys * 0.8)
    except Exception:
        pass
    return None


def estimate_ram_per_job(seq_dir: Path, seq_state: dict) -> int:
    """Estimate RAM usage per compression job based on folder size.

    Args:
        seq_dir: Path to the sequence directory.
        seq_state: State dictionary containing file information.

    Returns:
        Estimated RAM usage in bytes.
    """
    # Calculate the total folder size from seq_state.
    total_bytes = sum(entry.get("size", 0) for entry in seq_state.get("files", []))
    if ZIPPER_TYPE == "7z":
        # Base RAM: 500 MB per job.
        base_ram = 500 * 1024 * 1024
        # Compression factor: 7z can use significant RAM, especially for large
        # files. Use a 0.15x factor (conservative estimate accounting for 7z's
        # 80% usage).
        compression_factor = 0.15
        # For very large folders (>10 GB), cap at 8 GB per job.
        max_ram_per_job = 8 * 1024 * 1024 * 1024  # 8 GB
        large_folder_threshold = 10 * 1024 * 1024 * 1024  # 10 GB
        if total_bytes > large_folder_threshold:
            estimated_ram = max_ram_per_job
        else:
            estimated_ram = max(base_ram, int(total_bytes * compression_factor))
            estimated_ram = min(estimated_ram, max_ram_per_job)
        return estimated_ram
    # zip compression is more memory-efficient; use a conservative 1 GB per job.
    return 1024 * 1024 * 1024  # 1 GB
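
# Worked example (assumed sizes): a 4 GB sequence with the 7z backend yields
# max(500 MB, 0.15 * 4 GB) ~ 614 MB per job, while a 12 GB sequence crosses
# the 10 GB threshold and is capped at the 8 GB ceiling.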


def max_workers(
    requested: int | None,
    work_items: list[tuple[Path, Path, Path, dict]] | None = None,
    *,
    verbose: bool = False,
) -> tuple[int, int | None]:
    """Calculate the maximum worker count based on CPU and RAM constraints.

    Args:
        requested: User-requested worker count (from --jobs).
        work_items: List of work items (seq_dir, zip_path, state_path, seq_state).
        verbose: Whether to log RAM-based calculations.

    Returns:
        Tuple of (worker_count, per_job_memory_limit_bytes), where
        per_job_memory_limit_bytes is None if not using 7z or if RAM
        detection failed.
    """
    cpu = os.cpu_count() or 1
    cpu_limit = max(1, min(8, cpu))
    if requested and requested > 0:
        cpu_limit = min(requested, max(1, cpu))

    # If no work items were provided, return the CPU-based limit.
    if not work_items:
        return (cpu_limit, None)

    # Try to calculate a RAM-based limit.
    available_ram = get_available_ram()
    if available_ram is None:
        # RAM detection failed; fall back to the CPU limit.
        if verbose:
            log("zip", "RAM detection failed, using CPU-based worker limit", verbose_only=True, verbose=verbose)
        return (cpu_limit, None)

    # For 7z: use a fixed dictionary size and derive the worker count from it.
    if ZIPPER_TYPE == "7z":
        # Fixed dictionary size: 1 GB (1024 MB).
        FIXED_DICT_SIZE_MB = 1024
        fixed_dict_size_bytes = FIXED_DICT_SIZE_MB * 1024 * 1024
        num_work_items = len(work_items)

        # If Max7zInst is configured, use it, but still respect the user's
        # --jobs value and the number of work items.
        if MAX_7Z_INSTANCES is not None:
            final_limit = min(MAX_7Z_INSTANCES, num_work_items)
            if requested and requested > 0:
                final_limit = min(final_limit, requested)
            if verbose:
                log(
                    "zip",
                    f"Using Max7zInst={MAX_7Z_INSTANCES} from config → "
                    f"work items: {num_work_items}, requested: {requested}, final: {final_limit}",
                    verbose_only=True,
                    verbose=verbose,
                )
            return (final_limit, fixed_dict_size_bytes)

        # Auto-calculate from RAM when Max7zInst is not configured.
        # "Balls-to-the-walls" mode: use maximum resources.
        # 7z uses roughly 2-3x the dictionary size in RAM; assume 3x, which is
        # realistic while still allowing many concurrent workers.
        FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 3 * 1024 * 1024  # 3 GB per job
        # available_ram is already 80% of total (20% reserved for the system);
        # use 95% of it for compression jobs (aggressive mode).
        compression_ram = int(available_ram * 0.95)
        # Derive the worker limit from the fixed per-job RAM cost, capped at
        # the number of work items (no point in more workers than jobs).
        ram_limit = max(1, compression_ram // FIXED_RAM_PER_JOB)
        ram_limit = min(ram_limit, num_work_items)
        # Use the RAM limit directly (no CPU limit).
        final_limit = ram_limit
        if requested and requested > 0:
            final_limit = min(final_limit, requested)
        if verbose:
            ram_gb = available_ram / (1024**3)
            compression_ram_gb = compression_ram / (1024**3)
            ram_per_job_gb = FIXED_RAM_PER_JOB / (1024**3)
            log(
                "zip",
                f"RAM: {ram_gb:.1f}GB available (80% of total), "
                f"{compression_ram_gb:.1f}GB for compression (95%), "
                f"{ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → "
                f"RAM limit: {ram_limit}, work items: {num_work_items}, "
                f"requested: {requested}, final: {final_limit}",
                verbose_only=True,
                verbose=verbose,
            )
        return (final_limit, fixed_dict_size_bytes)

    # For zip compression, use the estimation-based approach: estimate the RAM
    # cost of each work item and size the pool from the largest one.
    ram_estimates = []
    for seq_dir, _zip_path, _state_path, seq_state in work_items:
        try:
            ram_estimates.append(estimate_ram_per_job(seq_dir, seq_state))
        except Exception:
            # If estimation fails, assume a 1 GB fallback for zip.
            ram_estimates.append(1024 * 1024 * 1024)
    if not ram_estimates:
        return (cpu_limit, None)
    max_ram_per_job = max(ram_estimates)
    ram_limit = max(1, available_ram // max_ram_per_job)
    ram_limit = min(ram_limit, 6)  # Conservative limit for zip.
    final_limit = min(cpu_limit, ram_limit)
    if verbose:
        ram_gb = available_ram / (1024**3)
        max_ram_gb = max_ram_per_job / (1024**3)
        log(
            "zip",
            f"RAM: {ram_gb:.1f}GB available, ~{max_ram_gb:.1f}GB per job → "
            f"RAM limit: {ram_limit}, CPU limit: {cpu_limit}, final: {final_limit}",
            verbose_only=True,
            verbose=verbose,
        )
    return (final_limit, None)
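
# Worked example (assumed hardware): on a 32 GB machine, get_available_ram()
# returns ~25.6 GB (80% of total); 95% of that is ~24.3 GB, so at 3 GB per 7z
# job the auto-calculated pool is 8 workers, before the work-item and --jobs caps.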


def log(mode: str, message: str, *, verbose_only: bool = False, verbose: bool = False) -> None:
    if verbose_only and not verbose:
        return
    print(f"[{mode}] {message}")


def is_archive_path(path: Path) -> bool:
    return any(part in ("_archive", "_CURRENT") for part in path.parts)


def find_sequence_dirs(root: Path) -> Iterator[Path]:
    for dirpath, dirnames, filenames in os.walk(root):
        path = Path(dirpath)
        # Prune ignored folders in place so os.walk never descends into them.
        dirnames[:] = [d for d in dirnames if d not in ("_archive", "_CURRENT")]
        if is_archive_path(path):
            continue
        has_frames = any(Path(dirpath, f).suffix.lower() in SEQUENCE_EXTENSIONS for f in filenames)
        if has_frames:
            yield path


def iter_sequence_files(seq_dir: Path) -> Iterator[Path]:
    for dirpath, dirnames, filenames in os.walk(seq_dir):
        path = Path(dirpath)
        dirnames[:] = [d for d in dirnames if d not in ("_archive", "_CURRENT")]
        if is_archive_path(path):
            continue
        for filename in filenames:
            yield path / filename


def compute_state(seq_dir: Path) -> dict:
    entries = []
    files = sorted(
        iter_sequence_files(seq_dir),
        key=lambda p: p.relative_to(seq_dir).as_posix(),
    )
    for file_path in files:
        stat = file_path.stat()
        entries.append(
            {
                "path": file_path.relative_to(seq_dir).as_posix(),
                "size": stat.st_size,
                "mtime_ns": stat.st_mtime_ns,
            }
        )
    return {"files": entries}
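
# Serialized to the .meta.json sidecar, the state looks like this
# (illustrative entry):
#   {"files": [{"path": "beauty.0001.exr", "size": 1048576, "mtime_ns": 1731000000000000000}]}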


def current_state(seq_dir: Path) -> dict:
    if not seq_dir.exists() or not seq_dir.is_dir():
        return {"files": []}
    return compute_state(seq_dir)


def load_state(state_path: Path) -> dict | None:
    if not state_path.exists():
        return None
    try:
        return json.loads(state_path.read_text())
    except json.JSONDecodeError:
        return None


def state_changed(seq_state: dict, stored_state: dict | None) -> bool:
    if stored_state is None:
        return True
    return seq_state != stored_state


def archive_path_for(seq_dir: Path) -> Path:
    rel = seq_dir.relative_to(RENDER_ROOT)
    suffix = ".7z" if ZIPPER_TYPE == "7z" else ".zip"
    return (ARCHIVE_ROOT / rel).with_suffix(suffix)


def sequence_dir_for(zip_path: Path) -> Path:
    rel = zip_path.relative_to(ARCHIVE_ROOT)
    return (RENDER_ROOT / rel).with_suffix("")


def state_path_for(zip_path: Path) -> Path:
    return zip_path.with_suffix(zip_path.suffix + STATE_SUFFIX)
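
# Example round trip with the 7z backend (hypothetical sequence path):
#   Renders/shot010/beauty -> Renders/_zipped/shot010/beauty.7z
#   with the state sidecar at Renders/_zipped/shot010/beauty.7z.meta.json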


def zip_sequence(seq_dir: Path, zip_path: Path, per_job_memory_limit: int | None = None) -> None:
    if ZIPPER_TYPE == "7z":
        if SEVEN_Z_EXE is None:
            raise RuntimeError(
                "7z compression requested but 7z executable not found in PATH. "
                "Please install 7z (e.g., via Chocolatey: choco install 7zip) "
                "or set zipper to 'zip' in config.json"
            )
        zip_path.parent.mkdir(parents=True, exist_ok=True)

        # When creating a .7z file, remove any existing .zip for the same sequence.
        if zip_path.suffix == ".7z":
            old_zip_path = zip_path.with_suffix(".zip")
            if old_zip_path.exists():
                old_zip_path.unlink(missing_ok=True)
                old_state_path = state_path_for(old_zip_path)
                if old_state_path.exists():
                    old_state_path.unlink(missing_ok=True)

        # Build the list of files to archive, as paths relative to seq_dir.
        file_list = []
        for file_path in iter_sequence_files(seq_dir):
            file_list.append(file_path.relative_to(seq_dir).as_posix())
        if not file_list:
            raise RuntimeError(f"No files found to archive in {seq_dir}")

        # Create the archive in a temporary location first to avoid issues
        # with corrupted existing files.
        temp_zip = None
        list_file_path = None
        try:
            # Reserve a temporary archive path, but don't create the file;
            # 7z must create it itself.
            temp_zip = Path(tempfile.mktemp(suffix=".7z", dir=zip_path.parent))

            # Write the file list to a temporary list file.
            fd, temp_path = tempfile.mkstemp(suffix=".lst", text=True)
            list_file_path = Path(temp_path)
            with os.fdopen(fd, "w", encoding="utf-8") as list_file:
                for rel_path in file_list:
                    list_file.write(rel_path + "\n")
                list_file.flush()
                os.fsync(list_file.fileno())  # Ensure data is written to disk.
            # The context manager closed the file; a small delay lets the OS
            # release the handle.
            time.sleep(0.1)

            # Use absolute paths for both the list file and the temp archive.
            list_file_abs = list_file_path.resolve()
            temp_zip_abs = temp_zip.resolve()

            # Create the archive in the temp location (7z creates it fresh).
            cmd = [
                SEVEN_Z_EXE,
                "a",
                "-y",
                "-bb0",  # Suppress progress output.
                f"-mx={COMPRESSION_LEVEL}",
                "-t7z",  # Use the 7z format, not zip.
            ]
            # Add the fixed dictionary size if specified (7z memory usage is
            # controlled by the dictionary size).
            if per_job_memory_limit is not None:
                # per_job_memory_limit is actually the fixed dictionary size in bytes.
                dict_size_mb = per_job_memory_limit // (1024 * 1024)
                cmd.append(f"-md={dict_size_mb}m")
            cmd.extend([
                str(temp_zip_abs),
                f"@{list_file_abs}",
            ])
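            # The assembled command resembles (illustrative paths):
            #   7z a -y -bb0 -mx=9 -t7z -md=1024m /abs/tmpXXXX.7z @/abs/tmpYYYY.lst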
            result = subprocess.run(
                cmd,
                cwd=seq_dir,
                check=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            if result.returncode != 0:
                error_msg = result.stderr.strip() if result.stderr else "Unknown error"
                if result.stdout:
                    error_msg += f"\nstdout: {result.stdout.strip()}"
                raise RuntimeError(f"7z compression failed: {error_msg}")

            # Move the temp archive into place, replacing any existing file.
            if zip_path.exists():
                zip_path.unlink()
            temp_zip.replace(zip_path)
            temp_zip = None  # Mark as moved so we don't delete it.
        finally:
            # Clean up the temp archive if it wasn't moved.
            if temp_zip and temp_zip.exists():
                try:
                    temp_zip.unlink(missing_ok=True)
                except OSError:
                    pass
            # Clean up the list file, retrying in case 7z still has it open.
            if list_file_path and list_file_path.exists():
                for attempt in range(3):
                    try:
                        list_file_path.unlink(missing_ok=True)
                        break
                    except PermissionError:
                        if attempt < 2:
                            time.sleep(0.1)  # Wait 100 ms before retrying.
                        else:
                            # Give up; the OS will clean up the temp file eventually.
                            pass
        return

    # Use zipfile (only if ZIPPER_TYPE == "zip").
    if ZIPPER_TYPE == "zip":
        from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile

        zip_path.parent.mkdir(parents=True, exist_ok=True)
        if COMPRESSION_LEVEL <= 0:
            compression = ZIP_STORED
            zip_kwargs = {}
        else:
            compression = ZIP_DEFLATED
            zip_kwargs = {"compresslevel": COMPRESSION_LEVEL}

        with ZipFile(zip_path, "w", compression=compression, **zip_kwargs) as archive:
            for file_path in iter_sequence_files(seq_dir):
                archive.write(file_path, arcname=file_path.relative_to(seq_dir).as_posix())
        return

    # Unknown ZIPPER_TYPE: fail with a clear error.
    raise RuntimeError(
        f"Unsupported ZIPPER_TYPE: {ZIPPER_TYPE!r}. "
        f"Expected '7z' or 'zip'. "
        f"Config zipper value: {CONFIG.get('zipper', 'not set')!r}"
    )


def expand_sequence(zip_path: Path, seq_state: dict) -> None:
    target_dir = sequence_dir_for(zip_path)
    if target_dir.exists():
        shutil.rmtree(target_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    if ZIPPER_TYPE == "7z":
        if SEVEN_Z_EXE is None:
            raise RuntimeError(
                "7z extraction requested but 7z executable not found in PATH. "
                "Please install 7z or set zipper to 'zip' in config.json"
            )
        cmd = [
            SEVEN_Z_EXE,
            "x",
            "-y",
            str(zip_path),
            f"-o{target_dir}",
        ]
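        # The assembled command resembles (illustrative path):
        #   7z x -y Renders/_zipped/shot010/beauty.7z -oRenders/shot010/beauty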
        result = subprocess.run(
            cmd,
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            error_msg = result.stderr.strip() if result.stderr else "Unknown error"
            if result.stdout:
                error_msg += f"\nstdout: {result.stdout.strip()}"
            raise RuntimeError(f"7z extraction failed: {error_msg}")
    elif ZIPPER_TYPE == "zip":
        from zipfile import ZipFile

        with ZipFile(zip_path, "r") as archive:
            archive.extractall(target_dir)
    else:
        raise RuntimeError(
            f"Unsupported ZIPPER_TYPE: {ZIPPER_TYPE!r}. "
            f"Expected '7z' or 'zip'. "
            f"Config zipper value: {CONFIG.get('zipper', 'not set')!r}"
        )

    # Restore the recorded modification times so state comparisons stay stable.
    for entry in seq_state.get("files", []):
        file_path = target_dir / entry["path"]
        if file_path.exists():
            os.utime(file_path, ns=(entry["mtime_ns"], entry["mtime_ns"]))


def process_zip(
    seq_dir: Path,
    zip_path: Path,
    state_path: Path,
    seq_state: dict,
    per_job_memory_limit: int | None,
    *,
    verbose: bool,
) -> Sequence[Path]:
    log("zip", f"{seq_dir} -> {zip_path}", verbose_only=True, verbose=verbose)
    zip_sequence(seq_dir, zip_path, per_job_memory_limit)
    state_path.write_text(json.dumps(seq_state, indent=2))
    return (zip_path, state_path)


def process_expand(zip_path: Path, state: dict, *, verbose: bool) -> None:
    log("expand", f"{zip_path} -> {sequence_dir_for(zip_path)}", verbose_only=True, verbose=verbose)
    expand_sequence(zip_path, state)


def run_zip(requested_workers: int | None, *, verbose: bool) -> int:
    work_items: list[tuple[Path, Path, Path, dict]] = []
    if RENDER_ROOT.exists():
        for seq_dir in find_sequence_dirs(RENDER_ROOT):
            # The target archive path ends in .7z when ZIPPER_TYPE is "7z".
            zip_path = archive_path_for(seq_dir)
            state_path = state_path_for(zip_path)

            # Quick check: if the archive exists, load the stored state first (fast).
            stored_state = load_state(state_path)

            # Check whether we need to upgrade from .zip to .7z.
            old_zip_path = None
            old_stored_state = None
            if ZIPPER_TYPE == "7z":
                old_zip_path = zip_path.with_suffix(".zip")
                if old_zip_path.exists():
                    old_state_path = state_path_for(old_zip_path)
                    old_stored_state = load_state(old_state_path)
                # If the old .zip exists and the .7z doesn't, compare against
                # the old .zip's state.
                if not zip_path.exists() and old_stored_state is not None:
                    stored_state = old_stored_state

            # If the .7z archive exists and we have a stored state, try a
            # cheap mtime check before computing the full state.
            if zip_path.exists() and stored_state is not None:
                try:
                    dir_mtime = seq_dir.stat().st_mtime_ns
                    archive_mtime = zip_path.stat().st_mtime_ns
                    # If the directory wasn't modified since the archive was
                    # created, skip the state computation.
                    if dir_mtime <= archive_mtime:
                        # Still clean up the old .zip (the .7z makes it obsolete).
                        if old_zip_path and old_zip_path.exists():
                            old_zip_path.unlink(missing_ok=True)
                            old_state_path = state_path_for(old_zip_path)
                            if old_state_path.exists():
                                old_state_path.unlink(missing_ok=True)
                        continue
                except OSError:
                    # If stat fails, fall through to the full state computation.
                    pass

            # Compute the current state only when necessary.
            seq_state = compute_state(seq_dir)
            if not seq_state["files"]:
                continue

            # Check whether the state changed.
            if stored_state is not None and not state_changed(seq_state, stored_state):
                # The on-disk files match the stored state.
                if zip_path.exists():
                    # The .7z exists and is up to date; clean up any old .zip.
                    if old_zip_path and old_zip_path.exists():
                        old_zip_path.unlink(missing_ok=True)
                        old_state_path = state_path_for(old_zip_path)
                        if old_state_path.exists():
                            old_state_path.unlink(missing_ok=True)
                    continue
                if old_zip_path and old_zip_path.exists() and old_stored_state is not None:
                    # The .7z doesn't exist, but the .zip does and its metadata
                    # matches; keep the .zip and don't create a .7z.
                    continue
                # No archive exists but the state matches (shouldn't happen);
                # skip to be safe.
                continue

            work_items.append((seq_dir, zip_path, state_path, seq_state))
    if not work_items:
        if not RENDER_ROOT.exists():
            log("zip", "Render root 'Renders' not found; nothing to zip.")
        else:
            log("zip", "Archives already up to date; no sequences needed zipping.")
        return 0

    # Calculate a RAM-aware worker count from the work items.
    worker_count, per_job_memory_limit = max_workers(requested_workers, work_items, verbose=verbose)
    updated_paths: list[Path] = []
    total = len(work_items)
    completed = 0
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        future_map = {
            executor.submit(
                process_zip, seq_dir, zip_path, state_path, seq_state, per_job_memory_limit, verbose=verbose
            ): seq_dir
            for seq_dir, zip_path, state_path, seq_state in work_items
        }
        for future in as_completed(future_map):
            updated_paths.extend(future.result())
            completed += 1
            seq_dir = future_map[future]
            rel = seq_dir.relative_to(RENDER_ROOT)
            log("zip", f"{completed}/{total} {rel}")
    # Each sequence contributes two paths (archive + state file).
    updated_count = len(updated_paths) // 2
    log("zip", f"Updated {updated_count} sequence archive(s).", verbose=verbose)
    if updated_paths:
        log(
            "zip",
            "Archives updated. Stage manually with `git add Renders/_zipped`, if desired.",
            verbose_only=True,
            verbose=verbose,
        )
    removed = cleanup_orphan_archives(verbose=verbose)
    if removed:
        log("zip", f"Removed {removed} orphan archive(s).", verbose=verbose)
    return updated_count


def run_expand(worker_count: int, *, verbose: bool) -> int:
    if not ARCHIVE_ROOT.exists():
        log("expand", "No archives to expand (missing 'Renders/_zipped').")
        return 0
    work_items: list[tuple[Path, dict]] = []
    # Look for both .zip and .7z archives.
    archive_patterns = ["*.zip", "*.7z"]
    for pattern in archive_patterns:
        for zip_path in ARCHIVE_ROOT.rglob(pattern):
            state_path = state_path_for(zip_path)
            seq_state = load_state(state_path)
            if seq_state is None:
                log("expand", f"Skipping {zip_path} (missing metadata)")
                continue

            target_dir = sequence_dir_for(zip_path)
            if current_state(target_dir) == seq_state:
                continue

            work_items.append((zip_path, seq_state))
    if not work_items:
        log("expand", "Working folders already match archives; nothing to expand.")
        return 0
    total = len(work_items)
    completed = 0
    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        future_map = {
            executor.submit(process_expand, zip_path, seq_state, verbose=verbose): zip_path
            for zip_path, seq_state in work_items
        }
        for future in as_completed(future_map):
            future.result()
            completed += 1
            zip_path = future_map[future]
            rel = zip_path.relative_to(ARCHIVE_ROOT)
            log("expand", f"{completed}/{total} {rel}")
    log("expand", f"Refreshed {len(work_items)} sequence folder(s).", verbose=verbose)
    return len(work_items)


def cleanup_orphan_archives(*, verbose: bool) -> int:
    if not ARCHIVE_ROOT.exists():
        return 0
    removed: list[Path] = []
    # Look for both .zip and .7z archives.
    archive_patterns = ["*.zip", "*.7z"]
    for pattern in archive_patterns:
        for zip_path in ARCHIVE_ROOT.rglob(pattern):
            seq_dir = sequence_dir_for(zip_path)
            if seq_dir.exists():
                continue

            rel = zip_path.relative_to(ARCHIVE_ROOT)
            log("zip", f"Removing orphan archive {rel}", verbose_only=True, verbose=verbose)

            zip_path.unlink(missing_ok=True)
            state_path = state_path_for(zip_path)
            if state_path.exists():
                state_path.unlink()
            removed.append(zip_path)
    if not removed:
        return 0
    # Prune now-empty directories, deepest first.
    for parent in sorted({p.parent for p in removed}, key=lambda p: len(p.parts), reverse=True):
        if not parent.exists():
            continue
        while parent != ARCHIVE_ROOT and not any(parent.iterdir()):
            parent.rmdir()
            parent = parent.parent
    return len(removed)


def main() -> int:
    args = parse_args()
    if args.mode == "expand":
        # For expand mode, use the simple CPU-based worker calculation.
        workers, _ = max_workers(args.jobs, work_items=None, verbose=args.verbose)
        run_expand(workers, verbose=args.verbose)
        return 0
    # For zip mode, the work items are gathered inside run_zip.
    updated = run_zip(args.jobs, verbose=args.verbose)
    return 0 if updated >= 0 else 1


if __name__ == "__main__":
    try:
        raise SystemExit(main())
    except Exception as exc:  # Broad on purpose, to surface unexpected errors cleanly.
        print(f"Sequence sync failed: {exc}", file=sys.stderr)
        raise