Compare commits

...

8 Commits

Author SHA1 Message Date
Nathan
6d4cdb0549 zipseq 7zinst of 0 bottleneck fix 2025-11-11 16:31:09 -07:00
Nathan
55a2074c62 fix zipseq ignoring zip metadata and not replacing with 7z 2025-11-11 12:45:44 -07:00
Nathan
a152d17136 work config 2025-11-11 10:27:48 -07:00
ff6d3f584f zipseq 7z instance tolerance for RAM overflow 2025-11-10 20:56:57 -07:00
f795ea3848 fix gitattributes deployment 2025-11-10 20:41:48 -07:00
f2e3a96bf2 zipseq ignore _CURRENT 2025-11-10 19:32:31 -07:00
23562de9cf deploy git attributes and ignore 2025-11-10 19:23:40 -07:00
5b409d771b Merge branch 'main' into HOME 2025-11-10 18:52:32 -07:00
6 changed files with 5576 additions and 42 deletions

View File

@@ -19134,7 +19134,6 @@ _**Agent (model default, mode Agent)**_
---
The issue: `UpdateSequences.bat` is in `Renders\04 Securing Your Seam\`, so going up one level (`%script_dir%..`) gives `Renders\` instead of the project root. Checking the batch file logic:
@@ -19160,7 +19159,6 @@ The issue: `UpdateSequences.bat` is in `Renders\04 Securing Your Seam\`, so goin
---
The issue: `UpdateSequences.bat` is in `Renders\04 Securing Your Seam\`, so going up one level (`..`) gives `Renders\` instead of the project root. `ZipSeqArchv.bat` works because it's directly in `Renders\`. Updating `UpdateSequences.bat` to go up two levels to reach the project root:
@@ -19193,7 +19191,6 @@ Lines added: 1, lines removed: 1
---
Fixed. `UpdateSequences.bat` is in subdirectories like `Renders\04 Securing Your Seam\`, so it needs to go up two levels to reach the project root, not one.
The script now goes up two levels (`..\..`) to find `P:\250827_FestivalTurf\.config\config.json`.
@@ -19202,3 +19199,636 @@ Redeploy with `ConfigLoader.ps1` to update all `UpdateSequences.bat` files in th
---
_**User (2025-11-11 19:15Z)**_
@2025-11-11_01-50Z-update-configloader-to-overwrite-gitattributes-and-gitignore.md we pulled some changes from home. the zipper is removing all non-7z archives, but it's not replacing them with new 7z archives. We need the zips to be kept if their metadata is still consistent, and only replaced with 7z if the metadata are not consistent.
---
_**Agent (model default, mode Agent)**_
---
---
Checking the upgrade logic from `.zip` to `.7z` to ensure `.zip` files are only replaced when metadata changes:
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
Checking the `zip_sequence` function for `.zip` deletion logic:
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
Fixing the logic: when a `.zip` exists, `.7z` doesn't, and metadata matches, keep the `.zip` and don't create a `.7z`. Updating the code:
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
Fixing the logic: when a `.zip` exists, `.7z` doesn't, and metadata matches, keep the `.zip` and don't create a `.7z`. Updating the code:
---
<tool-use data-tool-type="write" data-tool-name="search_replace">
<details><summary>Tool use: **code_edit** • Edit file: zip_sequences.py</summary>
**Chunk 1**
Lines added: 3, lines removed: 2
```diff
@@ -650,14 +650,15 @@
# Check if we need to upgrade from .zip to .7z
old_zip_path = None
+ old_stored_state = None
if ZIPPER_TYPE == &quot;7z&quot;:
old_zip_path = zip_path.with_suffix(&quot;.zip&quot;)
if old_zip_path.exists():
old_state_path = state_path_for(old_zip_path)
old_stored_state = load_state(old_state_path)
- # If old .zip exists and archive doesn&#39;t, we&#39;ll check state later
+ # If old .zip exists and .7z doesn&#39;t, use old .zip&#39;s state for comparison
if not zip_path.exists() and old_stored_state is not None:
stored_state = old_stored_state
- # If archive exists and we have stored state, do quick check before computing full state
+ # If .7z archive exists and we have stored state, do quick check before computing full state
if zip_path.exists() and stored_state is not None:
# Quick check: if directory mtime is older than archive, likely unchanged
```
**Chunk 2**
Lines added: 1, lines removed: 1
```diff
@@ -667,5 +668,5 @@
# If directory wasn&#39;t modified since archive was created, skip state computation
if dir_mtime &lt;= archive_mtime:
- # Still need to check for old .zip cleanup
+ # Still need to check for old .zip cleanup (we have .7z, so .zip is obsolete)
if old_zip_path and old_zip_path.exists():
old_zip_path.unlink(missing_ok=True)
```
**Chunk 3**
Lines added: 15, lines removed: 7
```diff
@@ -685,11 +686,19 @@
# Check if state changed
if stored_state is not None and not state_changed(seq_state, stored_state):
- # Target archive is up to date, but we might still need to clean up old .zip
- if old_zip_path and old_zip_path.exists():
- old_zip_path.unlink(missing_ok=True)
- old_state_path = state_path_for(old_zip_path)
- if old_state_path.exists():
- old_state_path.unlink(missing_ok=True)
- continue
+ # Metadata matches stored state
+ if zip_path.exists():
+ # .7z exists and is up to date, clean up old .zip if it exists
+ if old_zip_path and old_zip_path.exists():
+ old_zip_path.unlink(missing_ok=True)
+ old_state_path = state_path_for(old_zip_path)
+ if old_state_path.exists():
+ old_state_path.unlink(missing_ok=True)
+ elif old_zip_path and old_zip_path.exists() and old_stored_state is not None:
+ # .7z doesn&#39;t exist, but .zip exists and metadata matches
+ # Keep the .zip file, don&#39;t create .7z
+ continue
+ else:
+ # No archive exists, but state matches (shouldn&#39;t happen, but be safe)
+ continue
work_items.append((seq_dir, zip_path, state_path, seq_state))
```
</details>
</tool-use>
---
---
Checking for linting errors:
---
<tool-use data-tool-type="generic" data-tool-name="read_lints">
<details>
<summary>Tool use: **read_lints** • Read lints for 1 file</summary>
Lint paths:
- `zip_sequences.py`
Lint results
**No lint errors found**
</details>
</tool-use>
---
---
Fixed the logic. The script now:
1. **Keeps `.zip` files when metadata matches**: If a `.zip` exists, no `.7z` exists, and the metadata matches the current state, it keeps the `.zip` and does not create a `.7z`.
2. **Only replaces `.zip` with `.7z` when metadata changes**: If the metadata doesn't match (files have changed), it creates a new `.7z` and deletes the old `.zip`.
3. **Cleans up obsolete `.zip` files**: If a `.7z` already exists and is up-to-date, it deletes any old `.zip` files.
The key change is on lines 696-699: when a `.zip` exists, no `.7z` exists, and the metadata matches, the script skips creating a `.7z` and keeps the `.zip` file.
This ensures that `.zip` files are only replaced with `.7z` when the files have actually changed, not just because the script is running.
---
_**User (2025-11-11 19:15Z)**_
Very low CPU and RAM allocation on this. Let's make Max7zInst=0 be our original balls-to-the-walls process.
[zip] RAM: 102.2GB available (80% of total), 40.9GB for compression (40%), 8.0GB per job (dict: 1024MB) → RAM limit: 5, requested: None, final: 5
---
_**Agent (model default, mode Agent)**_
---
Updating the auto-calculation logic for `Max7zInst=0` to be more aggressive, using more CPU and RAM:
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
---
Updating the auto-calculation to be more aggressive when `Max7zInst=0`:
---
<tool-use data-tool-type="write" data-tool-name="search_replace">
<details><summary>Tool use: **code_edit** • Edit file: zip_sequences.py</summary>
**Chunk 1**
Lines added: 6, lines removed: 5
```diff
@@ -282,12 +282,13 @@
# Auto-calculate based on RAM if Max7zInst not configured
+ # &quot;Balls-to-the-walls&quot; mode: use maximum resources
if available_ram is not None:
- # 7z uses ~2-3x dictionary size in RAM, but with overhead use 8x for safety
- # This accounts for 7z&#39;s internal buffers, OS overhead, and other processes
- FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 8 * 1024 * 1024 # 8GB per job
+ # 7z uses ~2-3x dictionary size in RAM, use 3x for aggressive mode
+ # This is more realistic and allows more concurrent workers
+ FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 3 * 1024 * 1024 # 3GB per job
# available_ram is already 80% of total (20% reserved for system)
- # Use only 40% of that for compression jobs (very conservative to prevent swapping)
- compression_ram = int(available_ram * 0.4)
+ # Use 95% of available RAM for compression jobs (aggressive mode)
+ compression_ram = int(available_ram * 0.95)
# Calculate worker limit based on fixed per-job RAM
```
**Chunk 2**
Lines added: 1, lines removed: 1
```diff
@@ -305,5 +306,5 @@
log(
&quot;zip&quot;,
- f&quot;RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (40%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → &quot;
+ f&quot;RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (95%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → &quot;
f&quot;RAM limit: {ram_limit}, requested: {requested}, final: {final_limit}&quot;,
verbose_only=True,
```
</details>
</tool-use>
---
---
<tool-use data-tool-type="generic" data-tool-name="read_lints">
<details>
<summary>Tool use: **read_lints** • Read lints for 1 file</summary>
Lint paths:
- `zip_sequences.py`
Lint results
**No lint errors found**
</details>
</tool-use>
---
---
---
Updated the auto-calculation for `Max7zInst=0` to be more aggressive.
**Changes:**
1. **Increased RAM usage**: From 40% to 95% of available RAM for compression
2. **Reduced per-job RAM estimate**: From 8GB to 3GB per job (more realistic for a 1GB dictionary)
**Results for your system:**
- **Before**: 40.9GB for compression ÷ 8GB per job = **5 workers**
- **After**: ~97GB for compression ÷ 3GB per job = **~32 workers**
This should use more CPU and RAM. The dictionary size remains 1GB, so compression ratio is unchanged.
With `Max7zInst=0`, the script will use up to ~32 concurrent 7z instances, maximizing throughput. If you want to limit it, set `Max7zInst` to a specific number in your config.
---
_**User (2025-11-11 21:54Z)**_
it's now trying to do 32 jobs with 3gb allocated to each, even when there's only one sequence to zip. let's make it account for the number of 7z jobs there are to do.
---
_**Agent (model default, mode Agent)**_
---
---
Updating the logic to cap workers at the number of sequences to zip. Checking how `max_workers` is called:
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
<tool-use data-tool-type="grep" data-tool-name="grep">
<details>
<summary>Tool use: **grep** • Grep for "def run_zip" in "zip_sequences.py" • 1 match</summary>
Output mode: content
| File | Content | Line |
|------|------|------|
| `zip_sequences.py` | `def run_zip(requested_workers: int \| None, *, verbose: bool) -> int:` | L639 |
</details>
</tool-use>
---
---
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
---
Updating `max_workers` to cap workers at the number of work items:
---
<tool-use data-tool-type="read" data-tool-name="read_file">
<details><summary>Tool use: **read_file** • Read file: zip_sequences.py</summary>
</details>
</tool-use>
---
---
Capping workers at the number of work items:
---
<tool-use data-tool-type="write" data-tool-name="search_replace">
<details><summary>Tool use: **code_edit** • Edit file: zip_sequences.py</summary>
**Chunk 1**
Lines added: 5, lines removed: undefined
```diff
@@ -295,4 +295,9 @@
ram_limit = max(1, compression_ram // FIXED_RAM_PER_JOB)
+ # Cap at number of actual work items (can&#39;t have more workers than jobs)
+ num_work_items = len(work_items) if work_items else 0
+ if num_work_items &gt; 0:
+ ram_limit = min(ram_limit, num_work_items)
+
# Use RAM limit directly (no CPU limit)
final_limit = ram_limit
```
**Chunk 2**
Lines added: 1, lines removed: 1
```diff
@@ -307,5 +312,5 @@
&quot;zip&quot;,
f&quot;RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (95%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → &quot;
- f&quot;RAM limit: {ram_limit}, requested: {requested}, final: {final_limit}&quot;,
+ f&quot;RAM limit: {ram_limit}, work items: {num_work_items}, requested: {requested}, final: {final_limit}&quot;,
verbose_only=True,
verbose=verbose
```
</details>
</tool-use>
---
---
Updating the configured `MAX_7Z_INSTANCES` case to also cap at the number of work items:
---
<tool-use data-tool-type="write" data-tool-name="search_replace">
<details><summary>Tool use: **code_edit** • Edit file: zip_sequences.py</summary>
**Chunk 1**
Lines added: 4, lines removed: 1
```diff
@@ -265,6 +265,9 @@
# Check if Max7zInst is configured
if MAX_7Z_INSTANCES is not None:
- # Use configured maximum instances, but still respect user&#39;s --jobs
+ # Use configured maximum instances, but still respect user&#39;s --jobs and work items
final_limit = MAX_7Z_INSTANCES
+ num_work_items = len(work_items) if work_items else 0
+ if num_work_items &gt; 0:
+ final_limit = min(final_limit, num_work_items)
if requested and requested &gt; 0:
final_limit = min(final_limit, requested)
```
**Chunk 2**
Lines added: 1, lines removed: 1
```diff
@@ -274,5 +277,5 @@
&quot;zip&quot;,
f&quot;Using Max7zInst={MAX_7Z_INSTANCES} from config → &quot;
- f&quot;requested: {requested}, final: {final_limit}&quot;,
+ f&quot;work items: {num_work_items}, requested: {requested}, final: {final_limit}&quot;,
verbose_only=True,
verbose=verbose
```
</details>
</tool-use>
---
---
Updating the default case when RAM detection fails:
---
<tool-use data-tool-type="write" data-tool-name="search_replace">
<details><summary>Tool use: **code_edit** • Edit file: zip_sequences.py</summary>
**Chunk 1**
Lines added: 12, lines removed: 3
```diff
@@ -323,9 +323,18 @@
# RAM detection failed, use a safe default (no CPU limit)
- if verbose:
- log(&quot;zip&quot;, &quot;RAM detection failed and Max7zInst not set, using default worker limit of 4&quot;, verbose_only=True, verbose=verbose)
default_limit = 4
+ num_work_items = len(work_items) if work_items else 0
+ if num_work_items &gt; 0:
+ default_limit = min(default_limit, num_work_items)
if requested and requested &gt; 0:
- default_limit = requested
+ default_limit = min(default_limit, requested)
+ if verbose:
+ log(
+ &quot;zip&quot;,
+ f&quot;RAM detection failed and Max7zInst not set, using default worker limit → &quot;
+ f&quot;work items: {num_work_items}, requested: {requested}, final: {default_limit}&quot;,
+ verbose_only=True,
+ verbose=verbose
+ )
return (default_limit, fixed_dict_size_bytes)
```
</details>
</tool-use>
---
---
<tool-use data-tool-type="generic" data-tool-name="read_lints">
<details>
<summary>Tool use: **read_lints** • Read lints for 1 file</summary>
Lint paths:
- `zip_sequences.py`
Lint results
**No lint errors found**
</details>
</tool-use>
---
---
---
Updated the code to cap worker count at the number of sequences to zip. Changes:
1. **Auto-calculate mode (`Max7zInst=0`)**: Caps workers at the number of work items
2. **Configured mode (`Max7zInst=N`)**: Also caps at the number of work items
3. **Default fallback**: Also caps at the number of work items
Now when there's only 1 sequence to zip, it will use 1 worker instead of trying to spawn 32. The verbose output will show:
- `work items: 1`
- `final: 1` (instead of 32)
The script still uses aggressive RAM allocation when there are many sequences, but won't exceed the actual number of jobs.
---

View File

@@ -214,7 +214,9 @@ if ($MyInvocation.InvocationName -ne '.') {
@{ Name = 'UpdateAllSequences.bat'; Source = Join-Path -Path $structDir -ChildPath 'UpdateAllSequences.bat' },
@{ Name = 'ZipSeqArchv.bat'; Source = Join-Path -Path $structDir -ChildPath 'ZipSeqArchv.bat' },
@{ Name = 'UnzipSeqArchv.bat'; Source = Join-Path -Path $structDir -ChildPath 'UnzipSeqArchv.bat' },
@{ Name = 'NewDaily.bat'; Source = Join-Path -Path $structDir -ChildPath 'NewDaily.bat' }
@{ Name = 'NewDaily.bat'; Source = Join-Path -Path $structDir -ChildPath 'NewDaily.bat' },
@{ Name = '.gitattributes'; Source = Join-Path -Path $structDir -ChildPath 'components\gitattributes' },
@{ Name = '.gitignore'; Source = Join-Path -Path $structDir -ChildPath 'components\gitignore' }
)
# Config files to deploy to projectroot\.config\
@@ -253,7 +255,7 @@ if ($MyInvocation.InvocationName -ne '.') {
foreach ($spec in $specs) {
Write-Host "`n=== Updating $($spec.Name) ===" -ForegroundColor Magenta
$targets = Get-ChildItem -LiteralPath $resolvedProject -Recurse -Filter $spec.Name -File -ErrorAction SilentlyContinue
$targets = Get-ChildItem -LiteralPath $resolvedProject -Recurse -Filter $spec.Name -File -Force -ErrorAction SilentlyContinue
$targets = $targets | Where-Object { $_.FullName -ne $spec.Source }
if (-not $targets) {
@@ -274,6 +276,11 @@ if ($MyInvocation.InvocationName -ne '.') {
foreach ($target in $targets) {
try {
Copy-Item -Path $spec.Source -Destination $target.FullName -Force
# Set hidden attribute for .gitattributes and .gitignore files
if ($spec.Name -eq '.gitattributes' -or $spec.Name -eq '.gitignore') {
$file = Get-Item -LiteralPath $target.FullName -Force
$file.Attributes = $file.Attributes -bor [System.IO.FileAttributes]::Hidden
}
Write-Host "[OK] $($target.FullName)" -ForegroundColor Green
$updated++
@@ -310,6 +317,28 @@ if ($MyInvocation.InvocationName -ne '.') {
Write-Host "All batch files refreshed successfully." -ForegroundColor Green
# Deploy .gitattributes and .gitignore to project root, overwriting any existing copies
Write-Host "`n=== Deploying .gitattributes and .gitignore to project root ===" -ForegroundColor Magenta
$gitFiles = @(
@{ Name = '.gitattributes'; Source = Join-Path -Path $structDir -ChildPath 'components\gitattributes' },
@{ Name = '.gitignore'; Source = Join-Path -Path $structDir -ChildPath 'components\gitignore' }
)
foreach ($gitFile in $gitFiles) {
$targetPath = Join-Path -Path $resolvedProject -ChildPath $gitFile.Name
try {
Copy-Item -Path $gitFile.Source -Destination $targetPath -Force
# Set hidden attribute on the file
$file = Get-Item -LiteralPath $targetPath -Force
$file.Attributes = $file.Attributes -bor [System.IO.FileAttributes]::Hidden
Write-Host "[OK] $targetPath" -ForegroundColor Green
}
catch {
Write-Host "[FAIL] $targetPath" -ForegroundColor Red
Write-Host " $($_.Exception.Message)" -ForegroundColor DarkRed
exit 1
}
}
# Deploy config files to projectroot\.config\
Write-Host "`n=== Deploying config files to .config\ ===" -ForegroundColor Magenta
foreach ($asset in $configAssets) {

View File

@@ -50,4 +50,7 @@
*.wav filter=lfs diff=lfs merge=lfs -text
*.docx filter=lfs diff=lfs merge=lfs -text
*.MOV filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.sbs filter=lfs diff=lfs merge=lfs -text
*.sbsar filter=lfs diff=lfs merge=lfs -text
*.spp filter=lfs diff=lfs merge=lfs -text

View File

@@ -2,6 +2,7 @@
"dailyFormat": "daily_YYMMDD",
"structDir": "A:\\1 Amazon_Active_Projects\\3 ProjectStructure",
"zipper": "7z",
"compression": 9
"compression": 9,
"Max7zInst": 0
}

View File

@@ -12,6 +12,7 @@ from __future__ import annotations
import argparse
import json
import os
import platform
import shutil
import subprocess
import sys
@@ -21,6 +22,22 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Iterator, Sequence
# Try to import psutil for cross-platform RAM detection
try:
import psutil
HAS_PSUTIL = True
except ImportError:
HAS_PSUTIL = False
# For Windows fallback
if platform.system() == "Windows":
try:
import ctypes
HAS_CTYPES = True
except ImportError:
HAS_CTYPES = False
else:
HAS_CTYPES = False
RENDER_ROOT = Path("Renders")
ARCHIVE_ROOT = RENDER_ROOT / "_zipped"
@@ -37,6 +54,7 @@ DEFAULT_CONFIG = {
"zipper": "7z",
"compression": 9,
"dailyFormat": "daily_YYMMDD",
"Max7zInst": 0, # Maximum concurrent 7z instances (0 = auto-calculate)
}
@@ -89,6 +107,19 @@ if not isinstance(COMPRESSION_LEVEL, int):
COMPRESSION_LEVEL = 9
COMPRESSION_LEVEL = max(0, min(9, COMPRESSION_LEVEL))
MAX_7Z_INSTANCES = CONFIG.get("Max7zInst", 0)
if MAX_7Z_INSTANCES is not None:
if isinstance(MAX_7Z_INSTANCES, str):
try:
MAX_7Z_INSTANCES = int(MAX_7Z_INSTANCES)
except ValueError:
MAX_7Z_INSTANCES = 0
if not isinstance(MAX_7Z_INSTANCES, int) or MAX_7Z_INSTANCES < 1:
MAX_7Z_INSTANCES = 0
# Treat 0 as None (auto-calculate)
if MAX_7Z_INSTANCES == 0:
MAX_7Z_INSTANCES = None
SEVEN_Z_EXE: str | None = None
if ZIPPER_TYPE == "7z":
SEVEN_Z_EXE = shutil.which("7z") or shutil.which("7za")
@@ -107,12 +138,237 @@ def parse_args() -> argparse.Namespace:
return parser.parse_args()
def max_workers(requested: int | None) -> int:
def get_available_ram() -> int | None:
    """Report the RAM budget for compression: 80% of total physical memory.

    The remaining 20% is held back for the system. ``psutil`` is tried
    first; without it, Windows hosts fall back to ``GlobalMemoryStatusEx``
    from kernel32, called through ``ctypes``.

    Returns:
        The budget in bytes, or None when no detection method is usable
        or the probe raises.
    """
    usable_fraction = 0.8  # keep 20% of physical RAM back for the system

    try:
        if HAS_PSUTIL:
            # Cross-platform route.
            total_physical = psutil.virtual_memory().total
            return int(total_physical * usable_fraction)

        if HAS_CTYPES and platform.system() == "Windows":
            # Structure filled in by GlobalMemoryStatusEx.
            class MEMORYSTATUSEX(ctypes.Structure):
                _fields_ = [
                    ("dwLength", ctypes.c_ulong),
                    ("dwMemoryLoad", ctypes.c_ulong),
                    ("ullTotalPhys", ctypes.c_ulonglong),
                    ("ullAvailPhys", ctypes.c_ulonglong),
                    ("ullTotalPageFile", ctypes.c_ulonglong),
                    ("ullAvailPageFile", ctypes.c_ulonglong),
                    ("ullTotalVirtual", ctypes.c_ulonglong),
                    ("ullAvailVirtual", ctypes.c_ulonglong),
                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
                ]

            query = ctypes.windll.kernel32.GlobalMemoryStatusEx
            query.argtypes = [ctypes.POINTER(MEMORYSTATUSEX)]
            query.restype = ctypes.c_bool

            status = MEMORYSTATUSEX()
            # dwLength is set to the structure size before the call.
            status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
            if query(ctypes.byref(status)):
                return int(status.ullTotalPhys * usable_fraction)
    except Exception:
        # Best-effort probe: any failure simply means "unknown".
        pass

    return None
def estimate_ram_per_job(seq_dir: Path, seq_state: dict) -> int:
    """Guess how much RAM a single compression job will need.

    The guess is driven by the summed ``size`` of the entries listed under
    ``seq_state["files"]``; ``seq_dir`` itself is not inspected.

    Args:
        seq_dir: Path to the sequence directory.
        seq_state: State dictionary containing file information.

    Returns:
        Estimated RAM usage in bytes.
    """
    mib = 1024 * 1024
    gib = 1024 * mib

    # Total payload size, taken from the recorded state.
    folder_bytes = 0
    for entry in seq_state.get("files", []):
        folder_bytes += entry.get("size", 0)

    if ZIPPER_TYPE != "7z":
        # zip compression is more memory-efficient: flat 1 GB per job.
        return gib

    floor = 500 * mib      # base RAM per 7z job
    ceiling = 8 * gib      # hard cap per 7z job

    # Folders above 10 GB are charged the cap outright.
    if folder_bytes > 10 * gib:
        return ceiling

    # Otherwise scale by 0.15x of the folder size, clamped to [floor, ceiling].
    scaled = int(folder_bytes * 0.15)
    return min(max(floor, scaled), ceiling)
def max_workers(
    requested: int | None,
    work_items: list[tuple[Path, Path, Path, dict]] | None = None,
    *,
    verbose: bool = False
) -> tuple[int, int | None]:
    """Calculate how many compression workers to run concurrently.

    Decision order:

    1. No work items: CPU-based limit, no memory limit.
    2. 7z with ``Max7zInst`` configured: that value, capped by the number
       of work items and by ``requested``. RAM detection is not needed.
    3. 7z auto mode: a RAM-derived limit (3x dictionary size per job out of
       95% of the detected budget), capped the same way. If RAM detection
       failed, a default of 4 is used instead.
    4. zip: per-job RAM is estimated from the work items and combined with
       the CPU limit. If RAM detection failed, the CPU limit is used.

    Args:
        requested: User-requested worker count (from --jobs). None or a
            non-positive value means "no preference".
        work_items: List of work items (seq_dir, zip_path, state_path, seq_state).
        verbose: Whether to log how the limit was derived.

    Returns:
        Tuple of (worker_count, per_job_memory_limit_bytes).
        per_job_memory_limit_bytes is the fixed 7z dictionary size whenever
        ZIPPER_TYPE is "7z" and there is work to do, and None otherwise.
    """
    cpu = os.cpu_count() or 1
    cpu_limit = max(1, min(8, cpu))
    if requested and requested > 0:
        cpu_limit = min(requested, max(1, cpu))

    # If no work items provided, return CPU-based limit
    if not work_items:
        return (cpu_limit, None)

    num_work_items = len(work_items)
    available_ram = get_available_ram()

    # For 7z: use fixed dictionary size and calculate workers
    if ZIPPER_TYPE == "7z":
        # Fixed dictionary size: 1GB (1024MB)
        FIXED_DICT_SIZE_MB = 1024
        fixed_dict_size_bytes = FIXED_DICT_SIZE_MB * 1024 * 1024

        # A configured Max7zInst does not depend on RAM detection, so it is
        # honoured before available_ram is inspected.
        if MAX_7Z_INSTANCES is not None:
            # Can't have more workers than jobs; still respect --jobs.
            final_limit = min(MAX_7Z_INSTANCES, num_work_items)
            if requested and requested > 0:
                final_limit = min(final_limit, requested)

            if verbose:
                log(
                    "zip",
                    f"Using Max7zInst={MAX_7Z_INSTANCES} from config → "
                    f"work items: {num_work_items}, requested: {requested}, final: {final_limit}",
                    verbose_only=True,
                    verbose=verbose
                )

            return (final_limit, fixed_dict_size_bytes)

        # Auto-calculate based on RAM if Max7zInst not configured
        # "Balls-to-the-walls" mode: use maximum resources
        if available_ram is not None:
            # Budget 3x the dictionary size per job (aggressive mode).
            FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 3 * 1024 * 1024  # 3GB per job

            # available_ram is already 80% of total (20% reserved for system);
            # use 95% of that for compression jobs.
            compression_ram = int(available_ram * 0.95)

            # Worker limit from fixed per-job RAM, capped at the number of
            # actual work items. No CPU limit is applied in this mode.
            ram_limit = max(1, compression_ram // FIXED_RAM_PER_JOB)
            ram_limit = min(ram_limit, num_work_items)

            final_limit = ram_limit
            if requested and requested > 0:
                final_limit = min(final_limit, requested)

            if verbose:
                ram_gb = available_ram / (1024 ** 3)
                compression_ram_gb = compression_ram / (1024 ** 3)
                ram_per_job_gb = FIXED_RAM_PER_JOB / (1024 ** 3)
                log(
                    "zip",
                    f"RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (95%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → "
                    f"RAM limit: {ram_limit}, work items: {num_work_items}, requested: {requested}, final: {final_limit}",
                    verbose_only=True,
                    verbose=verbose
                )

            return (final_limit, fixed_dict_size_bytes)

        # RAM detection failed, use a safe default (no CPU limit)
        default_limit = min(4, num_work_items)
        if requested and requested > 0:
            default_limit = min(default_limit, requested)
        if verbose:
            log(
                "zip",
                f"RAM detection failed and Max7zInst not set, using default worker limit → "
                f"work items: {num_work_items}, requested: {requested}, final: {default_limit}",
                verbose_only=True,
                verbose=verbose
            )
        return (default_limit, fixed_dict_size_bytes)

    # zip compression from here on; it needs a RAM figure to do better than
    # the CPU-based limit.
    if available_ram is None:
        if verbose:
            log("zip", "RAM detection failed, using CPU-based worker limit", verbose_only=True, verbose=verbose)
        return (cpu_limit, None)

    # Estimate RAM per job for each work item
    ram_estimates = []
    for seq_dir, _zip_path, _state_path, seq_state in work_items:
        try:
            ram_estimates.append(estimate_ram_per_job(seq_dir, seq_state))
        except Exception:
            # If estimation fails, use fallback estimate
            ram_estimates.append(1024 * 1024 * 1024)  # 1GB fallback for zip

    # Size the pool for the hungriest job.
    max_ram_per_job = max(ram_estimates)
    ram_limit = max(1, available_ram // max_ram_per_job)
    ram_limit = min(ram_limit, 6)  # Conservative limit for zip
    final_limit = min(cpu_limit, ram_limit)

    if verbose:
        ram_gb = available_ram / (1024 ** 3)
        max_ram_gb = max_ram_per_job / (1024 ** 3)
        log(
            "zip",
            f"RAM: {ram_gb:.1f}GB available, ~{max_ram_gb:.1f}GB per job → "
            f"RAM limit: {ram_limit}, CPU limit: {cpu_limit}, final: {final_limit}",
            verbose_only=True,
            verbose=verbose
        )

    return (final_limit, None)
def log(mode: str, message: str, *, verbose_only: bool = False, verbose: bool = False) -> None:
@@ -122,13 +378,13 @@ def log(mode: str, message: str, *, verbose_only: bool = False, verbose: bool =
def is_archive_path(path: Path) -> bool:
    """Return True when *path* lies inside an excluded folder.

    A path is excluded when any one of its components is exactly
    ``_archive`` or ``_CURRENT``; partial matches such as ``my_archive``
    do not count.
    """
    return any(part in ("_archive", "_CURRENT") for part in path.parts)
def find_sequence_dirs(root: Path) -> Iterator[Path]:
for dirpath, dirnames, filenames in os.walk(root):
path = Path(dirpath)
dirnames[:] = [d for d in dirnames if d != "_archive"]
dirnames[:] = [d for d in dirnames if d not in ("_archive", "_CURRENT")]
if is_archive_path(path):
continue
has_frames = any(Path(dirpath, f).suffix.lower() in SEQUENCE_EXTENSIONS for f in filenames)
@@ -139,7 +395,7 @@ def find_sequence_dirs(root: Path) -> Iterator[Path]:
def iter_sequence_files(seq_dir: Path) -> Iterator[Path]:
for dirpath, dirnames, filenames in os.walk(seq_dir):
path = Path(dirpath)
dirnames[:] = [d for d in dirnames if d != "_archive"]
dirnames[:] = [d for d in dirnames if d not in ("_archive", "_CURRENT")]
if is_archive_path(path):
continue
for filename in filenames:
@@ -200,7 +456,7 @@ def state_path_for(zip_path: Path) -> Path:
return zip_path.with_suffix(zip_path.suffix + STATE_SUFFIX)
def zip_sequence(seq_dir: Path, zip_path: Path) -> None:
def zip_sequence(seq_dir: Path, zip_path: Path, per_job_memory_limit: int | None = None) -> None:
if ZIPPER_TYPE == "7z":
if SEVEN_Z_EXE is None:
raise RuntimeError(
@@ -258,9 +514,18 @@ def zip_sequence(seq_dir: Path, zip_path: Path) -> None:
"-bb0", # Suppress progress output
f"-mx={COMPRESSION_LEVEL}",
"-t7z", # Use 7z format, not zip
]
# Add fixed dictionary size if specified (7z memory usage is controlled by dictionary size)
if per_job_memory_limit is not None:
# per_job_memory_limit is actually the fixed dictionary size in bytes
dict_size_mb = per_job_memory_limit // (1024 * 1024)
cmd.append(f"-md={dict_size_mb}m")
cmd.extend([
str(temp_zip_abs),
f"@{list_file_abs}",
]
])
result = subprocess.run(
cmd,
cwd=seq_dir,
@@ -376,9 +641,9 @@ def expand_sequence(zip_path: Path, seq_state: dict) -> None:
os.utime(file_path, ns=(entry["mtime_ns"], entry["mtime_ns"]))
def process_zip(seq_dir: Path, zip_path: Path, state_path: Path, seq_state: dict, per_job_memory_limit: int | None, *, verbose: bool) -> Sequence[Path]:
    """Archive one sequence directory and record its state next to the archive.

    Args:
        seq_dir: Sequence directory to compress.
        zip_path: Destination archive path.
        state_path: File that receives ``seq_state`` as indented JSON.
        seq_state: State dictionary describing the sequence's files.
        per_job_memory_limit: Forwarded unchanged to ``zip_sequence``.
        verbose: Whether to log the source -> archive mapping.

    Returns:
        The pair ``(zip_path, state_path)``.
    """
    log("zip", f"{seq_dir} -> {zip_path}", verbose_only=True, verbose=verbose)
    zip_sequence(seq_dir, zip_path, per_job_memory_limit)
    # The state file is written only after zip_sequence has returned.
    state_path.write_text(json.dumps(seq_state, indent=2))
    return (zip_path, state_path)
@@ -388,44 +653,71 @@ def process_expand(zip_path: Path, state: dict, *, verbose: bool) -> None:
expand_sequence(zip_path, state)
def run_zip(worker_count: int, *, verbose: bool) -> int:
def run_zip(requested_workers: int | None, *, verbose: bool) -> int:
work_items: list[tuple[Path, Path, Path, dict]] = []
if RENDER_ROOT.exists():
for seq_dir in find_sequence_dirs(RENDER_ROOT):
seq_state = compute_state(seq_dir)
if not seq_state["files"]:
continue
# Get the target archive path (will be .7z if ZIPPER_TYPE is "7z")
zip_path = archive_path_for(seq_dir)
state_path = state_path_for(zip_path)
# Quick check: if archive exists, load stored state first (fast)
stored_state = load_state(state_path)
# Check if we need to upgrade from .zip to .7z
old_zip_path = None
old_stored_state = None
if ZIPPER_TYPE == "7z":
# Check if an old .zip file exists
old_zip_path = zip_path.with_suffix(".zip")
if old_zip_path.exists():
# Check if the old .zip's metadata matches current state
old_state_path = state_path_for(old_zip_path)
old_stored_state = load_state(old_state_path)
if not state_changed(seq_state, old_stored_state):
# Old .zip is up to date, skip conversion
continue
# Old .zip is out of date, will be replaced with .7z
# If old .zip exists and .7z doesn't, use old .zip's state for comparison
if not zip_path.exists() and old_stored_state is not None:
stored_state = old_stored_state
# Check if the target archive (e.g., .7z) already exists and is up to date
stored_state = load_state(state_path)
if not state_changed(seq_state, stored_state):
# Target archive is up to date, but we might still need to clean up old .zip
if old_zip_path and old_zip_path.exists():
# Old .zip exists but we have a newer .7z, remove the old one
old_zip_path.unlink(missing_ok=True)
old_state_path = state_path_for(old_zip_path)
if old_state_path.exists():
old_state_path.unlink(missing_ok=True)
# If .7z archive exists and we have stored state, do quick check before computing full state
if zip_path.exists() and stored_state is not None:
# Quick check: if directory mtime is older than archive, likely unchanged
try:
dir_mtime = seq_dir.stat().st_mtime_ns
archive_mtime = zip_path.stat().st_mtime_ns
# If directory wasn't modified since archive was created, skip state computation
if dir_mtime <= archive_mtime:
# Still need to check for old .zip cleanup (we have .7z, so .zip is obsolete)
if old_zip_path and old_zip_path.exists():
old_zip_path.unlink(missing_ok=True)
old_state_path = state_path_for(old_zip_path)
if old_state_path.exists():
old_state_path.unlink(missing_ok=True)
continue
except OSError:
# If stat fails, fall through to full state computation
pass
# Compute current state only if we need to
seq_state = compute_state(seq_dir)
if not seq_state["files"]:
continue
# Check if state changed
if stored_state is not None and not state_changed(seq_state, stored_state):
# Metadata matches stored state
if zip_path.exists():
# .7z exists and is up to date, clean up old .zip if it exists
if old_zip_path and old_zip_path.exists():
old_zip_path.unlink(missing_ok=True)
old_state_path = state_path_for(old_zip_path)
if old_state_path.exists():
old_state_path.unlink(missing_ok=True)
elif old_zip_path and old_zip_path.exists() and old_stored_state is not None:
# .7z doesn't exist, but .zip exists and metadata matches
# Keep the .zip file, don't create .7z
continue
else:
# No archive exists, but state matches (shouldn't happen, but be safe)
continue
work_items.append((seq_dir, zip_path, state_path, seq_state))
@@ -436,6 +728,9 @@ def run_zip(worker_count: int, *, verbose: bool) -> int:
log("zip", "Archives already up to date; no sequences needed zipping.")
return 0
# Calculate RAM-aware worker count based on work items
worker_count, per_job_memory_limit = max_workers(requested_workers, work_items, verbose=verbose)
updated_paths: list[Path] = []
total = len(work_items)
@@ -443,7 +738,7 @@ def run_zip(worker_count: int, *, verbose: bool) -> int:
with ThreadPoolExecutor(max_workers=worker_count) as executor:
future_map = {
executor.submit(process_zip, seq_dir, zip_path, state_path, seq_state, verbose=verbose): seq_dir
executor.submit(process_zip, seq_dir, zip_path, state_path, seq_state, per_job_memory_limit, verbose=verbose): seq_dir
for seq_dir, zip_path, state_path, seq_state in work_items
}
@@ -556,13 +851,15 @@ def cleanup_orphan_archives(*, verbose: bool) -> int:
def main() -> int:
    """Entry point: dispatch to expand or zip mode and return an exit code.

    Returns:
        ``0`` after an expand run, or after a zip run whose result is
        non-negative; ``1`` if ``run_zip`` reports a negative result.
    """
    args = parse_args()

    if args.mode == "expand":
        # Expand mode has no work items to size against, so the worker count
        # is resolved here. max_workers returns a (workers, limit) pair; the
        # limit is not used for expansion.
        workers, _ = max_workers(args.jobs, work_items=None, verbose=args.verbose)
        run_expand(workers, verbose=args.verbose)
        return 0

    # Zip mode: pass the *requested* job count through untouched. run_zip
    # derives the effective worker count itself once it has collected its
    # work items.
    updated = run_zip(args.jobs, verbose=args.verbose)
    return 0 if updated >= 0 else 1