zipseq 7zinst of 0 bottleneck fix

This commit is contained in:
Nathan
2025-11-11 16:31:09 -07:00
parent 55a2074c62
commit 6d4cdb0549
2 changed files with 461 additions and 12 deletions

View File

@@ -264,8 +264,11 @@ def max_workers(
# Check if Max7zInst is configured
if MAX_7Z_INSTANCES is not None:
# Use configured maximum instances, but still respect user's --jobs
# Use configured maximum instances, but still respect user's --jobs and work items
final_limit = MAX_7Z_INSTANCES
num_work_items = len(work_items) if work_items else 0
if num_work_items > 0:
final_limit = min(final_limit, num_work_items)
if requested and requested > 0:
final_limit = min(final_limit, requested)
@@ -273,7 +276,7 @@ def max_workers(
log(
"zip",
f"Using Max7zInst={MAX_7Z_INSTANCES} from config → "
f"requested: {requested}, final: {final_limit}",
f"work items: {num_work_items}, requested: {requested}, final: {final_limit}",
verbose_only=True,
verbose=verbose
)
@@ -281,18 +284,24 @@ def max_workers(
return (final_limit, fixed_dict_size_bytes)
# Auto-calculate based on RAM if Max7zInst not configured
# "Balls-to-the-walls" mode: use maximum resources
if available_ram is not None:
# 7z uses ~2-3x dictionary size in RAM, but with overhead use 8x for safety
# This accounts for 7z's internal buffers, OS overhead, and other processes
FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 8 * 1024 * 1024 # 8GB per job
# 7z uses ~2-3x dictionary size in RAM, use 3x for aggressive mode
# This is more realistic and allows more concurrent workers
FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 3 * 1024 * 1024 # 3GB per job
# available_ram is already 80% of total (20% reserved for system)
# Use only 40% of that for compression jobs (very conservative to prevent swapping)
compression_ram = int(available_ram * 0.4)
# Use 95% of available RAM for compression jobs (aggressive mode)
compression_ram = int(available_ram * 0.95)
# Calculate worker limit based on fixed per-job RAM
ram_limit = max(1, compression_ram // FIXED_RAM_PER_JOB)
# Cap at number of actual work items (can't have more workers than jobs)
num_work_items = len(work_items) if work_items else 0
if num_work_items > 0:
ram_limit = min(ram_limit, num_work_items)
# Use RAM limit directly (no CPU limit)
final_limit = ram_limit
if requested and requested > 0:
@@ -304,8 +313,8 @@ def max_workers(
ram_per_job_gb = FIXED_RAM_PER_JOB / (1024 ** 3)
log(
"zip",
f"RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (40%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → "
f"RAM limit: {ram_limit}, requested: {requested}, final: {final_limit}",
f"RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (95%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → "
f"RAM limit: {ram_limit}, work items: {num_work_items}, requested: {requested}, final: {final_limit}",
verbose_only=True,
verbose=verbose
)
@@ -313,11 +322,20 @@ def max_workers(
return (final_limit, fixed_dict_size_bytes)
# RAM detection failed, use a safe default (no CPU limit)
if verbose:
log("zip", "RAM detection failed and Max7zInst not set, using default worker limit of 4", verbose_only=True, verbose=verbose)
default_limit = 4
num_work_items = len(work_items) if work_items else 0
if num_work_items > 0:
default_limit = min(default_limit, num_work_items)
if requested and requested > 0:
default_limit = requested
default_limit = min(default_limit, requested)
if verbose:
log(
"zip",
f"RAM detection failed and Max7zInst not set, using default worker limit → "
f"work items: {num_work_items}, requested: {requested}, final: {default_limit}",
verbose_only=True,
verbose=verbose
)
return (default_limit, fixed_dict_size_bytes)
# For zip compression, use existing estimation-based approach