zipseq 7zinst of 0 bottleneck fix

2025-11-11 16:31:09 -07:00
parent 55a2074c62
commit 6d4cdb0549
2 changed files with 461 additions and 12 deletions
--- a/zip_sequences.py
+++ b/zip_sequences.py
@@ -264,8 +264,11 @@ def max_workers(
        
        # Check if Max7zInst is configured
        if MAX_7Z_INSTANCES is not None:
-            # Use configured maximum instances, but still respect user's --jobs
+            # Use configured maximum instances, but still respect the user's --jobs and the number of work items
            final_limit = MAX_7Z_INSTANCES
+            num_work_items = len(work_items) if work_items else 0
+            if num_work_items > 0:
+                final_limit = min(final_limit, num_work_items)
            if requested and requested > 0:
                final_limit = min(final_limit, requested)
            
@@ -273,7 +276,7 @@ def max_workers(
                log(
                    "zip",
                    f"Using Max7zInst={MAX_7Z_INSTANCES} from config → "
-                    f"requested: {requested}, final: {final_limit}",
+                    f"work items: {num_work_items}, requested: {requested}, final: {final_limit}",
                    verbose_only=True,
                    verbose=verbose
                )
@@ -281,18 +284,24 @@ def max_workers(
            return (final_limit, fixed_dict_size_bytes)
        
        # Auto-calculate based on RAM if Max7zInst not configured
+        # Aggressive mode: use maximum resources
        if available_ram is not None:
-            # 7z uses ~2-3x dictionary size in RAM, but with overhead use 8x for safety
-            # This accounts for 7z's internal buffers, OS overhead, and other processes
-            FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 8 * 1024 * 1024  # 8GB per job
+            # 7z uses ~2-3x dictionary size in RAM, use 3x for aggressive mode
+            # This is more realistic and allows more concurrent workers
+            FIXED_RAM_PER_JOB = FIXED_DICT_SIZE_MB * 3 * 1024 * 1024  # 3GB per job
            
            # available_ram is already 80% of total (20% reserved for system)
-            # Use only 40% of that for compression jobs (very conservative to prevent swapping)
-            compression_ram = int(available_ram * 0.4)
+            # Use 95% of available RAM for compression jobs (aggressive mode)
+            compression_ram = int(available_ram * 0.95)
            
            # Calculate worker limit based on fixed per-job RAM
            ram_limit = max(1, compression_ram // FIXED_RAM_PER_JOB)
            
+            # Cap at number of actual work items (can't have more workers than jobs)
+            num_work_items = len(work_items) if work_items else 0
+            if num_work_items > 0:
+                ram_limit = min(ram_limit, num_work_items)
+            
            # Use RAM limit directly (no CPU limit)
            final_limit = ram_limit
            if requested and requested > 0:
@@ -304,8 +313,8 @@ def max_workers(
                ram_per_job_gb = FIXED_RAM_PER_JOB / (1024 ** 3)
                log(
                    "zip",
-                    f"RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (40%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → "
-                    f"RAM limit: {ram_limit}, requested: {requested}, final: {final_limit}",
+                    f"RAM: {ram_gb:.1f}GB available (80% of total), {compression_ram_gb:.1f}GB for compression (95%), {ram_per_job_gb:.1f}GB per job (dict: {FIXED_DICT_SIZE_MB}MB) → "
+                    f"RAM limit: {ram_limit}, work items: {num_work_items}, requested: {requested}, final: {final_limit}",
                    verbose_only=True,
                    verbose=verbose
                )
@@ -313,11 +322,20 @@ def max_workers(
            return (final_limit, fixed_dict_size_bytes)
        
        # RAM detection failed, use a safe default (no CPU limit)
-        if verbose:
-            log("zip", "RAM detection failed and Max7zInst not set, using default worker limit of 4", verbose_only=True, verbose=verbose)
        default_limit = 4
+        num_work_items = len(work_items) if work_items else 0
+        if num_work_items > 0:
+            default_limit = min(default_limit, num_work_items)
        if requested and requested > 0:
-            default_limit = requested
+            default_limit = min(default_limit, requested)
+        if verbose:
+            log(
+                "zip",
+                f"RAM detection failed and Max7zInst not set, using default worker limit → "
+                f"work items: {num_work_items}, requested: {requested}, final: {default_limit}",
+                verbose_only=True,
+                verbose=verbose
+            )
        return (default_limit, fixed_dict_size_bytes)
    
    # For zip compression, use existing estimation-based approach