rewrite icaros cache handling

This commit is contained in:
2025-09-28 13:13:40 -06:00
parent 4687539a90
commit bcb9808cca
2 changed files with 3647 additions and 101 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -64,6 +64,13 @@ def parse_args():
"Subdirectories will always be processed.")
return parser.parse_args()
def _ffmpeg_enabled():
    """Tell whether the FFmpeg fallback is switched on.

    Reads the THUMBGEN_ENABLE_FFMPEG environment variable, treating it as '1'
    when unset. The values '1', 'true', 'yes' and 'on' enable the fallback,
    compared case-insensitively after trimming whitespace; anything else, or
    any error while reading the variable, disables it.
    """
    accepted = ('1', 'true', 'yes', 'on')
    try:
        raw = os.environ.get('THUMBGEN_ENABLE_FFMPEG', '1')
        normalized = raw.strip().lower()
    except Exception:
        return False
    return normalized in accepted
def find_files(dir):
@@ -125,38 +132,20 @@ def ensure_directory_exists(path):
def create_video_thumbnails(source_path, dest_dir):
"""Generate video thumbnails using Icaros cache first, then Windows provider, then FFmpeg fallback"""
file_ext = os.path.splitext(source_path)[1].lower()
# Skip Icaros cache extraction - cache doesn't match current files
# TODO: Re-enable after cache rebuild with current file set
# print(f"Trying Icaros cache extraction for {os.path.basename(source_path)}...")
# icaros_thumb = extract_icaros_thumbnail(source_path)
# if icaros_thumb:
# print(f" -> SUCCESS: Using Icaros cache thumbnail")
# generate_synology_thumbnails(icaros_thumb, dest_dir, include_video_screenshot=True)
# return
# Skip Windows extraction for formats that typically don't work
unsupported_formats = ['.m2ts', '.mts', '.flv', '.webm']
if file_ext not in unsupported_formats:
# Try Windows thumbnail extraction second (still faster than FFmpeg)
print(f"Icaros cache failed, trying Windows thumbnail extraction...")
windows_thumb = extract_windows_thumbnail(source_path)
if windows_thumb:
print(f" -> SUCCESS: Using Windows thumbnail provider")
generate_synology_thumbnails(windows_thumb, dest_dir, include_video_screenshot=True)
return
else:
print(f"Skipping Windows thumbnail extraction for {file_ext} format, using FFmpeg...")
# Fallback to FFmpeg
create_video_thumbnails_ffmpeg(source_path, dest_dir)
"""Generate video thumbnails: Windows Shell (Icaros-backed) first, FFmpeg fallback."""
# Try Windows thumbnail extraction first (leverages Icaros provider when present)
windows_thumb = extract_windows_thumbnail(source_path)
if windows_thumb:
print(f" -> SUCCESS: Using Windows/Icaros provider")
generate_synology_thumbnails(windows_thumb, dest_dir, include_video_screenshot=True)
return
# Only reach here if both Icaros and Windows extraction failed
print(f"Both Icaros and Windows extraction failed for {source_path}, using FFmpeg...")
create_video_thumbnails_ffmpeg(source_path, dest_dir)
# Optionally fall back to FFmpeg (enabled by default; set THUMBGEN_ENABLE_FFMPEG=0 to disable)
if _ffmpeg_enabled():
print(f"Windows/Icaros extraction failed for {source_path}, using FFmpeg...")
create_video_thumbnails_ffmpeg(source_path, dest_dir)
else:
print(f"Windows/Icaros extraction failed for {source_path}, FFmpeg disabled (THUMBGEN_ENABLE_FFMPEG=0). Skipping.")
def create_video_thumbnails_ffmpeg(source_path, dest_dir):
@@ -213,8 +202,8 @@ def create_psd_thumbnails(source_path, dest_dir):
def create_psd_thumbnails_direct(source_path, dest_dir):
"""Generate PSD thumbnails using PIL with PSD support (fallback method)"""
try:
# Try to open PSD file - requires pillow-psd plugin
from psd_tools import PSDImage
# Try to open PSD file - requires psd-tools
from psd_tools import PSDImage # type: ignore[reportMissingImports]
psd = PSDImage.open(source_path)
pil_image = psd.composite()
@@ -306,29 +295,35 @@ def extract_windows_thumbnail(file_path):
except Exception as e:
print(f" -> Windows thumbcache extraction failed: {e}")
# TEMPORARILY DISABLED: Tier 2: Try Thumbs.db extraction
print(f" -> DISABLED: Skipping Thumbs.db extraction for debugging")
# try:
# directory = os.path.dirname(file_path)
# thumbs_db_path = os.path.join(directory, 'Thumbs.db')
#
# if os.path.exists(thumbs_db_path):
# print(f" -> Found Thumbs.db, checking for {filename}")
# thumb = extract_from_thumbs_db(thumbs_db_path, filename)
# if thumb:
# print(f" -> Found thumbnail in Thumbs.db for {filename}")
# return thumb
# else:
# print(f" -> No thumbnail in Thumbs.db for {filename}")
# else:
# print(f" -> No Thumbs.db found in directory")
# except Exception as e:
# print(f" -> Thumbs.db extraction failed: {e}")
# Tier 2: Try Thumbs.db extraction
try:
directory = os.path.dirname(file_path)
thumbs_db_path = os.path.join(directory, 'Thumbs.db')
if os.path.exists(thumbs_db_path):
print(f" -> Found Thumbs.db, checking for {filename}")
thumb = extract_from_thumbs_db(thumbs_db_path, filename)
if thumb:
print(f" -> Found thumbnail in Thumbs.db for {filename}")
return thumb
else:
print(f" -> No thumbnail in Thumbs.db for {filename}")
else:
print(f" -> No Thumbs.db found in directory")
except Exception as e:
print(f" -> Thumbs.db extraction failed: {e}")
# TEMPORARILY DISABLED: Tier 3: Skip Icaros cache (algorithm produces random results)
print(f" -> DISABLED: Skipping Icaros cache for debugging")
# TODO: Properly reverse engineer Icaros cache mapping
# print(f" -> Skipping Icaros cache (mapping algorithm incomplete)")
# Tier 3: Try Icaros cache extraction
try:
print(f" -> Trying Icaros cache extraction...")
icaros_thumb = extract_icaros_thumbnail(file_path)
if icaros_thumb:
print(f" -> Found thumbnail in Icaros cache for {filename}")
return icaros_thumb
else:
print(f" -> No thumbnail in Icaros cache for {filename}")
except Exception as e:
print(f" -> Icaros cache extraction failed: {e}")
print(f" -> Windows thumbnail extraction failed for {filename}")
return None
@@ -454,7 +449,7 @@ try {{
}}
}}
}}
"@
"@ -ReferencedAssemblies System.Drawing
Write-Output "DEBUG: Type definitions loaded, calling GetThumbnail..."
@@ -480,9 +475,10 @@ try {{
print(f" -> Executing PowerShell script...")
# Execute PowerShell script
result = subprocess.run([
"powershell", "-Command", powershell_script
], capture_output=True, text=True, timeout=30)
# Force Windows PowerShell 5.1 (powershell.exe) instead of pwsh
ps_exe = "powershell.exe" if os.name == 'nt' else "powershell"
result = subprocess.run([ps_exe, "-NoProfile", "-Command", powershell_script],
capture_output=True, text=True, timeout=30)
print(f" -> PowerShell return code: {result.returncode}")
print(f" -> PowerShell stdout: {result.stdout}")
@@ -571,7 +567,7 @@ try {{
def extract_from_thumbs_db(thumbs_db_path, target_filename):
"""Extract specific file thumbnail from Thumbs.db"""
try:
import olefile
import olefile # type: ignore[reportMissingModuleSource]
if not olefile.isOleFile(thumbs_db_path):
return None
@@ -680,20 +676,9 @@ def extract_icaros_thumbnail(file_path):
import sqlite3
import hashlib
# Icaros stores thumbnails in .icdb database files
icaros_cache_dir = r"C:\Program Files\Icaros\IcarosCache"
if not os.path.exists(icaros_cache_dir):
# Try alternative common locations
alt_locations = [
r"C:\Program Files (x86)\Icaros\IcarosCache",
os.path.expanduser(r"~\AppData\Local\Icaros\IcarosCache"),
os.path.expanduser(r"~\AppData\Roaming\Icaros\IcarosCache")
]
for alt_dir in alt_locations:
if os.path.exists(alt_dir):
icaros_cache_dir = alt_dir
break
else:
# Locate Icaros cache directory
icaros_cache_dir = _get_icaros_cache_dir()
if not icaros_cache_dir:
print(f" -> No Icaros cache directory found")
return None
@@ -706,34 +691,29 @@ def extract_icaros_thumbnail(file_path):
except Exception:
pass
# Look for .icdb database files
icdb_files = [f for f in os.listdir(icaros_cache_dir) if f.endswith('.icdb')]
if not icdb_files:
# Discover .icdb databases by size preference
icdb_paths = _list_icdb_databases(icaros_cache_dir)
if not icdb_paths:
print(f" -> No .icdb database files found")
return None
# Try to extract from the largest database first (likely has the best quality)
def get_size_from_filename(filename):
try:
if '_' in filename and filename.endswith('.icdb'):
return int(filename.split('_')[1].split('.')[0])
return 0
except (ValueError, IndexError):
return 0
icdb_files.sort(key=get_size_from_filename, reverse=True)
for icdb_file in icdb_files:
try:
icdb_path = os.path.join(icaros_cache_dir, icdb_file)
img = extract_from_icdb_database(icdb_path, file_path)
if img:
print(f" -> Found thumbnail in {icdb_file}")
return img
except Exception as e:
print(f" -> Failed to read {icdb_file}: {e}")
continue
# Lookup a precise index for this file from Icaros_idx.icdb (SQLite or binary)
mapped_index, preferred_db = _lookup_icaros_index(icaros_cache_dir, file_path)
if mapped_index is None:
print(f" -> No exact Icaros index entry for this file; skipping Icaros")
return None
# Try preferred DB first if provided
ordered_dbs = icdb_paths
if preferred_db is not None and preferred_db in icdb_paths:
ordered_dbs = [preferred_db] + [p for p in icdb_paths if p != preferred_db]
for icdb_path in ordered_dbs:
img = extract_from_icdb_database(icdb_path, file_path, forced_index=mapped_index)
if img:
print(f" -> Found thumbnail in {os.path.basename(icdb_path)} via mapped index")
return img
print(f" -> Mapped index did not resolve a thumbnail in any DB")
return None
except ImportError:
@@ -744,8 +724,259 @@ def extract_icaros_thumbnail(file_path):
return None
def extract_from_icdb_database(icdb_path, file_path):
"""Extract thumbnail from Icaros .icdb binary cache file using alphabetical mapping"""
# --- Icaros helpers: cache dir, index mapping, and database discovery ---
def _icaros_cache_dir_candidates():
    """List the locations searched for an Icaros cache directory, in priority order."""
    # Raw strings keep backslashes literal, so each separator is written once;
    # doubling them would put two backslashes into every path.
    return [
        r"C:\Program Files\Icaros\IcarosCache",
        r"C:\Program Files (x86)\Icaros\IcarosCache",
        os.path.expanduser(r"~\AppData\Local\Icaros\IcarosCache"),
        os.path.expanduser(r"~\AppData\Roaming\Icaros\IcarosCache"),
    ]


def _get_icaros_cache_dir():
    """Return the first candidate Icaros cache directory that exists, else None."""
    for candidate in _icaros_cache_dir_candidates():
        # isdir rather than exists: callers list the directory's contents.
        if os.path.isdir(candidate):
            return candidate
    return None
# Module-level memo for _build_icaros_index_map(): None until the first build
# attempt, afterwards the (possibly empty) mapping that attempt produced.
_ICAROS_INDEX_CACHE = None
def _normalize_windows_path(path):
    """Canonicalize path for comparisons.

    The path is made absolute and passed through os.path.normcase. If that
    fails for any reason, the lower-cased input is returned instead.
    """
    try:
        absolute = os.path.abspath(path)
        normalized = os.path.normcase(absolute)
    except Exception:
        normalized = path.lower()
    return normalized
def _quote_sqlite_identifier(name):
    """Return name as a double-quoted SQLite identifier, doubling embedded quotes."""
    return '"' + str(name).replace('"', '""') + '"'


def _pick_icaros_index_columns(cur, table):
    """Guess which columns of table hold the file path, the index and a DB hint.

    Returns a (path_col, index_col, db_col) tuple of column names; an element
    is None when no column name matches that role.
    """
    cur.execute(f"PRAGMA table_info({_quote_sqlite_identifier(table)})")
    # table_info rows are (cid, name, type, notnull, dflt_value, pk).
    names = [row[1] for row in cur.fetchall() if isinstance(row[1], str)]

    path_names = ('path', 'filepath', 'file_path', 'fullpath', 'filename', 'name')
    index_keys = ('index', 'position', 'pos', 'idx', 'imageindex', 'thumbindex')
    db_keys = ('db', 'database', 'cache', 'size')

    path_col = next((n for n in names if n.lower() in path_names), None)
    index_col = next(
        (n for n in names if any(k in n.lower() for k in index_keys)), None)
    # Never reuse the path/index column as the DB hint (e.g. a column "dbindex").
    db_col = next(
        (n for n in names
         if n not in (path_col, index_col)
         and any(k in n.lower() for k in db_keys)),
        None)
    return path_col, index_col, db_col


def _build_icaros_index_map(cache_dir):
    """Attempt to build a file->position map from Icaros_idx.icdb (if SQLite).

    Every table that has a recognisable path column and index column is read.
    The schema is guessed from column names, so this is best-effort: tables
    that cannot be read are skipped, and any other failure yields an empty map.

    The result is memoized in _ICAROS_INDEX_CACHE, so only the first call
    touches the file (later calls ignore cache_dir).

    Returns dict[normalized_path] -> {'index': int, 'db': icdb path or None}.
    """
    global _ICAROS_INDEX_CACHE
    if _ICAROS_INDEX_CACHE is not None:
        return _ICAROS_INDEX_CACHE

    idx_path = os.path.join(cache_dir, 'Icaros_idx.icdb')
    if not os.path.exists(idx_path):
        _ICAROS_INDEX_CACHE = {}
        return _ICAROS_INDEX_CACHE

    try:
        import sqlite3
        from contextlib import closing

        mapping = {}
        # closing() guarantees the connection is released even when the file
        # turns out not to be a SQLite database.
        with closing(sqlite3.connect(idx_path)) as conn:
            cur = conn.cursor()
            cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tables = [r[0] for r in cur.fetchall()]

            for table in tables:
                try:
                    path_col, index_col, db_col = _pick_icaros_index_columns(cur, table)
                    if path_col is None or index_col is None:
                        continue
                    selected = [path_col, index_col] + ([db_col] if db_col else [])
                    # Identifiers are quoted: a column literally named "index"
                    # is a SQL keyword and would otherwise be a syntax error.
                    query = "SELECT {} FROM {}".format(
                        ", ".join(_quote_sqlite_identifier(c) for c in selected),
                        _quote_sqlite_identifier(table))
                    for row in cur.execute(query):
                        raw_path, raw_index = row[0], row[1]
                        if raw_index is None or not isinstance(raw_path, str):
                            continue
                        try:
                            index_value = int(raw_index)
                        except (TypeError, ValueError):
                            continue
                        db_hint = None
                        if db_col and row[2]:
                            # Some schemas might store db size or name; try to
                            # resolve it to a concrete .icdb file.
                            db_hint = _resolve_icdb_hint(cache_dir, str(row[2]))
                        mapping[_normalize_windows_path(raw_path)] = {
                            'index': index_value, 'db': db_hint}
                except sqlite3.Error:
                    # Unreadable table: skip it and keep what other tables gave.
                    continue

        _ICAROS_INDEX_CACHE = mapping
        if mapping:
            print(f" -> Loaded Icaros index map for {len(mapping)} files")
        else:
            print(f" -> Icaros index database present but no usable mapping found")
        return _ICAROS_INDEX_CACHE
    except Exception as e:
        # Deliberate best-effort boundary: a broken index must not stop
        # thumbnail generation, it only disables this lookup strategy.
        print(f" -> Failed to read Icaros index: {e}")
        _ICAROS_INDEX_CACHE = {}
        return _ICAROS_INDEX_CACHE
def _lookup_icaros_index(cache_dir, file_path):
    r"""Return (index, preferred_db_path) for file_path by inspecting Icaros_idx.icdb.

    Strategies, tried in this order:
    1) Direct SQLite mapping (exact normalized full path match)
    2) The same mapping under alternate path spellings (R:\ -> \\Server\Share)
    3) Binary scan of Icaros_idx.icdb for the path (UTF-16LE, then UTF-8) and a
       plausible 32-bit little-endian int stored after it
    4) Ordinal of the file under the roots named in ICAROS_MONITORED_ROOTS

    Returns (None, None) if not found.
    """
    import struct

    # Strategies 1 and 2: SQLite-based mapping, exact key first.
    index_map = _build_icaros_index_map(cache_dir)
    key = _normalize_windows_path(file_path)
    for candidate_key in [key, *_generate_alternate_windows_paths(key)]:
        entry = index_map.get(candidate_key)
        if entry is not None:
            return entry.get('index'), entry.get('db')

    # Strategy 3: binary fallback. The on-disk layout is not known, so this is
    # a heuristic: find the path bytes and take the first small integer in the
    # window that FOLLOWS them (scanning the path bytes themselves is pointless).
    max_plausible_index = 100000  # sanity bound
    window_size = 256
    idx_path = os.path.join(cache_dir, 'Icaros_idx.icdb')
    try:
        with open(idx_path, 'rb') as f:
            data = f.read()
        for encoding in ('utf-16le', 'utf-8'):
            needle = key.encode(encoding)
            pos = data.find(needle) if needle else -1
            if pos == -1:
                continue
            start = pos + len(needle)
            window = data[start:start + window_size]
            # len(window) - 3 keeps the last complete 4-byte group in range.
            for off in range(0, len(window) - 3, 4):
                idx_candidate = struct.unpack_from('<I', window, off)[0]
                if idx_candidate < max_plausible_index:
                    return idx_candidate, None
            # The path was located; do not retry with the other encoding.
            break
    except (OSError, UnicodeError):
        # Missing/unreadable index file or un-encodable path: skip this strategy.
        pass

    # Strategy 4: environment-guided fallback, user-provided monitored roots
    # (semi-colon separated).
    # Example: ICAROS_MONITORED_ROOTS=R:\\YouTube\\Streams\\MixerTwitch;R:\\Videos
    roots_env = os.environ.get('ICAROS_MONITORED_ROOTS')
    if roots_env:
        roots = [r.strip() for r in roots_env.split(';') if r.strip()]
        idx = _compute_index_from_roots(roots, file_path)
        if idx is not None:
            return idx, None

    return None, None
def _generate_alternate_windows_paths(norm_path):
    """Collect alternate spellings of norm_path for matching Icaros records.

    The input itself is always part of the result. When it begins with a drive
    prefix (letter, colon, backslash) and the ICAROS_UNC_ROOT environment
    variable is set, the path re-rooted under that root is added as well,
    passed through os.path.normcase. The order of the returned list is not
    defined.
    """
    spellings = {norm_path}
    try:
        has_drive_prefix = len(norm_path) >= 3 and norm_path[1:3] == ':\\'
        if has_drive_prefix:
            # Environment-based hint (not perfect): the share root that the
            # drive letter stands for.
            share_root = os.environ.get('ICAROS_UNC_ROOT')
            if share_root:
                remainder = norm_path[2:].lstrip('\\')
                spellings.add(os.path.normcase(os.path.join(share_root, remainder)))
    except Exception:
        pass
    return list(spellings)
def _compute_index_from_roots(roots, file_path):
    """Approximate the Icaros index of file_path from a directory listing.

    All files with a supported video extension under the given roots are
    collected, normalized with os.path.normcase, de-duplicated and sorted; the
    result is the position of file_path in that ordering.

    Entries are normalized the same way as the lookup key so the comparison is
    case-insensitive wherever normcase folds case, and overlapping roots cannot
    count a file twice and shift the positions after it.

    Returns None if no supported file is found, if file_path is not among
    them, or if the inputs cannot be processed.
    """
    supported_exts = {'.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.m4v', '.ts', '.webm'}
    try:
        target = os.path.normcase(os.path.abspath(file_path))
        found = set()
        for root in roots:
            if not root:
                continue
            norm_root = os.path.normcase(os.path.abspath(root))
            # os.walk yields nothing for a root that does not exist.
            for dirpath, _dirnames, filenames in os.walk(norm_root):
                for filename in filenames:
                    if os.path.splitext(filename)[1].lower() in supported_exts:
                        found.add(os.path.normcase(os.path.join(dirpath, filename)))
    except (OSError, TypeError, ValueError):
        return None

    if target not in found:
        return None
    return sorted(found).index(target)
def _resolve_icdb_hint(cache_dir, hint):
    """Map a DB hint (a size such as 2560, or a name fragment) to an .icdb path.

    A hint that parses as an integer is first matched against file names
    containing "_<size>.icdb". Failing that, the hint is matched as a
    case-insensitive substring of the file name. Returns None when nothing
    matches or when the databases cannot be listed.
    """
    try:
        candidates = _list_icdb_databases(cache_dir)
        hint_text = str(hint)

        # Numeric hint: look for the database of exactly that size.
        try:
            size_marker = f"_{int(hint_text.strip())}.icdb"
        except Exception:
            size_marker = None
        if size_marker is not None:
            for candidate in candidates:
                if size_marker in os.path.basename(candidate):
                    return candidate

        # Otherwise treat the hint as a fragment of the file name.
        fragment = hint_text.lower()
        return next(
            (c for c in candidates if fragment in os.path.basename(c).lower()),
            None)
    except Exception:
        return None
def _list_icdb_databases(cache_dir):
    """Return .icdb database paths ordered by preferred resolution (largest first).

    The resolution is the first "_<digits>." group in the file name
    (e.g. 1280 for Icaros_1280.icdb or Icaros_HD_1280.icdb); files without one
    count as 0 and sort last. Ties are ordered by name so the result does not
    depend on directory listing order. The index file Icaros_idx.icdb is
    excluded, and the extension check ignores case.

    Raises OSError if cache_dir cannot be listed.
    """
    import re

    size_pattern = re.compile(r'_(\d+)\.')

    def resolution_of(filename):
        match = size_pattern.search(filename)
        return int(match.group(1)) if match else 0

    names = [
        name for name in os.listdir(cache_dir)
        if name.lower().endswith('.icdb') and name.lower() != 'icaros_idx.icdb'
    ]
    names.sort(key=lambda name: (-resolution_of(name), name.lower()))
    return [os.path.join(cache_dir, name) for name in names]
def extract_from_icdb_database(icdb_path, file_path, forced_index=None):
"""Extract thumbnail from Icaros .icdb binary cache file.
If forced_index is provided, use that exact JPEG ordinal if present; otherwise
fall back to heuristic alphabetical mapping (best-effort).
"""
try:
import struct
import glob
@@ -776,7 +1007,27 @@ def extract_from_icdb_database(icdb_path, file_path):
print(f" -> Found {len(jpeg_positions)} JPEG images in {icdb_path}")
# Use discovered alphabetical algorithm
# If we have a mapped index, try it directly first
if isinstance(forced_index, int) and len(jpeg_positions) > 0:
pos_index = forced_index
if pos_index < 0:
pos_index = 0
if pos_index >= len(jpeg_positions):
pos_index = pos_index % len(jpeg_positions)
jpeg_start = jpeg_positions[pos_index]
jpeg_end = data.find(b'\xff\xd9', jpeg_start)
if jpeg_end != -1:
jpeg_end += 2
jpeg_data = data[jpeg_start:jpeg_end]
try:
img = Image.open(io.BytesIO(jpeg_data))
print(f" -> Used mapped index {pos_index} from {os.path.basename(icdb_path)}")
return img.copy()
except Exception:
# If mapped position invalid, continue to heuristic
print(f" -> Mapped index {pos_index} invalid; falling back to heuristic")
# Heuristic alphabetical mapping (fallback)
def get_alphabetical_position(target_file_path):
"""Get cache position based on alphabetical sorting of monitored files"""
# Get Icaros supported extensions