functional enough

This commit is contained in:
2025-07-01 23:27:50 -06:00
parent 98a592bdef
commit 4687539a90
8 changed files with 1444 additions and 71 deletions

View File

@@ -5,6 +5,7 @@ import argparse
import errno
import time
import subprocess
import signal
from PIL import Image
from multiprocessing import Pool
from multiprocessing import Value
@@ -34,6 +35,8 @@ class State(object):
def init(args):
    """Pool initializer: store the shared state object in this worker process.

    Args:
        args: The object handed over through ``initargs``; it is published as
            the module-level ``state`` global so worker code can reach it.
    """
    global state
    # Workers ignore Ctrl+C; the main process is the one that catches
    # KeyboardInterrupt and reports the interruption.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    state = args
def main():
@@ -42,10 +45,15 @@ def main():
files = find_files(args.directory)
with Pool(processes=4, initializer=init, initargs=(state, )) as pool:
pool.map(process_file, files)
try:
with Pool(processes=4, initializer=init, initargs=(state, )) as pool:
pool.map(process_file, files)
except KeyboardInterrupt:
print("\nInterrupted by user. Cleaning up...")
return 1
print("{0} files processed in total.".format(state.value))
return 0
def parse_args():
@@ -117,15 +125,37 @@ def ensure_directory_exists(path):
def create_video_thumbnails(source_path, dest_dir):
    """Generate video thumbnails via the Windows provider, falling back to FFmpeg.

    Args:
        source_path: Path of the video file to thumbnail.
        dest_dir: Destination directory, passed through unchanged to
            ``generate_synology_thumbnails`` / ``create_video_thumbnails_ffmpeg``.

    The Icaros cache tier is deliberately not attempted here, so the log
    messages only mention the tiers that actually run.
    """
    # TODO: Re-enable Icaros cache extraction (extract_icaros_thumbnail) after
    # the cache has been rebuilt with the current file set.

    # Formats for which Windows thumbnail extraction typically does not work.
    unsupported_formats = ('.m2ts', '.mts', '.flv', '.webm')
    file_ext = os.path.splitext(source_path)[1].lower()

    if file_ext in unsupported_formats:
        print(f"Skipping Windows thumbnail extraction for {file_ext} format, using FFmpeg...")
    else:
        # Windows extraction is tried first because it is faster than FFmpeg.
        print(f"Trying Windows thumbnail extraction for {os.path.basename(source_path)}...")
        windows_thumb = extract_windows_thumbnail(source_path)
        if windows_thumb:
            print(" -> SUCCESS: Using Windows thumbnail provider")
            generate_synology_thumbnails(windows_thumb, dest_dir, include_video_screenshot=True)
            return
        print(f"Windows thumbnail extraction failed for {source_path}, using FFmpeg...")

    # Single fallback path shared by "unsupported format" and "extraction failed".
    create_video_thumbnails_ffmpeg(source_path, dest_dir)
@@ -146,7 +176,7 @@ def create_video_thumbnails_ffmpeg(source_path, dest_dir):
try:
# Use FFmpeg to extract a frame from the video at 10% duration
cmd = [
'ffmpeg', '-i', source_path,
'ffmpeg', '-nostdin', '-i', source_path,
'-ss', '00:00:05', # Seek to 5 seconds
'-vframes', '1',
'-vf', f'scale={thumb[1]}:{thumb[1]}:force_original_aspect_ratio=decrease',
@@ -261,19 +291,280 @@ def extract_blend_thumbnail(blend_path):
def extract_windows_thumbnail(file_path, use_thumbs_db=False):
    """Try the Windows thumbnail providers for ``file_path``.

    Tier 1 (Shell API / thumbcache via ``extract_from_thumbcache``) is always
    attempted. Tier 2 (the adjacent ``Thumbs.db``) is currently disabled for
    debugging and only runs when ``use_thumbs_db`` is true. Tier 3 (Icaros
    cache) is always skipped until its mapping is properly reverse engineered.

    Args:
        file_path: Path of the file whose thumbnail is wanted.
        use_thumbs_db: Opt in to the Thumbs.db tier. Defaults to ``False``,
            which matches the current "temporarily disabled" behaviour.

    Returns:
        The thumbnail object produced by the first tier that succeeds, or
        ``None`` when every enabled tier fails. Never raises: failures in a
        tier are logged and the next tier is tried.
    """
    filename = os.path.basename(file_path)
    print(f" -> Trying Windows thumbnail extraction for {filename}")

    # Tier 1: Windows centralized thumbnail cache
    try:
        thumb = extract_from_thumbcache(file_path)
        if thumb:
            print(f" -> Found thumbnail in Windows thumbcache for {filename}")
            return thumb
        print(f" -> No thumbnail in Windows thumbcache for {filename}")
    except Exception as e:
        print(f" -> Windows thumbcache extraction failed: {e}")

    # Tier 2: Thumbs.db next to the file (TEMPORARILY DISABLED by default)
    if use_thumbs_db:
        try:
            thumbs_db_path = os.path.join(os.path.dirname(file_path), 'Thumbs.db')
            if os.path.exists(thumbs_db_path):
                print(f" -> Found Thumbs.db, checking for {filename}")
                thumb = extract_from_thumbs_db(thumbs_db_path, filename)
                if thumb:
                    print(f" -> Found thumbnail in Thumbs.db for {filename}")
                    return thumb
                print(f" -> No thumbnail in Thumbs.db for {filename}")
            else:
                print(" -> No Thumbs.db found in directory")
        except Exception as e:
            print(f" -> Thumbs.db extraction failed: {e}")
    else:
        print(" -> DISABLED: Skipping Thumbs.db extraction for debugging")

    # Tier 3: Icaros cache (TEMPORARILY DISABLED: algorithm produces random results)
    # TODO: Properly reverse engineer Icaros cache mapping
    print(" -> DISABLED: Skipping Icaros cache for debugging")

    print(f" -> Windows thumbnail extraction failed for {filename}")
    return None
# PowerShell program that asks the Windows Shell (IShellItemImageFactory) for a
# 256px thumbnail and saves it as PNG. The source and output paths are read
# from environment variables instead of being spliced into the script text, so
# characters such as $, backtick, quotes or UNC prefixes in a path cannot be
# reinterpreted by PowerShell. The here-string terminator ("@) must stay at
# column 0, which is why the script is not indented with the Python code.
_SHELL_THUMBNAIL_SCRIPT = r'''
Add-Type -AssemblyName System.Drawing

try {
    $filePath = $env:SYNO_THUMB_SOURCE
    $outputPath = $env:SYNO_THUMB_OUTPUT
    Write-Output "DEBUG: Starting thumbnail extraction for: $filePath"

    if (-not (Test-Path -LiteralPath $filePath)) {
        Write-Output "FAILED: File not found"
        exit
    }

    Write-Output "DEBUG: File exists, attempting to load thumbnail API..."

    # Use .NET Framework's thumbnail extraction
    Add-Type -ReferencedAssemblies System.Drawing -TypeDefinition @"
using System;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.ComTypes;

[ComImport, Guid("bcc18b79-ba16-442f-80c4-8a59c30c463b")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
public interface IShellItemImageFactory
{
    [PreserveSig]
    int GetImage([In, MarshalAs(UnmanagedType.Struct)] SIZE size, [In] SIIGBF flags, [Out] out IntPtr phbmp);
}

[StructLayout(LayoutKind.Sequential)]
public struct SIZE
{
    public int cx;
    public int cy;
    public SIZE(int cx, int cy) { this.cx = cx; this.cy = cy; }
}

public enum SIIGBF
{
    SIIGBF_RESIZETOFIT = 0x00000000,
    SIIGBF_BIGGERSIZEOK = 0x00000001,
    SIIGBF_MEMORYONLY = 0x00000002,
    SIIGBF_ICONONLY = 0x00000004,
    SIIGBF_THUMBNAILONLY = 0x00000008,
    SIIGBF_INCACHEONLY = 0x00000010,
}

public static class ThumbnailExtractor
{
    [DllImport("shell32.dll", CharSet = CharSet.Unicode, PreserveSig = false)]
    public static extern void SHCreateItemFromParsingName(
        [In][MarshalAs(UnmanagedType.LPWStr)] string pszPath,
        [In] IntPtr pbc,
        [In][MarshalAs(UnmanagedType.LPStruct)] Guid riid,
        [Out][MarshalAs(UnmanagedType.Interface)] out IShellItemImageFactory ppv);

    [DllImport("gdi32.dll")]
    public static extern bool DeleteObject(IntPtr hObject);

    public static Bitmap GetThumbnail(string path, int size)
    {
        try {
            Console.WriteLine("DEBUG: Creating IShellItemImageFactory...");
            Guid iid = new Guid("bcc18b79-ba16-442f-80c4-8a59c30c463b");
            IShellItemImageFactory factory;
            SHCreateItemFromParsingName(path, IntPtr.Zero, iid, out factory);

            Console.WriteLine("DEBUG: Calling GetImage with THUMBNAILONLY...");
            IntPtr hbitmap;
            SIZE sz = new SIZE(size, size);

            // Try thumbnail first, then allow icon fallback
            int hr = factory.GetImage(sz, SIIGBF.SIIGBF_THUMBNAILONLY | SIIGBF.SIIGBF_BIGGERSIZEOK, out hbitmap);
            Console.WriteLine("DEBUG: THUMBNAILONLY result: " + hr.ToString("X"));

            if (hr != 0) {
                Console.WriteLine("DEBUG: Trying with RESIZETOFIT fallback...");
                // If thumbnail fails, try with icon fallback
                hr = factory.GetImage(sz, SIIGBF.SIIGBF_RESIZETOFIT | SIIGBF.SIIGBF_BIGGERSIZEOK, out hbitmap);
                Console.WriteLine("DEBUG: RESIZETOFIT result: " + hr.ToString("X"));
            }

            if (hr == 0 && hbitmap != IntPtr.Zero) {
                Console.WriteLine("DEBUG: Successfully got bitmap, converting...");
                Bitmap bitmap = Image.FromHbitmap(hbitmap);
                DeleteObject(hbitmap);
                return bitmap;
            }

            Console.WriteLine("DEBUG: No bitmap obtained");
            return null;
        }
        catch (Exception ex)
        {
            Console.WriteLine("DEBUG: Exception in GetThumbnail: " + ex.Message);
            throw new Exception("Thumbnail extraction failed: " + ex.Message);
        }
    }
}
"@

    Write-Output "DEBUG: Type definitions loaded, calling GetThumbnail..."

    # Try to extract thumbnail
    $thumbnail = [ThumbnailExtractor]::GetThumbnail($filePath, 256)

    if ($thumbnail -ne $null) {
        Write-Output "DEBUG: Thumbnail extracted, saving to file..."
        # NOTE: because of the RESIZETOFIT fallback this may be a generic icon
        # rather than a real thumbnail; no check for that is performed.
        $thumbnail.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
        $thumbnail.Dispose()
        Write-Output "SUCCESS: Thumbnail extracted"
    } else {
        Write-Output "FAILED: No thumbnail available"
    }
} catch {
    Write-Output "FAILED: $($_.Exception.Message)"
    Write-Output "DEBUG: Exception details: $($_.Exception)"
}
'''

# Thumbcache database names, highest resolution first.
_THUMBCACHE_DB_NAMES = (
    'thumbcache_2560.db',
    'thumbcache_1024.db',
    'thumbcache_256.db',
    'thumbcache_96.db',
)


def _extract_via_shell_api(file_path):
    """Run the Shell API PowerShell script; return a loaded image or None.

    Exceptions (PowerShell missing, timeout, ...) propagate to the caller. The
    temporary directory is removed on every exit path.
    """
    import subprocess
    import tempfile

    print(" -> Setting up temporary directory...")
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_thumbnail = os.path.join(temp_dir, "thumbnail.png")
        print(f" -> Temp thumbnail path: {temp_thumbnail}")

        script_env = dict(
            os.environ,
            SYNO_THUMB_SOURCE=os.fspath(file_path),
            SYNO_THUMB_OUTPUT=temp_thumbnail,
        )

        print(" -> Executing PowerShell script...")
        result = subprocess.run(
            ["powershell", "-Command", _SHELL_THUMBNAIL_SCRIPT],
            capture_output=True, text=True, timeout=30, env=script_env,
        )

        print(f" -> PowerShell return code: {result.returncode}")
        print(f" -> PowerShell stdout: {result.stdout}")
        if result.stderr:
            print(f" -> PowerShell stderr: {result.stderr}")

        if os.path.exists(temp_thumbnail):
            print(f" -> Thumbnail file exists, size: {os.path.getsize(temp_thumbnail)} bytes")
            if "SUCCESS" in result.stdout:
                # Image.open is lazy: copy the pixels into memory and close the
                # file before the temporary directory is deleted.
                with Image.open(temp_thumbnail) as opened:
                    img = opened.copy()
                print(f" -> Successfully extracted thumbnail via IShellItemImageFactory ({img.width}x{img.height})")
                return img
            print(" -> Thumbnail file exists but script reported failure")
        else:
            print(" -> No thumbnail file was created")

        print(f" -> IShellItemImageFactory extraction failed: {result.stdout.strip()}")
        if result.stderr:
            print(f" -> Error details: {result.stderr.strip()}")
        return None


def _extract_via_thumbcache_files(file_path):
    """Brute-force fallback: scan the largest available thumbcache database."""
    print(" -> Trying direct thumbcache database lookup...")

    # Windows centralized cache location
    thumbcache_dir = os.path.expanduser(r"~\AppData\Local\Microsoft\Windows\Explorer")
    if not os.path.exists(thumbcache_dir):
        return None

    candidates = (os.path.join(thumbcache_dir, name) for name in _THUMBCACHE_DB_NAMES)
    cache_files = [path for path in candidates if os.path.exists(path)]
    if not cache_files:
        return None

    print(f" -> Found {len(cache_files)} thumbcache databases, using fallback brute-force method")
    print(" -> WARNING: This may return incorrect thumbnails due to hash-based indexing")

    # Only the first (largest) database is tried.
    cache_path = cache_files[0]
    try:
        thumb = extract_from_thumbcache_db(cache_path, file_path)
        if thumb:
            print(f" -> Found thumbnail in {os.path.basename(cache_path)} (may be incorrect file)")
            return thumb
    except Exception as e:
        print(f" -> Failed to read {os.path.basename(cache_path)}: {e}")
    return None


def extract_from_thumbcache(file_path):
    """Extract a thumbnail for ``file_path`` using the Windows Shell API.

    Method 1 runs a PowerShell script that calls IShellItemImageFactory and
    writes a PNG into a temporary directory. Method 2 is a brute-force scan of
    the user's thumbcache databases and may return a thumbnail belonging to a
    different file.

    Args:
        file_path: Path of the file whose thumbnail is wanted.

    Returns:
        A PIL image, or ``None`` when both methods fail. Never raises: every
        failure is logged and turned into ``None``.
    """
    import traceback

    try:
        print(" -> Requesting thumbnail from Windows Shell API...")
        print(f" -> Target file: {file_path}")

        # Method 1: IShellItemImageFactory (proper thumbnail API)
        try:
            thumb = _extract_via_shell_api(file_path)
            if thumb:
                return thumb
        except Exception as e:
            print(f" -> IShellItemImageFactory method failed: {e}")
            print(f" -> Traceback: {traceback.format_exc()}")

        # Method 2: direct thumbcache lookup as fallback
        try:
            thumb = _extract_via_thumbcache_files(file_path)
            if thumb:
                return thumb
        except Exception as e:
            print(f" -> Thumbcache fallback failed: {e}")

        return None

    except Exception as e:
        print(f" -> Shell API extraction error: {e}")
        print(f" -> Traceback: {traceback.format_exc()}")
        return None
@@ -383,6 +674,199 @@ def extract_thumbnail_from_stream(ole, stream_name):
return None
def _icdb_size_from_filename(filename):
    """Return the number after the first underscore of an .icdb name, else 0."""
    try:
        if '_' in filename and filename.lower().endswith('.icdb'):
            return int(filename.split('_')[1].split('.')[0])
        return 0
    except (ValueError, IndexError):
        return 0


def extract_icaros_thumbnail(file_path, cache_dirs=None):
    """Extract a thumbnail from the Icaros cache (.icdb database files).

    Args:
        file_path: Path of the video whose thumbnail is wanted.
        cache_dirs: Optional iterable of candidate cache directories, checked
            in order. Defaults to the known Icaros install/profile locations.

    Returns:
        The image returned by ``extract_from_icdb_database`` for the first
        database that yields one, or ``None``. Never raises.
    """
    try:
        if cache_dirs is None:
            cache_dirs = [
                r"C:\Program Files\Icaros\IcarosCache",
                r"C:\Program Files (x86)\Icaros\IcarosCache",
                os.path.expanduser(r"~\AppData\Local\Icaros\IcarosCache"),
                os.path.expanduser(r"~\AppData\Roaming\Icaros\IcarosCache"),
            ]

        icaros_cache_dir = next((d for d in cache_dirs if os.path.exists(d)), None)
        if icaros_cache_dir is None:
            print(" -> No Icaros cache directory found")
            return None

        # Debug: show cache directory contents (all files)
        cache_files = os.listdir(icaros_cache_dir)
        print(f" -> Icaros cache has {len(cache_files)} files")
        if cache_files:
            print(f" -> All cache files: {cache_files}")

        # Extension match is case-insensitive.
        icdb_files = [f for f in cache_files if f.lower().endswith('.icdb')]
        if not icdb_files:
            print(" -> No .icdb database files found")
            return None

        # Largest database first (likely has the best quality).
        icdb_files.sort(key=_icdb_size_from_filename, reverse=True)

        for icdb_file in icdb_files:
            try:
                icdb_path = os.path.join(icaros_cache_dir, icdb_file)
                img = extract_from_icdb_database(icdb_path, file_path)
                if img:
                    print(f" -> Found thumbnail in {icdb_file}")
                    return img
            except Exception as e:
                print(f" -> Failed to read {icdb_file}: {e}")
                continue

        return None

    except Exception as e:
        print(f" -> Icaros cache extraction error: {e}")
        return None
# Extensions counted when simulating the Icaros file ordering.
_ICAROS_EXTENSIONS = frozenset(
    {'.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.m4v', '.ts', '.webm'}
)

# Directories assumed to be monitored by Icaros. The cache was observed to hold
# more entries than the directory currently has files, so positions derived
# from a fresh scan are only estimates.
_ICAROS_MONITORED_DIRS = [r"R:\YouTube\Streams\MixerTwitch\2025"]

# Manually verified filename -> cache index mappings.
_ICAROS_KNOWN_POSITIONS = {
    "wisdom.ts.mp4": 33,  # Actually maps to wisdom_vtubrec.webm
    "progress.ts.mp4": 31,  # Actually maps to memorial_vtubrec.webm
}


def _find_jpeg_offsets(data):
    """Return the offset of every JPEG start marker (FF D8 FF) in ``data``."""
    offsets = []
    pos = data.find(b'\xff\xd8\xff')
    while pos != -1:
        offsets.append(pos)
        pos = data.find(b'\xff\xd8\xff', pos + 1)
    return offsets


def _estimate_icaros_position(target_file_path):
    """Estimate the cache index from an alphabetical sort of monitored files."""
    target_filename = os.path.basename(target_file_path)

    # Verified mappings win over any estimate.
    if target_filename in _ICAROS_KNOWN_POSITIONS:
        return _ICAROS_KNOWN_POSITIONS[target_filename]

    all_files = []
    for monitor_dir in _ICAROS_MONITORED_DIRS:
        if not os.path.exists(monitor_dir):
            continue
        for root, _dirs, files in os.walk(monitor_dir):
            for name in files:
                if os.path.splitext(name)[1].lower() in _ICAROS_EXTENSIONS:
                    all_files.append(os.path.relpath(os.path.join(root, name), monitor_dir))

    # Sort alphabetically (case-insensitive)
    all_files.sort(key=str.lower)

    if _ICAROS_MONITORED_DIRS:
        try:
            target_rel_path = os.path.relpath(target_file_path, _ICAROS_MONITORED_DIRS[0])
        except ValueError:
            # relpath raises on Windows when the two paths are on different
            # drives; such a file cannot be inside the monitored directory.
            target_rel_path = target_filename
    else:
        target_rel_path = target_filename

    try:
        position = all_files.index(target_rel_path)
    except ValueError:
        print(f" -> File {target_filename} not found in current monitored directories")
        # Middle position as fallback; this is a guess, not a real mapping.
        return len(all_files) // 2

    print(f" -> Alphabetical position estimate: {position} for {target_filename}")
    return position


def extract_from_icdb_database(icdb_path, file_path):
    """Extract a thumbnail from an Icaros .icdb binary cache file.

    The file is scanned for embedded JPEGs and one is picked by an estimated
    alphabetical position of ``file_path``. The mapping is heuristic, so the
    returned image may belong to a different video.

    Args:
        icdb_path: Path of the .icdb file to read.
        file_path: Path of the video whose thumbnail is wanted.

    Returns:
        A PIL image copy, or ``None`` when the file is unreadable, lacks the
        ``ICDB`` signature, contains no JPEG, or the JPEG cannot be decoded.
    """
    import io

    try:
        with open(icdb_path, 'rb') as f:
            data = f.read()

        # Verify ICDB signature
        if not data.startswith(b'ICDB'):
            print(f" -> Invalid ICDB signature in {icdb_path}")
            return None

        jpeg_positions = _find_jpeg_offsets(data)
        if not jpeg_positions:
            print(f" -> No JPEG images found in {icdb_path}")
            return None

        print(f" -> Found {len(jpeg_positions)} JPEG images in {icdb_path}")

        cache_position = _estimate_icaros_position(file_path)

        # The cache might come from a larger file set: wrap around.
        if cache_position >= len(jpeg_positions):
            cache_position = cache_position % len(jpeg_positions)
            print(f" -> Wrapped position to {cache_position} (cache has {len(jpeg_positions)} entries)")

        jpeg_start = jpeg_positions[cache_position]
        print(f" -> Alphabetical mapping -> JPEG {cache_position} for {os.path.basename(file_path)}")

        # Find the end of the JPEG (FF D9 marker)
        jpeg_end = data.find(b'\xff\xd9', jpeg_start)
        if jpeg_end == -1:
            print(" -> Could not find JPEG end marker")
            return None
        jpeg_end += 2  # Include the end marker

        img = Image.open(io.BytesIO(data[jpeg_start:jpeg_end]))
        print(f" -> Successfully extracted {img.size[0]}x{img.size[1]} JPEG from {icdb_path}")
        return img.copy()

    except Exception as e:
        print(f" -> Error reading ICDB file {icdb_path}: {e}")
        return None
def extract_blend_thumbnail_direct(blend_path):
"""Direct extraction of embedded thumbnail from .blend file."""
import struct
@@ -474,5 +958,97 @@ def create_thumbnails_pil(source_path, dest_dir):
print(f"Error processing image {source_path}: {e}")
# (start marker, end marker, bytes to keep from the end marker's offset).
# JPEG ends with the 2-byte FF D9; PNG ends with "IEND" plus a 4-byte CRC.
_EMBEDDED_IMAGE_MARKERS = (
    (b'\xff\xd8\xff', b'\xff\xd9', 2),
    (b'\x89PNG', b'IEND', 8),
)


def _iter_embedded_blobs(data, start_marker, end_marker, end_length):
    """Yield each byte span from ``start_marker`` through ``end_marker``."""
    pos = 0
    while True:
        start = data.find(start_marker, pos)
        if start == -1:
            return
        end = data.find(end_marker, start)
        if end == -1:
            # No end marker exists past this point, so no later start marker
            # can be closed either.
            return
        stop = end + end_length
        yield data[start:stop]
        pos = stop


def extract_from_thumbcache_db(cache_path, file_path):
    """DEPRECATED: Extract a thumbnail from a specific thumbcache database file.

    This brute-force approach is flawed because thumbcache uses hash-based
    indexing: ``file_path`` is not used to select an entry, so the result is
    simply the largest embedded image and may belong to another file. It is
    kept for fallback purposes but should not be the primary method.

    Args:
        cache_path: Path of the thumbcache ``.db`` file to scan.
        file_path: Path of the file a thumbnail is wanted for (unused).

    Returns:
        The largest embedded JPEG/PNG that is at least 32x32 and at least 96
        pixels on one side, as a PIL image, or ``None``. Never raises.
    """
    import io

    try:
        print(" -> WARNING: Using deprecated brute-force thumbcache parsing")
        print(" -> This may return incorrect thumbnails - use Shell API instead")

        with open(cache_path, 'rb') as f:
            data = f.read()

        # Keep only the best candidate instead of every decoded image. On equal
        # area the earliest image wins, JPEGs being scanned before PNGs.
        best_thumbnail = None
        best_area = -1
        for start_marker, end_marker, end_length in _EMBEDDED_IMAGE_MARKERS:
            for blob in _iter_embedded_blobs(data, start_marker, end_marker, end_length):
                if len(blob) <= 100:  # Skip tiny thumbnails
                    continue
                try:
                    img = Image.open(io.BytesIO(blob))
                    # Only consider reasonably sized thumbnails
                    if img.width >= 32 and img.height >= 32:
                        area = img.width * img.height
                        if area > best_area:
                            best_thumbnail, best_area = img, area
                except Exception:
                    # Best effort: marker hits that are not decodable images
                    # are expected in arbitrary binary data and are skipped.
                    continue

        # Basic quality filter - prefer thumbnails that are reasonable sizes
        if best_thumbnail is not None and (best_thumbnail.width >= 96 or best_thumbnail.height >= 96):
            return best_thumbnail

        return None

    except Exception as e:
        print(f" -> Error reading thumbcache database: {e}")
        return None
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)