thumbs.db extraction WORKING

2025-06-29 18:15:35 -06:00
parent 56685a38dd
commit 98a592bdef
5 changed files with 1765 additions and 70 deletions
--- a/psthumbgen.py
+++ b/psthumbgen.py
@@ -117,7 +117,20 @@ def ensure_directory_exists(path):


 def create_video_thumbnails(source_path, dest_dir):
-    """Generate video thumbnails using FFmpeg"""
+    """Create the Synology thumbnail set for a video file.
+
+    The image returned by extract_windows_thumbnail() is used when one is
+    available, in which case FFmpeg is not run at all. Otherwise a message
+    is printed and the work is handed to create_video_thumbnails_ffmpeg().
+    """
+    cached_thumb = extract_windows_thumbnail(source_path)
+    if not cached_thumb:
+        # Nothing usable was found: report it and decode with FFmpeg instead.
+        print(f"Windows thumbnail extraction failed for {source_path}, using FFmpeg...")
+        create_video_thumbnails_ffmpeg(source_path, dest_dir)
+        return
+
+    # Videos additionally request the video screenshot file.
+    generate_synology_thumbnails(cached_thumb, dest_dir, include_video_screenshot=True)
+
+
+def create_video_thumbnails_ffmpeg(source_path, dest_dir):
+    """Generate video thumbnails using FFmpeg (fallback method)"""
    to_generate = (('SYNOVIDEO_VIDEO_SCREENSHOT.jpg', 1280),
                   ('SYNOPHOTO_THUMB_XL.jpg', 1280),
                   ('SYNOPHOTO_THUMB_B.jpg', 640),
@@ -155,7 +168,20 @@ def create_video_thumbnails(source_path, dest_dir):


 def create_psd_thumbnails(source_path, dest_dir):
-    """Generate PSD thumbnails using PIL with PSD support"""
+    """Create the Synology thumbnail set for a PSD file.
+
+    The image returned by extract_windows_thumbnail() is used when one is
+    available. Otherwise a message is printed and the file is processed by
+    create_psd_thumbnails_direct().
+    """
+    cached_thumb = extract_windows_thumbnail(source_path)
+    if not cached_thumb:
+        # Nothing usable was found: report it and read the PSD directly.
+        print(f"Windows thumbnail extraction failed for {source_path}, using psd-tools...")
+        create_psd_thumbnails_direct(source_path, dest_dir)
+        return
+
+    generate_synology_thumbnails(cached_thumb, dest_dir)
+
+
+def create_psd_thumbnails_direct(source_path, dest_dir):
+    """Generate PSD thumbnails using PIL with PSD support (fallback method)"""
    try:
        # Try to open PSD file - requires pillow-psd plugin
        from psd_tools import PSDImage
@@ -185,41 +211,235 @@ def create_psd_thumbnails(source_path, dest_dir):
        print(f"Error processing PSD {source_path}: {e}")


-def extract_blend_thumbnail(blend_path):
-    """Extracts the largest PNG thumbnail from all PREV blocks in a .blend file."""
-    import struct, re
-    with open(blend_path, 'rb') as f:
-        data = f.read()
+def generate_synology_thumbnails(image, dest_dir, include_video_screenshot=False):
+    """Write the Synology thumbnail JPEGs for a PIL image into dest_dir.
+
+    Args:
+        image: Source PIL image. It is copied for every size and is never
+            modified itself.
+        dest_dir: Directory that receives the thumbnail files.
+        include_video_screenshot: When True, SYNOVIDEO_VIDEO_SCREENSHOT.jpg
+            is written in addition to the SYNOPHOTO_THUMB_* files.
+
+    Thumbnails that already exist in dest_dir are skipped.
+    """
+    to_generate = [
+        ('SYNOPHOTO_THUMB_XL.jpg', 1280),
+        ('SYNOPHOTO_THUMB_B.jpg', 640),
+        ('SYNOPHOTO_THUMB_M.jpg', 320),
+        ('SYNOPHOTO_THUMB_PREVIEW.jpg', 160),
+        ('SYNOPHOTO_THUMB_S.jpg', 120)
+    ]
+
+    # Add video screenshot for video files
+    if include_video_screenshot:
+        to_generate.insert(0, ('SYNOVIDEO_VIDEO_SCREENSHOT.jpg', 1280))

-    if not data.startswith(b'BLENDER'):
+    for thumb_name, thumb_size in to_generate:
+        thumb_path = os.path.join(dest_dir, thumb_name)
+        if os.path.exists(thumb_path):
+            continue
+
+        # thumbnail() resizes in place, so work on a copy of the source.
+        img_copy = image.copy()
+        img_copy.thumbnail((thumb_size, thumb_size), Image.LANCZOS)
+
+        # The JPEG writer rejects alpha and palette modes. Converting only
+        # RGBA left modes such as P and LA to fail in save(), so every mode
+        # other than L, RGB and CMYK is converted to RGB first.
+        if img_copy.mode not in ('L', 'RGB', 'CMYK'):
+            img_copy = img_copy.convert('RGB')
+
+        img_copy.save(thumb_path, 'JPEG', quality=85)
+
+
+def extract_blend_thumbnail(blend_path):
+    """Return a preview image for a .blend file, or None if none is found.
+
+    extract_windows_thumbnail() is consulted first and any exception it
+    raises is ignored. When it yields nothing, the embedded preview is read
+    with extract_blend_thumbnail_direct(); a failure there is printed and
+    None is returned.
+    """
+    # Step 1: thumbnail lookup via extract_windows_thumbnail().
+    try:
+        cached_thumb = extract_windows_thumbnail(blend_path)
+    except Exception:
+        cached_thumb = None
+    if cached_thumb:
+        return cached_thumb
+
+    # Step 2: parse the .blend file itself.
+    try:
+        return extract_blend_thumbnail_direct(blend_path)
+    except Exception as e:
+        print(f"Failed to extract .blend thumbnail: {e}")
        return None
-    pointer_size = 8 if data[7:8] == b'-' else 4
-    endian = '<' if data[8:9] == b'v' else '>'
-    offset = 12
-    candidates = []
-    while offset + 16 < len(data):
-        code = data[offset:offset+4]
-        size = struct.unpack(endian + 'I', data[offset+4:offset+8])[0]
-        block_data_start = offset + 16 + pointer_size
-        block_data_end = block_data_start + size
-        if code == b'PREV':
-            block = data[block_data_start:block_data_end]
-            # Find all PNGs in this block
-            for m in re.finditer(b'\x89PNG\r\n\x1a\n', block):
-                start = m.start()
-                end = block.find(b'IEND', start)
-                if end != -1:
-                    end += 8
-                    png_data = block[start:end]
-                    try:
-                        img = Image.open(io.BytesIO(png_data))
-                        candidates.append((img.size[0] * img.size[1], img))
-                    except Exception:
-                        continue
-        offset = block_data_end
-    if not candidates:
+
+
+def extract_windows_thumbnail(file_path):
+    """Look up a thumbnail for file_path in the Thumbs.db next to it.
+
+    Returns whatever extract_from_thumbs_db() returns for the file's base
+    name, or None when the directory has no Thumbs.db or any error occurs.
+    """
+    try:
+        # Split once into the containing directory and the bare file name.
+        folder, name = os.path.split(file_path)
+        cache_path = os.path.join(folder, 'Thumbs.db')
+
+        if os.path.exists(cache_path):
+            return extract_from_thumbs_db(cache_path, name)
+        return None
+
+    except Exception:
        return None
-    return max(candidates, key=lambda x: x[0])[1]
+
+
+def extract_from_thumbs_db(thumbs_db_path, target_filename):
+    """Return a PIL image for target_filename from a Thumbs.db file, or None.
+
+    Args:
+        thumbs_db_path: Path of the Thumbs.db file to read.
+        target_filename: Base name of the file whose thumbnail is wanted.
+
+    Returns:
+        The first image that extract_thumbnail_from_stream() can decode
+        from a matching stream. None is returned when olefile is not
+        installed, the file is not an OLE container, no stream decodes, or
+        any other error occurs (the error is printed).
+    """
+    try:
+        import olefile
+
+        if not olefile.isOleFile(thumbs_db_path):
+            return None
+
+        ole = olefile.OleFileIO(thumbs_db_path)
+        try:
+            # List the streams once. Each entry keeps the name exactly as
+            # listdir() returned it (needed to open the stream) next to a
+            # joined path used for matching.
+            streams = [
+                ('/'.join(name) if isinstance(name, list) else name, name)
+                for name in ole.listdir()
+            ]
+
+            # Spelling variants of the file name, without duplicates. Empty
+            # strings are dropped because '' is a substring of every path
+            # and would match all streams.
+            base_name = os.path.splitext(target_filename)[0]
+            candidates = []
+            for variant in (
+                target_filename,
+                target_filename.lower(),
+                target_filename.upper(),
+                base_name,
+                base_name.lower(),
+                base_name.upper(),
+            ):
+                if variant and variant not in candidates:
+                    candidates.append(variant)
+
+            # Pass 1: streams whose path contains one of the name variants.
+            for stream_path, stream_name in streams:
+                if any(candidate in stream_path for candidate in candidates):
+                    img = extract_thumbnail_from_stream(ole, stream_name)
+                    if img is not None:
+                        return img
+
+            # Pass 2: streams with a size-style prefix such as "256_".
+            # NOTE(review): this returns the first stream that decodes and
+            # is not tied to target_filename, so in a directory with
+            # several files it may be another file's thumbnail. Behaviour
+            # is kept as-is; confirm whether that is acceptable.
+            for stream_path, stream_name in streams:
+                if stream_path.startswith(('256_', '96_', '32_', 'Thumbnail')):
+                    img = extract_thumbnail_from_stream(ole, stream_name)
+                    if img is not None:
+                        return img
+
+            return None
+        finally:
+            # Release the file handle on every path, including errors.
+            ole.close()
+
+    except ImportError:
+        print("Warning: olefile not installed. Install with: pip install olefile")
+        return None
+    except Exception as e:
+        print(f"Debug: Thumbs.db extraction error: {e}")
+        return None
+
+
+def extract_thumbnail_from_stream(ole, stream_name):
+    """Decode the image stored in one Thumbs.db stream.
+
+    The stream is read in full. Decoding is attempted at several fixed
+    header offsets, then at the first JPEG signature, then at the first PNG
+    signature. Returns the first image PIL accepts, or None when nothing
+    decodes or any error occurs.
+    """
+    try:
+        # stream_name is handed to openstream() unchanged.
+        with ole.openstream(stream_name) as stream:
+            stream.seek(0)
+            payload = stream.read()
+
+            # Fixed header sizes to try, keeping only those that leave at
+            # least one byte of data behind them.
+            starts = [skip for skip in (12, 16, 20, 24, 32) if len(payload) > skip]
+
+            # After those, the first JPEG (FF D8 FF) and the first PNG
+            # (89 50 4E 47) signature, in that order.
+            for signature in (b'\xff\xd8\xff', b'\x89PNG'):
+                position = payload.find(signature)
+                if position != -1:
+                    starts.append(position)
+
+            for start in starts:
+                try:
+                    return Image.open(io.BytesIO(payload[start:]))
+                except Exception:
+                    continue
+
+            return None
+
+    except Exception:
+        return None
+
+
+def extract_blend_thumbnail_direct(blend_path):
+    """Return the largest PNG preview found in a .blend file's PREV blocks.
+
+    The file is walked block by block. Blocks other than PREV are skipped
+    by seeking past their data. Returns None when the file does not start
+    with b'BLENDER' or no preview could be decoded.
+    """
+    import struct
+
+    png_magic = b'\x89PNG\r\n\x1a\n'
+
+    with open(blend_path, 'rb') as blend:
+        file_header = blend.read(12)
+        if not file_header.startswith(b'BLENDER'):
+            return None
+
+        # Header byte 7 selects the pointer width, byte 8 the byte order.
+        pointer_size = 8 if file_header[7:8] == b'-' else 4
+        size_format = ('<' if file_header[8:9] == b'v' else '>') + 'I'
+        block_header_len = 16 + pointer_size
+
+        largest = None
+        largest_area = 0
+
+        while True:
+            block_header = blend.read(block_header_len)
+            if len(block_header) < block_header_len:
+                break
+
+            # The data size follows the 4-byte block code.
+            (payload_len,) = struct.unpack_from(size_format, block_header, 4)
+
+            if block_header[:4] != b'PREV':
+                # Not a preview block: jump over its data.
+                blend.seek(payload_len, 1)
+                continue
+
+            payload = blend.read(payload_len)
+            if len(payload) < payload_len:
+                break
+
+            # Only the first PNG in the block is considered.
+            start = payload.find(png_magic)
+            if start == -1:
+                continue
+            iend = payload.find(b'IEND', start)
+            if iend == -1:
+                continue
+
+            try:
+                # +8 keeps the IEND tag and its CRC.
+                preview = Image.open(io.BytesIO(payload[start:iend + 8]))
+                area = preview.size[0] * preview.size[1]
+                if area > largest_area:
+                    largest = preview.copy()
+                    largest_area = area
+            except Exception:
+                continue
+
+        return largest


 def create_blend_thumbnails(source_path, dest_dir):
@@ -229,43 +449,26 @@ def create_blend_thumbnails(source_path, dest_dir):
        print(f"No embedded thumbnail in {source_path}")
        return

-    to_generate = (('SYNOPHOTO_THUMB_XL.jpg', 1280),
-                   ('SYNOPHOTO_THUMB_B.jpg', 640),
-                   ('SYNOPHOTO_THUMB_M.jpg', 320),
-                   ('SYNOPHOTO_THUMB_PREVIEW.jpg', 160),
-                   ('SYNOPHOTO_THUMB_S.jpg', 120))
-
-    for thumb in to_generate:
-        thumb_path = os.path.join(dest_dir, thumb[0])
-        if os.path.exists(thumb_path):
-            continue
-        im_copy = img.copy()
-        im_copy.thumbnail((thumb[1], thumb[1]), Image.LANCZOS)
-        if im_copy.mode == 'RGBA':
-            im_copy = im_copy.convert('RGB')
-        im_copy.save(thumb_path, 'JPEG', quality=85)
+    generate_synology_thumbnails(img, dest_dir)


 def create_thumbnails(source_path, dest_dir):
-    """Original image thumbnail creation"""
+    """Create the Synology thumbnail set for an image file.
+
+    The image returned by extract_windows_thumbnail() is used when one is
+    available; otherwise the file is processed by create_thumbnails_pil().
+    """
+    cached_thumb = extract_windows_thumbnail(source_path)
+    if not cached_thumb:
+        # Nothing usable was found: open the image itself with PIL.
+        create_thumbnails_pil(source_path, dest_dir)
+        return
+
+    generate_synology_thumbnails(cached_thumb, dest_dir)
+
+
+def create_thumbnails_pil(source_path, dest_dir):
+    """Create the Synology thumbnail set by opening the image with PIL.
+
+    Args:
+        source_path: Path of the image file to open.
+        dest_dir: Directory passed on to generate_synology_thumbnails().
+
+    Any exception is caught and reported with print(); nothing is raised.
+    """
     try:
-        im = Image.open(source_path)
-
-        to_generate = (('SYNOPHOTO_THUMB_XL.jpg', 1280),
-                       ('SYNOPHOTO_THUMB_B.jpg', 640),
-                       ('SYNOPHOTO_THUMB_M.jpg', 320),
-                       ('SYNOPHOTO_THUMB_PREVIEW.jpg', 160),
-                       ('SYNOPHOTO_THUMB_S.jpg', 120))
-
-        for thumb in to_generate:
-            thumb_path = os.path.join(dest_dir, thumb[0])
-            if os.path.exists(thumb_path):
-                continue
-
-            # Create a copy for thumbnailing
-            im_copy = im.copy()
-            im_copy.thumbnail((thumb[1], thumb[1]), Image.LANCZOS)
-            im_copy.save(thumb_path)
+        # Image.open() is lazy and keeps the file open. The context manager
+        # releases the handle even when every thumbnail already exists and
+        # the pixel data is never loaded.
+        with Image.open(source_path) as im:
+            generate_synology_thumbnails(im, dest_dir)
            
    except Exception as e:
        print(f"Error processing image {source_path}: {e}")