# series-fixer/fixer.py

#!/usr/bin/env python3
"""
Fix EPUB series metadata for Kavita compatibility.
Usage: python fix_series.py /path/to/books "Series Name"
"""

import os
import sys
import re
import zipfile
import shutil
from lxml import etree

def find_opf(epub_path):
    """Return the archive-internal path of the EPUB's OPF package document.

    Reads ``META-INF/container.xml`` from the zip archive at *epub_path* and
    returns the ``full-path`` attribute of the first ``rootfile`` element.

    Args:
        epub_path: Filesystem path of the EPUB (zip) file.

    Returns:
        The OPF path as declared in container.xml.

    Raises:
        ValueError: If container.xml has no ``rootfile`` element, or the
            element has no (or an empty) ``full-path`` attribute.
        KeyError: If the archive has no ``META-INF/container.xml`` entry
            (raised by ``ZipFile.read``).
    """
    ns = {'c': 'urn:oasis:names:tc:opendocument:xmlns:container'}

    # Only the read needs the archive open; parse after it is closed.
    with zipfile.ZipFile(epub_path, 'r') as z:
        container = z.read('META-INF/container.xml')

    root = etree.fromstring(container)
    rootfile = root.find('.//c:rootfile', ns)
    opf_path = rootfile.get('full-path') if rootfile is not None else None
    if not opf_path:
        # Fail loudly: without an OPF path the caller would rewrite the
        # archive without ever touching any metadata.
        raise ValueError("container.xml does not declare an OPF rootfile")
    return opf_path

def update_opf_metadata(opf_content, series_name, series_index):
    """Replace the Calibre series entries in an OPF document.

    Parses *opf_content*, drops any direct ``meta`` children of the OPF
    ``metadata`` element named ``calibre:series`` or
    ``calibre:series_index``, then appends fresh ones carrying
    *series_name* and ``str(series_index)``.

    Returns:
        The re-serialised document (UTF-8, with XML declaration,
        pretty-printed), or ``None`` when the document has no OPF
        ``metadata`` element (a warning is printed in that case).
    """
    opf_ns = 'http://www.idpf.org/2007/opf'
    prefixes = {'opf': opf_ns,
                'dc': 'http://purl.org/dc/elements/1.1/'}

    document = etree.fromstring(opf_content)
    metadata = document.find('.//opf:metadata', prefixes)
    if metadata is None:
        print("  WARNING: No metadata element found, skipping.")
        return None

    # Collect the stale entries first, then detach them.
    series_keys = ('calibre:series', 'calibre:series_index')
    stale = [
        entry for entry in metadata.findall('opf:meta', prefixes)
        if entry.get('name', '') in series_keys
    ]
    for entry in stale:
        metadata.remove(entry)

    # Append the replacement entries in the same order: series, then index.
    meta_tag = f'{{{opf_ns}}}meta'
    replacements = (
        ('calibre:series', series_name),
        ('calibre:series_index', str(series_index)),
    )
    for key, value in replacements:
        entry = etree.SubElement(metadata, meta_tag)
        entry.set('name', key)
        entry.set('content', value)

    return etree.tostring(
        document,
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )

# Optional separators and the (possibly decimal) number after a marker.
_INDEX_NUMBER = r'[.\s_-]*(\d+(?:\.\d+)?)'
# A volume/chapter marker: v, vol, volume, c, ch, chapter (any letter case).
_INDEX_MARKER = r'[vc](?:olume|ol|hapter|h)?'

# Marker that starts a word, so the "c" in "Epic 7" is not taken as a marker.
_STRICT_MARKER_RE = re.compile(r'(?<![A-Za-z])' + _INDEX_MARKER + _INDEX_NUMBER,
                               re.IGNORECASE)
# Marker anywhere, e.g. glued onto a CamelCase title ("SomeTitleVol3").
_LAX_MARKER_RE = re.compile(_INDEX_MARKER + _INDEX_NUMBER, re.IGNORECASE)
# Last resort: the first number anywhere in the name.
_ANY_NUMBER_RE = re.compile(r'(\d+(?:\.\d+)?)')


def extract_index_from_filename(filename):
    """Try to extract a volume/chapter number from the filename.

    Patterns are tried from most to least specific and the first hit wins:

    1. a v/vol/volume/c/ch/chapter marker at the start of a word,
    2. the same marker anywhere in the name,
    3. the first number in the name.

    Markers are matched case-insensitively, so ``VOL 3`` and ``vol 3`` are
    equivalent.

    Args:
        filename: File name to inspect (an extension may be present).

    Returns:
        The detected number as a ``float``, or ``None`` if the name contains
        no digits.
    """
    for pattern in (_STRICT_MARKER_RE, _LAX_MARKER_RE, _ANY_NUMBER_RE):
        match = pattern.search(filename)
        if match:
            return float(match.group(1))
    return None

def fix_epub(epub_path, series_name, series_index):
    """Update the OPF series metadata inside an EPUB, replacing it in place.

    Copies every archive entry into ``<epub_path>.tmp``, substituting the OPF
    entry with the output of ``update_opf_metadata``, then moves the
    temporary file over the original. Entries are written with their original
    ``ZipInfo`` so order and per-entry compression settings carry over.

    The original file is only replaced when the OPF was actually rewritten.
    If the OPF entry is missing from the archive or has no metadata element,
    a warning is printed and the EPUB is left untouched. The temporary file
    is removed on every path, including when an exception propagates.

    Args:
        epub_path: Filesystem path of the EPUB to modify.
        series_name: Series name to store.
        series_index: Position within the series.

    Raises:
        Whatever ``find_opf``, ``update_opf_metadata``, ``zipfile`` or the
        filesystem calls raise; nothing is caught here.
    """
    tmp_path = epub_path + '.tmp'
    opf_path = find_opf(epub_path)
    opf_updated = False

    try:
        with zipfile.ZipFile(epub_path, 'r') as zin:
            with zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    # Read via the ZipInfo, not the name, so archives with
                    # duplicate entry names are copied entry-for-entry.
                    data = zin.read(item)
                    if item.filename == opf_path:
                        updated = update_opf_metadata(data, series_name, series_index)
                        if updated:
                            data = updated
                            opf_updated = True
                    zout.writestr(item, data)

        if opf_updated:
            # Same directory as the target, so this is a plain rename.
            os.replace(tmp_path, epub_path)
    finally:
        # Still present only when we failed or decided not to replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    if opf_updated:
        print(f"  ✓ Updated: index={series_index}")
    else:
        print(f"  WARNING: OPF '{opf_path}' was not updated; file left unchanged.")

def main():
    """Command-line entry point: tag every EPUB in a folder with a series.

    Expects ``sys.argv[1]`` to be a folder and ``sys.argv[2]`` the series
    name. Each ``.epub`` file in the folder (non-recursive, sorted by name)
    gets the series name and an index taken from its filename; when no number
    is found the file's 1-based position in the sorted list is used instead.

    Exits with status 1 on bad usage, when the folder is not a directory, or
    when at least one EPUB could not be processed; status 0 when no EPUBs are
    found. Per-file errors are reported and do not stop the run.
    """
    if len(sys.argv) < 3:
        print("Usage: python fix_series.py /path/to/folder \"Series Name\"")
        sys.exit(1)

    folder = sys.argv[1]
    series_name = sys.argv[2]

    # Give a readable message instead of a traceback from os.listdir.
    if not os.path.isdir(folder):
        print(f"ERROR: '{folder}' is not a directory.")
        sys.exit(1)

    epubs = sorted(
        f for f in os.listdir(folder)
        if f.lower().endswith('.epub') and os.path.isfile(os.path.join(folder, f))
    )
    if not epubs:
        print("No EPUBs found.")
        sys.exit(0)

    print(f"Found {len(epubs)} EPUBs. Series: '{series_name}'\n")

    failures = 0
    for i, filename in enumerate(epubs, start=1):
        path = os.path.join(folder, filename)
        index = extract_index_from_filename(filename)
        if index is None:
            index = float(i)
            print(f"[{filename}] No index found, using position {i}")
        else:
            print(f"[{filename}] Detected index {index}")

        # Top-level boundary: one broken book must not abort the whole batch.
        try:
            fix_epub(path, series_name, index)
        except Exception as e:
            failures += 1
            print(f"  ERROR: {e}")

    if failures:
        print(f"\n{failures} of {len(epubs)} EPUBs could not be updated.")
    print("\nDone! Rescan your Kavita library.")
    if failures:
        sys.exit(1)

# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()