114 lines
3.7 KiB
Python
114 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fix EPUB series metadata for Kavita compatibility.
|
|
Usage: python fix_series.py /path/to/books "Series Name"
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import re
|
|
import zipfile
|
|
import shutil
|
|
from lxml import etree
|
|
|
|
def find_opf(epub_path):
    """Find the OPF file path inside the epub.

    Reads ``META-INF/container.xml`` from the archive and returns the
    ``full-path`` attribute of its first ``<rootfile>`` element.

    Args:
        epub_path: Filesystem path of the EPUB (zip) file.

    Returns:
        The archive-relative path of the OPF package document.

    Raises:
        KeyError: If the archive has no ``META-INF/container.xml`` entry.
        ValueError: If the container has no ``<rootfile>`` element or the
            element carries no ``full-path`` attribute.
    """
    with zipfile.ZipFile(epub_path, 'r') as z:
        container = z.read('META-INF/container.xml')

    root = etree.fromstring(container)
    ns = {'c': 'urn:oasis:names:tc:opendocument:xmlns:container'}

    rootfile = root.find('.//c:rootfile', ns)
    if rootfile is None:
        # Without this guard the failure would surface as an opaque
        # "'NoneType' object has no attribute 'get'".
        raise ValueError("container.xml has no <rootfile> element")

    opf_path = rootfile.get('full-path')
    if not opf_path:
        raise ValueError("<rootfile> element has no full-path attribute")
    return opf_path
|
|
|
|
def update_opf_metadata(opf_content, series_name, series_index):
    """Replace the Calibre series tags of an OPF document.

    Any existing ``calibre:series`` / ``calibre:series_index`` ``<meta>``
    children of the ``<metadata>`` element are dropped and a fresh pair is
    appended at the end of it.

    Args:
        opf_content: The OPF XML document to parse.
        series_name: Value written to the ``calibre:series`` tag.
        series_index: Value written (via ``str()``) to the
            ``calibre:series_index`` tag.

    Returns:
        The re-serialised document as UTF-8 bytes with an XML declaration,
        or ``None`` (after printing a warning) when the document has no
        ``<metadata>`` element.
    """
    opf_uri = 'http://www.idpf.org/2007/opf'
    nsmap = {
        'opf': opf_uri,
        'dc': 'http://purl.org/dc/elements/1.1/',
    }
    package = etree.fromstring(opf_content)

    metadata = package.find('.//opf:metadata', nsmap)
    if metadata is None:
        print(" WARNING: No metadata element found, skipping.")
        return None

    # Collect the stale series tags first, then detach them.
    series_tags = ('calibre:series', 'calibre:series_index')
    stale = [
        node
        for node in metadata.findall('opf:meta', nsmap)
        if node.get('name', '') in series_tags
    ]
    for node in stale:
        metadata.remove(node)

    # Append the replacement pair: series name first, then its index.
    replacements = (
        ('calibre:series', series_name),
        ('calibre:series_index', str(series_index)),
    )
    for tag_name, tag_content in replacements:
        node = etree.SubElement(metadata, f'{{{opf_uri}}}meta')
        node.set('name', tag_name)
        node.set('content', tag_content)

    return etree.tostring(
        package,
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )
|
|
|
|
def extract_index_from_filename(filename):
    """Try to extract a volume/chapter number from the filename.

    A number that follows a volume/chapter marker (``v``, ``vol``,
    ``volume``, ``c``, ``ch``, ``chapter``; any letter case; optionally
    separated by ``.``, whitespace, ``_`` or ``-``) wins. If no marker is
    present, the first number anywhere in the name is used.

    Args:
        filename: The file name to inspect.

    Returns:
        The number as a ``float`` (decimals such as ``2.5`` are kept), or
        ``None`` when the name contains no number at all.
    """
    # The look-behind stops the final letter of an ordinary word from being
    # read as a marker ("Topic 7" is not chapter 7); IGNORECASE makes
    # "VOL 3" / "Chapter 3" behave like "vol 3".
    marker = re.search(
        r'(?<![A-Za-z])(?:v(?:ol(?:ume)?)?|c(?:h(?:apter)?)?)'
        r'[.\s_-]*(\d+(?:\.\d+)?)',
        filename,
        re.IGNORECASE,
    )
    if marker:
        return float(marker.group(1))

    # fallback: any number in the filename
    any_number = re.search(r'(\d+(?:\.\d+)?)', filename)
    if any_number:
        return float(any_number.group(1))
    return None
|
|
|
|
def fix_epub(epub_path, series_name, series_index):
    """Update the OPF metadata inside an epub in-place.

    The archive is copied entry by entry into ``<epub_path>.tmp`` with the
    OPF document swapped for its updated version; the temporary file then
    replaces the original. If the OPF could not be updated (entry missing
    from the archive, or no metadata element) the original file is left
    untouched.

    Args:
        epub_path: Filesystem path of the EPUB to rewrite.
        series_name: Series name to store in the OPF metadata.
        series_index: Series index to store in the OPF metadata.

    Raises:
        Exception: Whatever reading, parsing or writing the archive raises;
            the temporary file is removed before the error propagates.
    """
    tmp_path = epub_path + '.tmp'
    opf_path = find_opf(epub_path)
    changed = False

    try:
        with zipfile.ZipFile(epub_path, 'r') as zin:
            with zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    # Read via the ZipInfo so duplicate names resolve to
                    # this exact entry.
                    data = zin.read(item)
                    if item.filename == opf_path:
                        updated = update_opf_metadata(data, series_name, series_index)
                        if updated:
                            data = updated
                            changed = True
                    # Passing the ZipInfo keeps each entry's original
                    # compression type and order.
                    zout.writestr(item, data)

        if changed:
            shutil.move(tmp_path, epub_path)
    finally:
        # After a successful move the temp file is gone; otherwise (error
        # or nothing to change) do not leave it behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    if changed:
        print(f" ✓ Updated: index={series_index}")
    else:
        print(" - Unchanged: OPF metadata could not be updated")
|
|
|
|
def main():
    """Command-line entry point: fix every EPUB in a folder.

    Reads the folder path and series name from ``sys.argv``, then updates
    each ``.epub`` file (case-insensitive extension, sorted by name). The
    series index comes from the filename, or from the file's 1-based
    position in the sorted listing when the name holds no number.

    Exits with status 1 on bad usage or when the folder is not a directory,
    and with status 0 when the folder contains no EPUBs. A failure on one
    file is reported and does not stop the remaining files.
    """
    if len(sys.argv) < 3:
        print("Usage: python fix_series.py /path/to/folder \"Series Name\"")
        sys.exit(1)

    folder = sys.argv[1]
    series_name = sys.argv[2]

    # Fail with a readable message instead of a traceback from os.listdir.
    if not os.path.isdir(folder):
        print(f"Not a directory: {folder}")
        sys.exit(1)

    epubs = sorted(f for f in os.listdir(folder) if f.lower().endswith('.epub'))
    if not epubs:
        print("No EPUBs found.")
        sys.exit(0)

    print(f"Found {len(epubs)} EPUBs. Series: '{series_name}'\n")

    failures = 0
    for i, filename in enumerate(epubs, start=1):
        path = os.path.join(folder, filename)
        index = extract_index_from_filename(filename)
        if index is None:
            index = float(i)
            print(f"[{filename}] No index found, using position {i}")
        else:
            print(f"[{filename}] Detected index {index}")

        try:
            fix_epub(path, series_name, index)
        except Exception as e:
            # Top-level boundary: report the problem and carry on with the
            # next book rather than aborting the whole batch.
            failures += 1
            print(f" ERROR: {e}")

    if failures:
        print(f"\n{failures} of {len(epubs)} EPUBs could not be updated.")
    print("\nDone! Rescan your Kavita library.")
|
|
|
|
# Script entry point: run the batch fixer only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|