From 3d19fb01bef4dbfe964178bb7c05c36925d2c0fe Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 Sep 2024 21:35:47 +0530 Subject: [PATCH 1/2] Switch to threaded worker for book render Now that almost all code runs in C and releases the GIL these perform much better. Cuts the time of first render for a 400K word book on my system by half from 1.9 to 0.9 seconds. --- src/calibre/srv/render_book.py | 204 +++++++++------------------------ 1 file changed, 53 insertions(+), 151 deletions(-) diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index 286c9c136f1f..69eb4bb644d3 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -5,17 +5,15 @@ import json import os import sys -import time from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor from datetime import datetime from functools import partial from itertools import count -from math import ceil from lxml.etree import Comment from calibre import detect_ncpus, force_unicode, prepare_string_for_xml -from calibre.constants import iswindows from calibre.customize.ui import plugin_for_input_format from calibre.ebooks.oeb.base import EPUB, OEB_DOCS, OEB_STYLES, OPF, SMIL, XHTML, XHTML_NS, XLINK, rewrite_links, urlunquote from calibre.ebooks.oeb.base import XPath as _XPath @@ -24,14 +22,10 @@ from calibre.ebooks.oeb.polish.cover import find_cover_image, find_cover_image_in_page, find_cover_page from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc from calibre.ebooks.oeb.polish.utils import guess_type -from calibre.ptempfile import PersistentTemporaryDirectory from calibre.srv.metadata import encode_datetime -from calibre.srv.opts import grouper from calibre.utils.date import EPOCH -from calibre.utils.filenames import rmtree -from calibre.utils.ipc.simple_worker import start_pipe_worker from calibre.utils.logging import default_log -from calibre.utils.serialize import json_dumps, json_loads, msgpack_dumps, msgpack_loads +from calibre.utils.serialize import json_dumps, json_loads, msgpack_loads from calibre.utils.short_uuid import uuid4 from calibre_extensions.fast_css_transform import transform_properties from polyglot.binary import as_base64_unicode as encode_component @@ -501,102 +495,6 @@ def handle_link(a, attr='href'): f.write(shtml) -class RenderManager: - - def __init__(self, max_workers): - self.max_workers = max_workers - - def launch_worker(self): - with open(os.path.join(self.tdir, f'{len(self.workers)}.json'), 'wb') as output: - error = open(os.path.join(self.tdir, f'{len(self.workers)}.error'), 'wb') - p = start_pipe_worker('from calibre.srv.render_book import worker_main; worker_main()', stdout=error, stderr=error) - p.output_path = output.name - p.error_path = error.name - self.workers.append(p) - - def __enter__(self): - self.workers = [] - self.tdir = PersistentTemporaryDirectory() - return self - - def __exit__(self, *a): - while self.workers: - p = self.workers.pop() - if p.poll() is not None: - continue - p.terminate() - if not iswindows and p.poll() is None: - time.sleep(0.02) - if p.poll() is None: - p.kill() - del self.workers - try: - rmtree(self.tdir) - except OSError: - time.sleep(0.1) - try: - rmtree(self.tdir) - except OSError: - pass - del self.tdir - - def launch_workers(self, names, in_process_container): - num_workers = min(detect_ncpus(), len(names)) - if self.max_workers: - num_workers = min(num_workers, self.max_workers) - if num_workers > 1: - if len(names) < 3 or sum(os.path.getsize(in_process_container.name_path_map[n]) for n in names) < 128 * 1024: - num_workers = 1 - if num_workers > 1: - num_other_workers = num_workers - 1 - while len(self.workers) < num_other_workers: - self.launch_worker() - return num_workers - - def __call__(self, names, args, in_process_container): - num_workers = len(self.workers) + 1 - if num_workers == 1: - return [process_book_files(names, *args, container=in_process_container)] - - group_sz = int(ceil(len(names) / num_workers)) - groups = tuple(grouper(group_sz, names)) - for group, worker in zip(groups[:-1], self.workers): - worker.stdin.write(as_bytes(msgpack_dumps((worker.output_path, group,) + args))) - worker.stdin.flush(), worker.stdin.close() - worker.job_sent = True - - for worker in self.workers: - if not hasattr(worker, 'job_sent'): - worker.stdin.write(b'_'), worker.stdin.flush(), worker.stdin.close() - - error = None - results = [process_book_files(groups[-1], *args, container=in_process_container)] - for worker in self.workers: - if not hasattr(worker, 'job_sent'): - worker.wait() - continue - if worker.wait() != 0: - with open(worker.error_path, 'rb') as f: - error = f.read().decode('utf-8', 'replace') - else: - with open(worker.output_path, 'rb') as f: - results.append(msgpack_loads(f.read())) - if error is not None: - raise Exception('Render worker failed with error:\n' + error) - return results - - -def worker_main(): - stdin = getattr(sys.stdin, 'buffer', sys.stdin) - raw = stdin.read() - if raw == b'_': - return - args = msgpack_loads(raw) - result = process_book_files(*args[1:]) - with open(args[0], 'wb') as f: - f.write(as_bytes(msgpack_dumps(result))) - - def virtualize_html(container, name, link_uid, link_to_map, virtualized_names): changed = set() @@ -634,7 +532,7 @@ def handle_link(a, attr='href'): __smil_file_names__ = '' -def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, data_for_clone, container=None): +def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, data_for_clone=None, container=None): if container is None: container = SimpleContainer(container_dir, opfpath, default_log, clone_data=data_for_clone) container.cloned = False @@ -664,9 +562,19 @@ def process_book_files(names, container_dir, opfpath, virtualize_resources, link return link_to_map, html_data, virtualized_names, smil_map +def calculate_number_of_workers(names, in_process_container, max_workers): + num_workers = min(detect_ncpus(), len(names)) + if max_workers: + num_workers = min(num_workers, max_workers) + if num_workers > 1: + if len(names) < 3 or sum(os.path.getsize(in_process_container.name_path_map[n]) for n in names) < 128 * 1024: + num_workers = 1 + return num_workers + + def process_exploded_book( - book_fmt, opfpath, input_fmt, tdir, render_manager, log=None, book_hash=None, save_bookmark_data=False, - book_metadata=None, virtualize_resources=True + book_fmt, opfpath, input_fmt, tdir, log=None, book_hash=None, save_bookmark_data=False, + book_metadata=None, virtualize_resources=True, max_workers=1 ): log = log or default_log container = SimpleContainer(tdir, opfpath, log) @@ -676,15 +584,8 @@ def process_exploded_book( def needs_work(mt): return mt in OEB_STYLES or mt in OEB_DOCS or mt in ('image/svg+xml', 'application/smil', 'application/smil+xml') - def work_priority(name): - # ensure workers with large files or stylesheets - # have the less names - size = os.path.getsize(container.name_path_map[name]), - is_html = container.mime_map.get(name) in OEB_DOCS - return (0 if is_html else 1), size - - if not is_comic: - render_manager.launch_workers(tuple(n for n, mt in iteritems(container.mime_map) if needs_work(mt)), container) + names_that_need_work = tuple(n for n, mt in iteritems(container.mime_map) if needs_work(mt)) + num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers) bookmark_data = None if save_bookmark_data: @@ -741,15 +642,17 @@ def work_priority(name): 'page_list_anchor_map': pagelist_anchor_map(page_list), } - names = sorted( - (n for n, mt in iteritems(container.mime_map) if needs_work(mt)), - key=work_priority) + results = [] + if num_workers < 2: + results.append(process_book_files(names_that_need_work, tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container=container)) + else: + with ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = tuple( + executor.submit(process_book_files, (name,), tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container=container) + for name in names_that_need_work) + for future in futures: + results.append(future.result()) - results = render_manager( - names, ( - tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container.data_for_clone() - ), container - ) ltm = book_render_data['link_to_map'] html_data = {} virtualized_names = set() @@ -899,33 +802,32 @@ def get_stored_annotations(container, bookmark_data): def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False, virtualize_resources=True, max_workers=1): pathtoebook = os.path.abspath(pathtoebook) - with RenderManager(max_workers) as render_manager: - mi = None - if serialize_metadata: - from calibre.customize.ui import quick_metadata - from calibre.ebooks.metadata.meta import get_metadata - with open(pathtoebook, 'rb') as f, quick_metadata: - mi = get_metadata(f, os.path.splitext(pathtoebook)[1][1:].lower()) - book_fmt, opfpath, input_fmt = extract_book(pathtoebook, output_dir, log=default_log) - container, bookmark_data = process_exploded_book( - book_fmt, opfpath, input_fmt, output_dir, render_manager, - book_hash=book_hash, save_bookmark_data=extract_annotations, - book_metadata=mi, virtualize_resources=virtualize_resources - ) - if serialize_metadata: - from calibre.ebooks.metadata.book.serialize import metadata_as_dict - d = metadata_as_dict(mi) - d.pop('cover_data', None) - serialize_datetimes(d), serialize_datetimes(d.get('user_metadata', {})) - with open(os.path.join(output_dir, 'calibre-book-metadata.json'), 'wb') as f: - f.write(json_dumps(d)) - if extract_annotations: - annotations = None - if bookmark_data: - annotations = json_dumps(tuple(get_stored_annotations(container, bookmark_data))) - if annotations: - with open(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f: - f.write(annotations) + mi = None + if serialize_metadata: + from calibre.customize.ui import quick_metadata + from calibre.ebooks.metadata.meta import get_metadata + with open(pathtoebook, 'rb') as f, quick_metadata: + mi = get_metadata(f, os.path.splitext(pathtoebook)[1][1:].lower()) + book_fmt, opfpath, input_fmt = extract_book(pathtoebook, output_dir, log=default_log) + container, bookmark_data = process_exploded_book( + book_fmt, opfpath, input_fmt, output_dir, max_workers=max_workers, + book_hash=book_hash, save_bookmark_data=extract_annotations, + book_metadata=mi, virtualize_resources=virtualize_resources + ) + if serialize_metadata: + from calibre.ebooks.metadata.book.serialize import metadata_as_dict + d = metadata_as_dict(mi) + d.pop('cover_data', None) + serialize_datetimes(d), serialize_datetimes(d.get('user_metadata', {})) + with open(os.path.join(output_dir, 'calibre-book-metadata.json'), 'wb') as f: + f.write(json_dumps(d)) + if extract_annotations: + annotations = None + if bookmark_data: + annotations = json_dumps(tuple(get_stored_annotations(container, bookmark_data))) + if annotations: + with open(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f: + f.write(annotations) def render_for_viewer(path, out_dir, book_hash): From cea3db20c2c508430b7975b48b6b891f27966e5c Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Wed, 18 Sep 2024 16:25:05 +0100 Subject: [PATCH 2/2] Enhancement #2079784: Save the main window geometry when saving a layout. --- src/calibre/gui2/actions/layout_actions.py | 13 ++++++++++++- src/calibre/gui2/geometry.py | 12 ++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/actions/layout_actions.py b/src/calibre/gui2/actions/layout_actions.py index 820ce0c73c73..0c3c16933911 100644 --- a/src/calibre/gui2/actions/layout_actions.py +++ b/src/calibre/gui2/actions/layout_actions.py @@ -8,6 +8,7 @@ from calibre.gui2 import error_dialog, gprefs, question_dialog from calibre.gui2.actions import InterfaceAction, show_menu_under_widget +from calibre.gui2.geometry import _restore_geometry, save_geometry, delete_geometry from calibre.utils.icu import sort_key @@ -166,8 +167,12 @@ def apply_layout(self, name): Throws KeyError if the name doesn't exist. ''' - layouts = gprefs['saved_layouts'] # This can be called by plugins so let the exception fly + + # Restore the application window geometry if we have it. + _restore_geometry(self.gui, gprefs, f'saved_layout_{name}') + # Now the panel sizes inside the central widget + layouts = gprefs['saved_layouts'] settings = layouts[name] # Order is important here. change_layout() must be called before # unserializing the settings or panes like book details won't display @@ -201,6 +206,9 @@ def save_named_layout(self, name, settings): :param:`name` The name for the settings. :param:`settings`: The gui layout settings to save. ''' + # Save the main window geometry. + save_geometry(self.gui, gprefs, f'saved_layout_{name}') + # Now the panel sizes inside the central widget layouts = gprefs['saved_layouts'] layouts.update({name: settings}) gprefs['saved_layouts'] = layouts @@ -218,6 +226,9 @@ def delete_layout(self, name, show_warning=True): _('Do you really want to delete the saved layout {0}?').format(name), skip_dialog_name='delete_saved_gui_layout'): return + + # The information is stored as 2 preferences. Delete them both. + delete_geometry(gprefs, f'saved_layout_{name}') layouts = gprefs['saved_layouts'] layouts.pop(name, None) self.populate_menu() diff --git a/src/calibre/gui2/geometry.py b/src/calibre/gui2/geometry.py index 12795e942d47..f6d0fed162d9 100644 --- a/src/calibre/gui2/geometry.py +++ b/src/calibre/gui2/geometry.py @@ -12,6 +12,10 @@ from calibre.utils.config_base import tweaks +def geometry_pref_name(name): + return f'geometry-of-{name}' + + def is_debugging(): return _is_debugging() and tweaks.get('show_geometry_debug_output') @@ -77,6 +81,10 @@ def geometry_for_restore_as_dict(self: QWidget): return ans +def delete_geometry(prefs: dict, name: str): + prefs.pop(geometry_pref_name(name), None) + + def save_geometry(self: QWidget, prefs: dict, name: str): x = geometry_for_restore_as_dict(self) if x: @@ -84,7 +92,7 @@ def save_geometry(self: QWidget, prefs: dict, name: str): debug('Saving geometry for:', name) debug(x) x['qt'] = bytearray(self.saveGeometry()) - prefs.set(f'geometry-of-{name}', x) + prefs.set(geometry_pref_name(name), x) def find_matching_screen(screen_as_dict): @@ -153,7 +161,7 @@ def _restore_to_new_screen(self: QWidget, s: QScreen, saved_data: dict) -> bool: def _restore_geometry(self: QWidget, prefs: dict, name: str, get_legacy_saved_geometry: callable = None) -> bool: - x = prefs.get(f'geometry-of-{name}') + x = prefs.get(geometry_pref_name(name)) if not x: old = get_legacy_saved_geometry() if get_legacy_saved_geometry else prefs.get(name) if old is not None: