diff --git a/doc/configuration.rst b/doc/configuration.rst index 3af7411b..48f28473 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -2095,6 +2095,22 @@ Advanced processing configuration .. versionadded:: 2.10 +.. confval:: confluence_manifest_data + + A manifest file (``scb-manifest.json``) is written to the output + directory after each run. The manifest lists the built pages as + well as the attachments for these pages. Each page/attachment entry + provides a path to where its content resides. If a user wishes to + embed the content itself in the manifest, this option can be used to + Base64-encode page/attachment data into the manifest. By default, this + option is disabled: + + .. code-block:: python + + confluence_manifest_data = True + + .. versionadded:: 2.10 + .. index:: Mentions; Configuration .. _confluence_mentions: diff --git a/sphinxcontrib/confluencebuilder/__init__.py b/sphinxcontrib/confluencebuilder/__init__.py index 52e248ee..d459495d 100644 --- a/sphinxcontrib/confluencebuilder/__init__.py +++ b/sphinxcontrib/confluencebuilder/__init__.py @@ -244,6 +244,8 @@ def setup(app): cm.add_conf('confluence_link_suffix', 'confluence') # Enable raw math output for MathJax support cm.add_conf_bool('confluence_mathjax', 'confluence') + # Embed page/attachment data into the manifest + cm.add_conf_bool('confluence_manifest_data') # Mappings for documentation mentions to Confluence keys. cm.add_conf('confluence_mentions', 'confluence') # Inject navigational hints into the documentation. 
diff --git a/sphinxcontrib/confluencebuilder/builder.py b/sphinxcontrib/confluencebuilder/builder.py index 8336d471..e518e68b 100644 --- a/sphinxcontrib/confluencebuilder/builder.py +++ b/sphinxcontrib/confluencebuilder/builder.py @@ -20,6 +20,7 @@ from sphinxcontrib.confluencebuilder.env import ConfluenceCacheInfo from sphinxcontrib.confluencebuilder.intersphinx import build_intersphinx from sphinxcontrib.confluencebuilder.logger import ConfluenceLogger +from sphinxcontrib.confluencebuilder.manifest import ConfluenceManifest from sphinxcontrib.confluencebuilder.nodes import confluence_footer from sphinxcontrib.confluencebuilder.nodes import confluence_header from sphinxcontrib.confluencebuilder.nodes import confluence_metadata @@ -93,6 +94,8 @@ def __init__(self, app, env=None): self._original_get_doctree = None self._verbose = self.app.verbosity + self.manifest = ConfluenceManifest(self.config, self.state) + # state tracking is set at initialization (not cleanup) so its content's # can be checked/validated on after the builder has executed (testing) self.state.reset() @@ -103,6 +106,10 @@ def init(self): apply_defaults(self) config = self.config + # populate desired metadata into the manifest after the configuration + # has been finalized + self.manifest.register_metadata() + self.add_secnumbers = self.config.confluence_add_secnumbers self.secnumber_suffix = self.config.confluence_secnumber_suffix self.post_cleanup = config.confluence_cleanup_purge or \ @@ -476,6 +483,9 @@ def write_doc(self, docname, doctree): file.write(self.writer.output) except OSError as err: self.warn(f'error writing file {out_file}: {err}') + else: + self.manifest.add_page( + docname, self.writer.output, out_file, self.out_dir) self._cache_info.track_page_hash(docname) @@ -866,6 +876,20 @@ def to_asset_name(asset): self.publish_cleanup() self.publish_finalize() + else: + assets = self.assets.build() + + # track all referenced assets into the manifest + for asset in assets: + key, abs_file, 
mime, hash_, docname = asset + self.manifest.add_attachment( + docname, key, mime, hash_, Path(abs_file), self.out_dir) + + # output the manifest into the output directory + self.info('building manifest...', nonl=(not self._verbose)) + self.manifest.export(self.out_dir) + if not self._verbose: + self.info(' done') # persist cache from this run self._cache_info.save_cache()
diff --git a/sphinxcontrib/confluencebuilder/manifest.py b/sphinxcontrib/confluencebuilder/manifest.py
new file mode 100644
index 00000000..43807b11
--- /dev/null
+++ b/sphinxcontrib/confluencebuilder/manifest.py
@@ -0,0 +1,220 @@
+# SPDX-License-Identifier: BSD-2-Clause
+# Copyright Sphinx Confluence Builder Contributors (AUTHORS)
+
+from base64 import b64encode
+from datetime import datetime
+from datetime import timezone
+from docutils import __version__ as docutils_version
+from pathlib import Path
+from sphinx import __version__ as sphinx_version
+from sphinx.config import Config
+from sphinxcontrib.confluencebuilder.state import ConfluenceState
+from sphinxcontrib.confluencebuilder.util import ConfluenceUtil
+from typing import Any
+import json
+import os
+
+
+class ConfluenceManifest:
+    def __init__(self, config: Config, state: ConfluenceState):
+        """
+        a confluence manifest
+
+        A manifest is generated after a build. It can be used to inform a
+        user or other tooling what pages/attachments have been processed,
+        along with the page titles and detected hierarchy expected (if any).
+
+        While this can be used for informational purposes, this information
+        can also be used by third-party tooling to take generated Confluence
+        information and perform publishing in their own manner (e.g. users
+        with an air-gapped environment or needing some sort of publish
+        separation due to authentication considerations). Note that while
+        this extension can generate a manifest, there is no tooling provided
+        to use the manifest in a way to publish.
+
+        Args:
+            config: the active configuration
+            state: this extension's runtime state tracking
+        """
+        self.config = config
+        self.state = state
+
+        self.data: dict[str, Any] = {
+            'type': 'SphinxConfluenceBuilder/Manifest',
+            'spec': 1,
+        }
+
+    def register_metadata(self) -> None:
+        """
+        register metadata into the tracked manifest
+
+        When invoked, this call will populate various metadata into the
+        manifest cache from the resolved configuration (e.g. project version).
+        """
+
+        cfg = self.config
+
+        if cfg.project and cfg.project != 'Project name not set':
+            self.data['project'] = cfg.project
+
+        if cfg.release:
+            self.data['release'] = cfg.release
+
+        if cfg.version:
+            self.data['version'] = cfg.version
+
+        if cfg.author and cfg.author != 'Author name not set':
+            self.data['author'] = cfg.author
+
+        if cfg.copyright:
+            self.data['copyright'] = cfg.copyright
+
+        if cfg.language:
+            self.data['language'] = cfg.language
+
+        if self.config.confluence_manifest_data:
+            self.data['includesData'] = True
+
+    def add_page(self, docname: str, output: str,
+            out_file: Path, out_dir: Path) -> None:
+        """
+        add a page into the manifest
+
+        For any page that is built, this call is used to track it into the
+        manifest cache. This includes using the docname as a page identifier
+        and includes information such as the expected title for a page.
+
+        Args:
+            docname: the docname
+            output: the raw output for a page
+            out_file: the path to the built page file
+            out_dir: the base folder for any output data
+        """
+
+        title = self.state.title(docname)
+
+        entry: dict[str, Any] = {
+            'id': docname,
+            'title': title,
+        }
+
+        is_root_doc = self.config.root_doc == docname
+        if is_root_doc:
+            entry['isRoot'] = True
+
+        parent_docname = self.state.parent_docname(docname)
+        if parent_docname:
+            parent_title = self.state.title(parent_docname)
+
+            entry['parentId'] = parent_docname
+            entry['parentTitle'] = parent_title
+
+        entry.update({
+            'hash': {
+                # Note that this hash will be of the contents with LF
+                # line endings. For output generated on Windows, the
+                # hash here will not explicitly match the hash of the file.
+                # This is fine as this hash is mainly to help identify
+                # the uniqueness of the content.
+                'sha256': ConfluenceUtil.hash(output),
+            },
+            'path': self._resolve_path(out_file, out_dir),
+        })
+
+        if self.config.confluence_manifest_data:
+            entry['data'] = b64encode(output.encode('utf-8')).decode()
+
+        pages = self.data.setdefault('pages', [])
+        pages.append(entry)
+
+    def add_attachment(self, docname: str, key: str, mime: str, hash_: str,
+            path: Path, out_dir: Path) -> None:
+        """
+        add an attachment into the manifest
+
+        For any attachment that is processed, this call is used to track it
+        into the manifest cache. This includes using the expected attachment
+        name, the page that should hold the attachment and more.
+
+        Args:
+            docname: the docname that should hold this attachment
+            key: the identifier to use for an attachment on publish
+            mime: the media type of the attachment
+            hash_: the hash of the attachment
+            path: the path to the attachment file
+            out_dir: the base folder for any output data
+        """
+
+        title = self.state.title(docname)
+
+        entry: dict[str, Any] = {
+            'id': key,
+            'pageId': docname,
+            'pageTitle': title,
+            'mimeType': mime,
+            'hash': {
+                'sha256': hash_,
+            },
+            'path': self._resolve_path(path, out_dir),
+        }
+
+        if self.config.confluence_manifest_data:
+            with path.open('rb') as fp:
+                entry['data'] = b64encode(fp.read()).decode()
+
+        attachments = self.data.setdefault('attachments', [])
+        attachments.append(entry)
+
+    def export(self, out_dir: Path) -> None:
+        """
+        export the manifest content
+
+        When an export is requested, the contents will be published into
+        a `scb-manifest.json` file into the project's output directory.
+
+        Args:
+            out_dir: the folder to output the manifest into
+        """
+
+        from sphinxcontrib.confluencebuilder import __version__ as scb_version
+        self.data.update({
+            'confluencebuilderVersion': scb_version,
+            'sphinxVersion': sphinx_version,
+            'docutilsVersion': docutils_version,
+            'generated': datetime.now(timezone.utc).isoformat(),
+        })
+
+        manifest_path = out_dir / 'scb-manifest.json'
+        with manifest_path.open('w', encoding='utf-8') as fp:
+            json.dump(self.data, fp, indent=4)
+
+    def _resolve_path(self, path: Path, base: Path) -> str:
+        """
+        resolve a page/attachment path based off a base path
+
+        We attempt to provide a path in the manifest if tooling wishes to
+        reference/use a given page/attachment file. The path will be relative
+        to the output directory.
+
+        Note that it is possible for an attachment to exist outside of the
+        output directory. If a relative path cannot be formed (e.g. on
+        Windows, when the file resides on a different drive than the output
+        directory), the absolute path of the file is provided instead.
+
+        Args:
+            path: the path of the file
+            base: the output directory to be relative to
+
+        Returns:
+            the relative path (POSIX-styled); or the absolute path if a
+            relative path cannot be determined
+        """
+
+        try:
+            resolved = Path(os.path.relpath(path, base))
+        except ValueError:
+            # no relative path exists between the file and the output
+            # directory (e.g. different drives on Windows)
+            resolved = Path(os.path.abspath(path))
+
+        return resolved.as_posix()