@lru_cache()
def minify_css(css: str,
               _whitespace=partial(re.compile(r'\s*([,{:;}])\s*').sub, r'\1'),
               _comments=partial(re.compile(r'/\*.*?\*/', flags=re.DOTALL).sub, ''),
               _trailing_semicolon=partial(re.compile(r';\s*}').sub, '}')):
    """
    Return `css` with comments, whitespace around punctuation
    (`,` `{` `:` `;` `}`), and semicolons directly preceding a closing
    brace removed.

    The underscore-prefixed parameters are pre-compiled substitution
    helpers bound once at definition time; callers are not expected
    to pass them. Results are memoized per input string.
    """
    # Comments are dropped first so that any whitespace they leave
    # next to punctuation is collapsed by the following step.
    without_comments = _comments(css)
    compact = _whitespace(without_comments)
    compact = _trailing_semicolon(compact)
    return compact.strip()
def glimpse(text: str, max_length=153, *, paragraph=True,
            _split_paragraph=partial(re.compile(r'\s*\n\s*\n\s*').split, maxsplit=1),
            _trim_last_word=partial(re.compile(r'\S+$').sub, ''),
            _remove_titles=partial(re.compile(r'^(#+|-{4,}|={4,})', re.MULTILINE).sub, ' ')):
    """
    Return a short excerpt of `text`.

    If `paragraph` is true, only the first paragraph (the text up to the
    first blank line) is considered; when further paragraphs follow, a
    trailing period is dropped and ' …' is appended. Markdown heading
    markers (leading `#`, or underline runs of four or more `-` / `=`)
    are blanked out of that paragraph.

    If the resulting text is longer than `max_length` characters, it is
    cut to `max_length - 2` characters, the last (possibly partial) word
    is removed, and ' …' is appended unless, in paragraph mode, the cut
    text already ends with a period.

    The underscore-prefixed parameters are pre-compiled regex helpers
    bound at definition time; callers are not expected to pass them.
    """
    text = text.lstrip()
    if paragraph:
        text, *rest = _split_paragraph(text)
        if rest:
            text = text.rstrip('.')
            text += ' …'
        text = _remove_titles(text).strip()

    if len(text) > max_length:
        # Clamp the bound: a negative slice index would count from the end
        # and keep almost the whole text instead of shortening it.
        clipped = text[:max(max_length - 2, 0)]
        trimmed = _trim_last_word(clipped)
        # When the clipped text is one unbroken token (e.g. a long URL),
        # trimming the last word would leave nothing but the ellipsis;
        # keep the hard-truncated token in that case.
        text = trimmed or clipped
        if not text.endswith('.') or not paragraph:
            text = text.rstrip('. ') + ' …'
    return text
an insta-preview tool such as RegEx101.com + will come in handy. + """ + @staticmethod + def _deflist(name, type, desc, + # Wraps any identifiers and string literals in parameter type spec + # in backticks while skipping common "stopwords" such as 'or', 'of', + # 'optional' ... See §4 Parameters: + # https://numpydoc.readthedocs.io/en/latest/format.html#sections + _type_parts=partial( + re.compile(r'[\w.\'"]+').sub, + lambda m: ('{}' if m.group(0) in ('of', 'or', 'default', 'optional') else + '`{}`').format(m.group(0)))): + """ + Returns `name`, `type`, and `desc` formatted as a + Python-Markdown definition list entry. See also: + https://python-markdown.github.io/extensions/definition_lists/ + """ + type = _type_parts(type or '') + desc = desc or ' ' + assert _ToMarkdown._is_indented_4_spaces(desc) + assert name or type + ret = "" + if name: + ret += '**`{}`**'.format(name) + if type: + ret += ' : {}'.format(type) if ret else type + ret += '\n: {}\n\n'.format(desc) + return ret + + @staticmethod + def _numpy_params(match, + _name_parts=partial(re.compile(', ').sub, '`**, **`')): + """ Converts NumpyDoc parameter (etc.) sections into Markdown. """ + name, type, desc = match.group("name", "type", "desc") + type = type or match.groupdict().get('just_type', None) + desc = desc.strip() + name = name and _name_parts(name) + return _ToMarkdown._deflist(name, type, desc) + + @staticmethod + def _numpy_seealso(match): + """ + Converts NumpyDoc "See Also" section either into referenced code, + optionally within a definition list. + """ + spec_with_desc, simple_list = match.groups() + if spec_with_desc: + return '\n\n'.join('`{}`\n: {}'.format(*map(str.strip, line.split(':', 1))) + for line in filter(None, spec_with_desc.split('\n'))) + return ', '.join('`{}`'.format(i) for i in simple_list.split(', ')) + + @staticmethod + def _numpy_sections(match): + """ + Convert sections with parameter, return, and see also lists to Markdown + lists. 
+ """ + section, body = match.groups() + if section.title() == 'See Also': + body = re.sub(r'^((?:\n?[\w.]* ?: .*)+)|(.*\w.*)', + _ToMarkdown._numpy_seealso, body) + elif section.title() in ('Returns', 'Yields', 'Raises', 'Warns'): + body = re.sub(r'^(?:(?P\*{0,2}\w+(?:, \*{0,2}\w+)*)' + r'(?: ?: (?P .*))|' + r'(?P \w[^\n`*]*))(?(?:\n(?: {4}.*|$))*)', + _ToMarkdown._numpy_params, body, flags=re.MULTILINE) + else: + body = re.sub(r'^(?P \*{0,2}\w+(?:, \*{0,2}\w+)*)' + r'(?: ?: (?P .*))?(?(?:\n(?: {4}.*|$))*)', + _ToMarkdown._numpy_params, body, flags=re.MULTILINE) + return section + '\n-----\n' + body + + @staticmethod + def numpy(text): + """ + Convert `text` in numpydoc docstring format to Markdown + to be further converted later. + """ + return re.sub(r'^(\w[\w ]+)\n-{3,}\n' + r'((?:(?!.+\n-+).*$\n?)*)', + _ToMarkdown._numpy_sections, text, flags=re.MULTILINE) + + @staticmethod + def _is_indented_4_spaces(txt, _3_spaces_or_less=re.compile(r'\n\s{0,3}\S').search): + return '\n' not in txt or not _3_spaces_or_less(txt) + + @staticmethod + def _fix_indent(name, type, desc): + """Maybe fix indent from 2 to 4 spaces.""" + if not _ToMarkdown._is_indented_4_spaces(desc): + desc = desc.replace('\n', '\n ') + return name, type, desc + + @staticmethod + def indent(indent, text, *, clean_first=False): + if clean_first: + text = inspect.cleandoc(text) + return re.sub(r'\n', '\n' + indent, indent + text.rstrip()) + + @staticmethod + def google(text, + _googledoc_sections=partial( + re.compile(r'^([A-Z]\w+):$\n((?:\n?(?: {2,}.*|$))+)', re.MULTILINE).sub, + lambda m, _params=partial( + re.compile(r'^([\w*]+)(?: \(([\w.,=\[\] ]+)\))?: ' + r'((?:.*)(?:\n(?: {2,}.*|$))*)', re.MULTILINE).sub, + lambda m: _ToMarkdown._deflist(*_ToMarkdown._fix_indent(*m.groups()))): ( + m.group() if not m.group(2) else '\n{}\n-----\n{}'.format( + m.group(1), _params(inspect.cleandoc('\n' + m.group(2))))))): + """ + Convert `text` in Google-style docstring format to Markdown + to be further converted 
later. + """ + return _googledoc_sections(text) + + @staticmethod + def _admonition(match, module=None, limit_types=None): + indent, type, value, text = match.groups() + + if limit_types and type not in limit_types: + return match.group(0) + + if type == 'include' and module: + try: + return _ToMarkdown._include_file(indent, value, + _ToMarkdown._directive_opts(text), module) + except Exception as e: + raise RuntimeError('`.. include:: {}` error in module {!r}: {}' + .format(value, module.name, e)) + if type in ('image', 'figure'): + return '{}![{}]({})\n'.format( + indent, text.translate(str.maketrans({'\n': ' ', + '[': '\\[', + ']': '\\]'})).strip(), value) + if type == 'math': + return _ToMarkdown.indent(indent, + '\\[ ' + text.strip() + ' \\]', + clean_first=True) + + if type == 'versionchanged': + title = 'Changed in version: ' + value + elif type == 'versionadded': + title = 'Added in version: ' + value + elif type == 'deprecated' and value: + title = 'Deprecated since version: ' + value + elif type == 'admonition': + title = value + elif type.lower() == 'todo': + title = 'TODO' + text = value + ' ' + text + else: + title = type.capitalize() + if value: + title += ': ' + value + + text = _ToMarkdown.indent(indent + ' ', text, clean_first=True) + return '{}!!! {} "{}"\n{}\n'.format(indent, type, title, text) + + @staticmethod + def admonitions(text, module, limit_types=None): + """ + Process reStructuredText's block directives such as + `.. warning::`, `.. deprecated::`, `.. versionadded::`, etc. + and turn them into Python-M>arkdown admonitions. + + `limit_types` is optionally a set of directives to limit processing to. + + See: https://python-markdown.github.io/extensions/admonition/ + """ + substitute = partial(re.compile(r'^(?P *)\.\. ?(\w+)::(?: *(.*))?' + r'((?:\n(?:(?P=indent) +.*| *$))*)', re.MULTILINE).sub, + partial(_ToMarkdown._admonition, module=module, + limit_types=limit_types)) + # Apply twice for nested (e.g. 
image inside warning) + return substitute(substitute(text)) + + @staticmethod + def _include_file(indent: str, path: str, options: dict, module: pdoc.Module) -> str: + start_line = int(options.get('start-line', 0)) + end_line = int(options.get('end-line', 0)) or None + start_after = options.get('start-after') + end_before = options.get('end-before') + + with open(os.path.join(os.path.dirname(module.obj.__file__), path), + encoding='utf-8') as f: + text = ''.join(list(f)[start_line:end_line]) + + if start_after: + text = text[text.index(start_after) + len(start_after):] + if end_before: + text = text[:text.index(end_before)] + + return _ToMarkdown.indent(indent, text) + + @staticmethod + def _directive_opts(text: str) -> dict: + return dict(re.findall(r'^ *:([^:]+): *(.*)', text, re.MULTILINE)) + + @staticmethod + def doctests(text, + _indent_doctests=partial( + re.compile(r'(?:^(?P ```|~~~).*\n)?' + r'(?:^>>>.*' + r'(?:\n(?:(?:>>>|\.\.\.).*))*' + r'(?:\n.*)?\n\n?)+' + r'(?P=fence)?', re.MULTILINE).sub, + lambda m: (m.group(0) if m.group('fence') else + ('\n ' + '\n '.join(m.group(0).split('\n')) + '\n\n')))): + """ + Indent non-fenced (`~~~`) top-level (0-indented) + doctest blocks so they render as code. + """ + if not text.endswith('\n'): # Needed for the r'(?:\n.*)?\n\n?)+' line (GH-72) + text += '\n' + return _indent_doctests(text) + + @staticmethod + def raw_urls(text): + """Wrap URLs in Python-Markdown-compatible .""" + return re.sub(r'(?)\s]+)(\s*)', r'\1<\2>\3', text) + +import subprocess + +class _MathPattern(InlineProcessor): + NAME = 'pdoc-math' + PATTERN = r'(?'): # CUT was put into its own paragraph + toc = toc[:-3].rstrip() + return toc + + +def format_git_link(template: str, dobj: pdoc.Doc): + """ + Interpolate `template` as a formatted string literal using values extracted + from `dobj` and the working environment. 
+ """ + if not template: + return None + try: + if 'commit' in _str_template_fields(template): + commit = _git_head_commit() + abs_path = inspect.getfile(inspect.unwrap(dobj.obj)) + path = _project_relative_path(abs_path) + lines, start_line = inspect.getsourcelines(dobj.obj) + end_line = start_line + len(lines) - 1 + url = template.format(**locals()) + return url + except Exception: + warn('format_git_link for {} failed:\n{}'.format(dobj.obj, traceback.format_exc())) + return None + + +@lru_cache() +def _git_head_commit(): + """ + If the working directory is part of a git repository, return the + head git commit hash. Otherwise, raise a CalledProcessError. + """ + process_args = ['git', 'rev-parse', 'HEAD'] + try: + commit = subprocess.check_output(process_args, universal_newlines=True).strip() + return commit + except OSError as error: + warn("git executable not found on system:\n{}".format(error)) + except subprocess.CalledProcessError as error: + warn( + "Ensure pdoc is run within a git repository.\n" + "`{}` failed with output:\n{}" + .format(' '.join(process_args), error.output) + ) + return None + + +@lru_cache() +def _git_project_root(): + """ + Return the path to project root directory or None if indeterminate. + """ + path = None + for cmd in (['git', 'rev-parse', '--show-superproject-working-tree'], + ['git', 'rev-parse', '--show-toplevel']): + try: + path = subprocess.check_output(cmd, universal_newlines=True).rstrip('\r\n') + if path: + break + except (subprocess.CalledProcessError, OSError): + pass + return path + + +@lru_cache() +def _project_relative_path(absolute_path): + """ + Convert an absolute path of a python source file to a project-relative path. + Assumes the project's path is either the current working directory or + Python library installation. 
+ """ + from distutils.sysconfig import get_python_lib + for prefix_path in (_git_project_root() or os.getcwd(), + get_python_lib()): + common_path = os.path.commonpath([prefix_path, absolute_path]) + if common_path == prefix_path: + # absolute_path is a descendant of prefix_path + return os.path.relpath(absolute_path, prefix_path) + raise RuntimeError( + "absolute path {!r} is not a descendant of the current working directory " + "or of the system's python library." + .format(absolute_path) + ) + + +@lru_cache() +def _str_template_fields(template): + """ + Return a list of `str.format` field names in a template string. + """ + from string import Formatter + return [ + field_name + for _, field_name, _, _ in Formatter().parse(template) + if field_name is not None + ]