From 81cd3d7d85aaa784bee2935f8320854090dbefe2 Mon Sep 17 00:00:00 2001
From: Jan Petykiewicz
Date: Sun, 11 Jul 2021 17:27:42 -0700
Subject: [PATCH] add html_helpers.py to templates

---
 pdoc_templates/html_helpers.py | 540 +++++++++++++++++++++++++++++++++
 1 file changed, 540 insertions(+)
 create mode 100644 pdoc_templates/html_helpers.py

diff --git a/pdoc_templates/html_helpers.py b/pdoc_templates/html_helpers.py
new file mode 100644
index 0000000..a0be764
--- /dev/null
+++ b/pdoc_templates/html_helpers.py
@@ -0,0 +1,540 @@
+"""
+Helper functions for HTML output.
+"""
+import inspect
+import os
+import re
+import subprocess
+import traceback
+from functools import partial, lru_cache
+from typing import Callable, Match
+from warnings import warn
+
+import markdown
+from markdown.inlinepatterns import InlineProcessor
+from markdown.util import AtomicString, etree
+
+import pdoc
+
+
+@lru_cache()
+def minify_css(css: str,
+               _whitespace=partial(re.compile(r'\s*([,{:;}])\s*').sub, r'\1'),
+               _comments=partial(re.compile(r'/\*.*?\*/', flags=re.DOTALL).sub, ''),
+               _trailing_semicolon=partial(re.compile(r';\s*}').sub, '}')):
+    """
+    Minify CSS by removing extraneous whitespace, comments, and trailing semicolons.
+
+    Comments are removed first, then whitespace around `, { : ; }` is
+    dropped, then a `;` directly before `}` is removed, and finally the
+    result is stripped. Results are memoized per input string.
+
+    The underscore-prefixed parameters are pre-compiled regex helpers
+    bound as defaults; callers are not expected to pass them.
+    """
+    return _trailing_semicolon(_whitespace(_comments(css))).strip()
+
+
+def minify_html(html: str,
+                _minify=partial(
+                    # Group 1: text before a <pre> element; group 2: the whole
+                    # <pre>...</pre> element, kept verbatim; group 3: whatever
+                    # remains once no further <pre> element is found.
+                    re.compile(r'(.*?)(<pre\b.*?</pre\b\s*>)|(.*)', re.IGNORECASE | re.DOTALL).sub,
+                    lambda m, _norm_space=partial(re.compile(r'\s\s+').sub, '\n'): (
+                        _norm_space(m.group(1) or '') +
+                        (m.group(2) or '') +
+                        _norm_space(m.group(3) or '')))):
+    """
+    Minify HTML by replacing every run of two or more whitespace
+    characters with a single newline, except inside `<pre>` tags,
+    whose content is passed through unchanged.
+
+    `_minify` is a pre-compiled helper bound as a default; callers are
+    not expected to pass it.
+    """
+    return _minify(html)
+
+
+def glimpse(text: str, max_length=153, *, paragraph=True,
+            _split_paragraph=partial(re.compile(r'\s*\n\s*\n\s*').split, maxsplit=1),
+            _trim_last_word=partial(re.compile(r'\S+$').sub, ''),
+            _remove_titles=partial(re.compile(r'^(#+|-{4,}|={4,})', re.MULTILINE).sub, ' ')):
+    """
+    Return a short excerpt of `text`.
+
+    Leading whitespace is always dropped. When `paragraph` is true, only
+    the text before the first blank line is kept (with ` …` appended if
+    anything followed it) and heading markers (`#`, `----`, `====`) at
+    line starts are blanked out. A result longer than `max_length`
+    characters is cut back to the last whole word and, unless it is a
+    paragraph excerpt that ends with a full stop, finished with ` …`.
+
+    The underscore-prefixed parameters are pre-compiled regex helpers
+    bound as defaults; callers are not expected to pass them.
+    """
+    excerpt = text.lstrip()
+    if paragraph:
+        pieces = _split_paragraph(excerpt)
+        excerpt = pieces[0]
+        if len(pieces) > 1:
+            # Something followed the first paragraph: mark the omission.
+            excerpt = excerpt.rstrip('.') + ' …'
+        excerpt = _remove_titles(excerpt).strip()
+
+    if len(excerpt) <= max_length:
+        return excerpt
+
+    # Leave room for the ellipsis, then drop the (possibly cut) last word.
+    excerpt = _trim_last_word(excerpt[:max_length - 2])
+    if paragraph and excerpt.endswith('.'):
+        return excerpt
+    return excerpt.rstrip('. ') + ' …'
+
+
+# Shared Python-Markdown converter instance: HTML5 output with the
+# extensions listed below enabled.
+_md = markdown.Markdown(
+    output_format='html5',
+    extensions=[
+        "markdown.extensions.abbr",
+        "markdown.extensions.attr_list",
+        "markdown.extensions.def_list",
+        "markdown.extensions.fenced_code",
+        "markdown.extensions.footnotes",
+        "markdown.extensions.tables",
+        "markdown.extensions.admonition",
+        "markdown.extensions.smarty",
+        "markdown.extensions.toc",
+    ],
+    extension_configs={
+        # Smarty: convert dashes and ellipses only; leave quotes untouched.
+        "markdown.extensions.smarty": {
+            'smart_dashes': True,
+            'smart_ellipses': True,
+            'smart_quotes': False,
+            'smart_angled_quotes': False,
+        },
+    },
+)
+
+
+class _ToMarkdown:
+    """
+    This class serves as a namespace for methods converting common
+    documentation formats into markdown our Python-Markdown with
+    addons can ingest.
+
+    If debugging regexs (I can't imagine why that would be necessary
+    — they are all perfect!) an insta-preview tool such as RegEx101.com
+    will come in handy.
+    """
+    @staticmethod
+    def _deflist(name, type, desc,
+                 # Back-tick every identifier / string literal in the type
+                 # spec, but leave the numpydoc "stopwords" ('of', 'or',
+                 # 'default', 'optional') as plain text. See §4 Parameters:
+                 # https://numpydoc.readthedocs.io/en/latest/format.html#sections
+                 _type_parts=partial(
+                     re.compile(r'[\w.\'"]+').sub,
+                     lambda m: (m.group(0)
+                                if m.group(0) in ('of', 'or', 'default', 'optional')
+                                else '`{}`'.format(m.group(0))))):
+        """
+        Build one Python-Markdown definition-list entry.
+
+        The term is the bold, back-ticked `name` and/or the marked-up
+        `type`, joined by ` : ` when both are present; the definition is
+        `desc` (a single space when empty). `desc` must already have its
+        continuation lines indented by four spaces, and at least one of
+        `name` / `type` must be given. See also:
+        https://python-markdown.github.io/extensions/definition_lists/
+        """
+        type_md = _type_parts(type or '')
+        body = desc or ' '
+        assert _ToMarkdown._is_indented_4_spaces(body)
+        assert name or type_md
+        term_parts = []
+        if name:
+            term_parts.append('**`{}`**'.format(name))
+        if type_md:
+            term_parts.append(type_md)
+        return '{}\n:   {}\n\n'.format(' : '.join(term_parts), body)
+
+    @staticmethod
+    def _numpy_params(match,
+                      _name_parts=partial(re.compile(', ').sub, '`**, **`')):
+        """
+        Turn one matched NumpyDoc parameter (or return value, etc.) entry
+        into a Markdown definition-list entry.
+
+        `match` must provide the groups `name`, `type` and `desc`; an
+        optional `just_type` group is used when `type` is empty.
+        """
+        names = match.group("name")
+        type_spec = match.group("type") or match.groupdict().get('just_type')
+        description = match.group("desc").strip()
+        if names:
+            # "a, b" -> "a`**, **`b" so that each name ends up bold on its own.
+            names = _name_parts(names)
+        return _ToMarkdown._deflist(names, type_spec, description)
+
+    @staticmethod
+    def _numpy_seealso(match):
+        """
+        Convert the body of a NumpyDoc "See Also" section.
+
+        `match` has two groups. If the first is set it holds
+        `reference : description` lines, which become definition-list
+        entries; otherwise the second holds a `, `-separated list of
+        references, each of which is wrapped in backticks.
+        """
+        described, plain = match.groups()
+        if not described:
+            return ', '.join('`{}`'.format(ref) for ref in plain.split(', '))
+
+        entries = []
+        for line in described.split('\n'):
+            if not line:
+                continue
+            fields = [part.strip() for part in line.split(':', 1)]
+            entries.append('`{}`\n:   {}'.format(*fields))
+        return '\n\n'.join(entries)
+
+    @staticmethod
+    def _numpy_sections(match):
+        """
+        Convert the body of one NumpyDoc section to Markdown.
+
+        `match` has two groups: the section title and the section body.
+
+        * "See Also" bodies are handed to `_numpy_seealso`.
+        * "Returns", "Yields", "Raises" and "Warns" entries may be either
+          `name : type` or a bare type (captured as `just_type`).
+        * Every other section is parsed as `name[, name...][ : type]`.
+
+        In the last two cases each entry header must not end with a
+        period, and its description is the following run of lines that
+        are blank or indented by four spaces; entries are rendered by
+        `_numpy_params`. The section title is re-emitted above a
+        `-----` underline.
+        """
+        section, body = match.groups()
+        if section.title() == 'See Also':
+            body = re.sub(r'^((?:\n?[\w.]* ?: .*)+)|(.*\w.*)',
+                          _ToMarkdown._numpy_seealso, body)
+        elif section.title() in ('Returns', 'Yields', 'Raises', 'Warns'):
+            # The group names are required by `_numpy_params`, which reads
+            # `name`, `type`, `desc` and (optionally) `just_type`.
+            body = re.sub(r'^(?:(?P<name>\*{0,2}\w+(?:, \*{0,2}\w+)*)'
+                          r'(?: ?: (?P<type>.*))|'
+                          r'(?P<just_type>\w[^\n`*]*))(?<!\.)$'
+                          r'(?P<desc>(?:\n(?: {4}.*|$))*)',
+                          _ToMarkdown._numpy_params, body, flags=re.MULTILINE)
+        else:
+            body = re.sub(r'^(?P<name>\*{0,2}\w+(?:, \*{0,2}\w+)*)'
+                          r'(?: ?: (?P<type>.*))?(?<!\.)$'
+                          r'(?P<desc>(?:\n(?: {4}.*|$))*)',
+                          _ToMarkdown._numpy_params, body, flags=re.MULTILINE)
+        return section + '\n-----\n' + body
+
+    @staticmethod
+    def numpy(text):
+        """
+        Rewrite every numpydoc-style section in `text` as Markdown.
+
+        A section is a title line followed by an underline of at least
+        three dashes; its body runs until the next such title. Each
+        section is converted by `_numpy_sections`; text outside sections
+        is returned unchanged.
+        """
+        section_re = re.compile(r'^(\w[\w ]+)\n-{3,}\n'
+                                r'((?:(?!.+\n-+).*$\n?)*)',
+                                re.MULTILINE)
+        return section_re.sub(_ToMarkdown._numpy_sections, text)
+
+    @staticmethod
+    def _is_indented_4_spaces(txt, _3_spaces_or_less=re.compile(r'\n\s{0,3}\S').search):
+        """
+        Return True when `txt` is a single line, or when no newline in it
+        is followed by at most three whitespace characters and then a
+        non-whitespace character.
+        """
+        if '\n' not in txt:
+            return True
+        return _3_spaces_or_less(txt) is None
+
+    @staticmethod
+    def _fix_indent(name, type, desc):
+        """
+        Return `(name, type, desc)`, adding two spaces after every newline
+        in `desc` when `desc` is not already indented by four spaces
+        (i.e. turning a 2-space continuation indent into a 4-space one).
+        """
+        already_ok = _ToMarkdown._is_indented_4_spaces(desc)
+        fixed = desc if already_ok else '\n  '.join(desc.split('\n'))
+        return name, type, fixed
+
+    @staticmethod
+    def indent(indent, text, *, clean_first=False):
+        """
+        Prefix every line of `text` with `indent`.
+
+        Trailing whitespace of `text` is removed first. With
+        `clean_first=True` the text is normalized with `inspect.cleandoc`
+        before indenting.
+        """
+        body = inspect.cleandoc(text) if clean_first else text
+        prefixed = indent + body.rstrip()
+        return re.sub(r'\n', '\n' + indent, prefixed)
+
+    @staticmethod
+    def google(text,
+               # Outer substitution: a section is a capitalized word followed
+               # by ':' at the end of a line (group 1), then a body of lines
+               # that are blank or indented by at least two spaces (group 2).
+               _googledoc_sections=partial(
+                   re.compile(r'^([A-Z]\w+):$\n((?:\n?(?: {2,}.*|$))+)', re.MULTILINE).sub,
+                   # Inner substitution, applied to the dedented section body:
+                   # each `name (type): description` item (type optional,
+                   # continuation lines indented by two or more spaces) is
+                   # re-indented by `_fix_indent` and rendered by `_deflist`.
+                   lambda m, _params=partial(
+                           re.compile(r'^([\w*]+)(?: \(([\w.,=\[\] ]+)\))?: '
+                                      r'((?:.*)(?:\n(?: {2,}.*|$))*)', re.MULTILINE).sub,
+                           lambda m: _ToMarkdown._deflist(*_ToMarkdown._fix_indent(*m.groups()))): (
+                       # A section with an empty body is left untouched;
+                       # otherwise emit the title over a '-----' underline.
+                       m.group() if not m.group(2) else '\n{}\n-----\n{}'.format(
+                           m.group(1), _params(inspect.cleandoc('\n' + m.group(2))))))):
+        """
+        Convert `text` in Google-style docstring format to Markdown
+        to be further converted later.
+
+        `_googledoc_sections` is a pre-built substitution helper bound as
+        a default; callers are not expected to pass it.
+        """
+        return _googledoc_sections(text)
+
+    @staticmethod
+    def _admonition(match, module=None, limit_types=None):
+        """
+        Convert one matched reST block directive to Markdown.
+
+        `match` has four groups: the directive's leading indent, its type
+        (the word before `::`), the value on the directive line, and the
+        indented body text that follows.
+
+        * If `limit_types` is given and the type is not in it, the
+          matched text is returned unchanged.
+        * `include` (only when `module` is given) returns the referenced
+          file's text; any failure is re-raised as `RuntimeError`.
+        * `image` / `figure` become a Markdown image whose alt text is
+          the body.
+        * `math` becomes a `\\[ ... \\]` block.
+        * Everything else becomes a Python-Markdown `!!!` admonition
+          with a title derived from the type and value.
+        """
+        indent, type, value, text = match.groups()
+
+        if limit_types and type not in limit_types:
+            return match.group(0)
+
+        if type == 'include' and module:
+            try:
+                return _ToMarkdown._include_file(indent, value,
+                                                 _ToMarkdown._directive_opts(text), module)
+            except Exception as e:
+                # Chain explicitly so the traceback shows the real cause.
+                raise RuntimeError('`.. include:: {}` error in module {!r}: {}'
+                                   .format(value, module.name, e)) from e
+        if type in ('image', 'figure'):
+            # Flatten the body to one line and escape square brackets so
+            # it is safe inside the `![...]` alt text.
+            return '{}![{}]({})\n'.format(
+                indent, text.translate(str.maketrans({'\n': ' ',
+                                                      '[': '\\[',
+                                                      ']': '\\]'})).strip(), value)
+        if type == 'math':
+            return _ToMarkdown.indent(indent,
+                                      '\\[ ' + text.strip() + ' \\]',
+                                      clean_first=True)
+
+        if type == 'versionchanged':
+            title = 'Changed in version: ' + value
+        elif type == 'versionadded':
+            title = 'Added in version: ' + value
+        elif type == 'deprecated' and value:
+            title = 'Deprecated since version: ' + value
+        elif type == 'admonition':
+            title = value
+        elif type.lower() == 'todo':
+            title = 'TODO'
+            # For TODOs the directive-line value is part of the body.
+            text = value + ' ' + text
+        else:
+            title = type.capitalize()
+            if value:
+                title += ': ' + value
+
+        text = _ToMarkdown.indent(indent + '    ', text, clean_first=True)
+        return '{}!!! {} "{}"\n{}\n'.format(indent, type, title, text)
+
+    @staticmethod
+    def admonitions(text, module, limit_types=None):
+        """
+        Process reStructuredText's block directives such as
+        `.. warning::`, `.. deprecated::`, `.. versionadded::`, etc.
+        and turn them into Python-Markdown admonitions.
+
+        `module` is passed on to `_admonition`, which needs it to
+        resolve `.. include::` directives.
+
+        `limit_types` is optionally a set of directives to limit processing to.
+
+        See: https://python-markdown.github.io/extensions/admonition/
+        """
+        # A directive is `.. type:: value` at some indent, followed by body
+        # lines that are either blank or indented deeper than the directive
+        # itself (hence the back-reference to the `indent` group).
+        substitute = partial(re.compile(r'^(?P<indent> *)\.\. ?(\w+)::(?: *(.*))?'
+                                        r'((?:\n(?:(?P=indent) +.*| *$))*)', re.MULTILINE).sub,
+                             partial(_ToMarkdown._admonition, module=module,
+                                     limit_types=limit_types))
+        # Apply twice for nested (e.g. image inside warning)
+        return substitute(substitute(text))
+
+    @staticmethod
+    def _include_file(indent: str, path: str, options: dict, module: pdoc.Module) -> str:
+        """
+        Return the text of the file at `path`, each line prefixed with `indent`.
+
+        `path` is resolved relative to the directory of `module`'s source
+        file and read as UTF-8. `options` may narrow the included text:
+        `start-line` / `end-line` slice the file by line index, then
+        `start-after` / `end-before` cut at the given marker strings
+        (a missing marker raises `ValueError`).
+        """
+        first_line = int(options.get('start-line', 0))
+        last_line = int(options.get('end-line', 0)) or None
+        after_marker = options.get('start-after')
+        before_marker = options.get('end-before')
+
+        base_dir = os.path.dirname(module.obj.__file__)
+        with open(os.path.join(base_dir, path), encoding='utf-8') as f:
+            text = ''.join(f.readlines()[first_line:last_line])
+
+        if after_marker:
+            cut_from = text.index(after_marker) + len(after_marker)
+            text = text[cut_from:]
+        if before_marker:
+            cut_to = text.index(before_marker)
+            text = text[:cut_to]
+
+        return _ToMarkdown.indent(indent, text)
+
+    @staticmethod
+    def _directive_opts(text: str) -> dict:
+        """
+        Collect reST directive options of the form `:name: value` (one
+        per line, optionally indented) from `text` into a dict mapping
+        option name to value string.
+        """
+        option_re = re.compile(r'^ *:([^:]+): *(.*)', re.MULTILINE)
+        return {found.group(1): found.group(2) for found in option_re.finditer(text)}
+
+    @staticmethod
+    def doctests(text,
+                 _indent_doctests=partial(
+                     # An optional opening code fence (captured as `fence`),
+                     # then one or more doctest groups: a `>>>` line, any
+                     # `>>>` / `...` continuation lines, and at most one line
+                     # of output, followed by an optional closing fence.
+                     re.compile(r'(?:^(?P<fence>```|~~~).*\n)?'
+                                r'(?:^>>>.*'
+                                r'(?:\n(?:(?:>>>|\.\.\.).*))*'
+                                r'(?:\n.*)?\n\n?)+'
+                                r'(?P=fence)?', re.MULTILINE).sub,
+                     # Fenced blocks are already code and stay as they are;
+                     # bare blocks get every line indented by four spaces.
+                     lambda m: (m.group(0) if m.group('fence') else
+                                ('\n    ' + '\n    '.join(m.group(0).split('\n')) + '\n\n')))):
+        """
+        Indent non-fenced (`~~~`) top-level (0-indented)
+        doctest blocks so they render as code.
+
+        `_indent_doctests` is a pre-built substitution helper bound as a
+        default; callers are not expected to pass it.
+        """
+        if not text.endswith('\n'):  # Needed for the r'(?:\n.*)?\n\n?)+' line (GH-72)
+            text += '\n'
+        return _indent_doctests(text)
+
+    @staticmethod
+    def raw_urls(text):
+        """
+        Wrap URLs in Python-Markdown-compatible <angle brackets>.
+
+        Matches `http(s)://` and `ftp(s)://` URLs up to the next
+        whitespace, `>` or `)`. URLs directly preceded by `<`, `"` or
+        `'` are skipped. Whitespace around the URL is preserved.
+        """
+        return re.sub(r'(?<![<"\'])(\s*)((?:http|ftp)s?://[^>)\s]+)(\s*)', r'\1<\2>\3', text)
+
+import subprocess
+
+class _MathPattern(InlineProcessor):
+    NAME = 'pdoc-math'
+    PATTERN = r'(?'):  # CUT was put into its own paragraph
+        toc = toc[:-3].rstrip()
+    return toc
+
+
+def format_git_link(template: str, dobj: pdoc.Doc):
+    """
+    Fill in `template` (a `str.format` template) with source-location
+    values for `dobj` and return the resulting URL.
+
+    Fields available to the template include `path` (project-relative
+    source path), `start_line`, `end_line`, and, only when the template
+    references it, `commit` (the current git HEAD).
+
+    Returns None when `template` is empty. Any failure is reported as a
+    warning and also yields None.
+    """
+    if not template:
+        return None
+    try:
+        # Only shell out to git when the template actually needs the commit.
+        wants_commit = 'commit' in _str_template_fields(template)
+        commit = _git_head_commit() if wants_commit else None
+        abs_path = inspect.getfile(inspect.unwrap(dobj.obj))
+        path = _project_relative_path(abs_path)
+        lines, start_line = inspect.getsourcelines(dobj.obj)
+        end_line = start_line + len(lines) - 1
+        fields = {
+            'template': template,
+            'dobj': dobj,
+            'abs_path': abs_path,
+            'path': path,
+            'lines': lines,
+            'start_line': start_line,
+            'end_line': end_line,
+        }
+        if wants_commit:
+            fields['commit'] = commit
+        return template.format(**fields)
+    except Exception:
+        warn('format_git_link for {} failed:\n{}'.format(dobj.obj, traceback.format_exc()))
+        return None
+
+
+@lru_cache()
+def _git_head_commit():
+    """
+    Return the hash of the git HEAD commit for the current working
+    directory, as printed by `git rev-parse HEAD`.
+
+    If git cannot be executed, or the command fails (e.g. outside a
+    repository), a warning is issued and None is returned. The result
+    is cached for the lifetime of the process.
+    """
+    process_args = ['git', 'rev-parse', 'HEAD']
+    try:
+        output = subprocess.check_output(process_args, universal_newlines=True)
+    except OSError as error:
+        warn("git executable not found on system:\n{}".format(error))
+        return None
+    except subprocess.CalledProcessError as error:
+        warn(
+            "Ensure pdoc is run within a git repository.\n"
+            "`{}` failed with output:\n{}"
+            .format(' '.join(process_args), error.output)
+        )
+        return None
+    return output.strip()
+
+
+@lru_cache()
+def _git_project_root():
+    """
+    Return the project root directory as reported by git.
+
+    The superproject working tree is tried first, then the repository
+    top level; the first non-empty answer wins. Commands that fail or
+    cannot be run are skipped. Returns None if no command succeeded.
+    The result is cached for the lifetime of the process.
+    """
+    commands = (
+        ['git', 'rev-parse', '--show-superproject-working-tree'],
+        ['git', 'rev-parse', '--show-toplevel'],
+    )
+    root = None
+    for command in commands:
+        try:
+            output = subprocess.check_output(command, universal_newlines=True)
+        except (subprocess.CalledProcessError, OSError):
+            continue
+        root = output.rstrip('\r\n')
+        if root:
+            break
+    return root
+
+
+@lru_cache()
+def _project_relative_path(absolute_path):
+    """
+    Convert an absolute path of a python source file to a project-relative path.
+
+    The path is made relative to the first of these directories that
+    contains it: the git project root (or the current working directory
+    when that is unknown), then the Python library installation
+    directory.
+
+    Raises RuntimeError if `absolute_path` lies under neither.
+    """
+    try:
+        from distutils.sysconfig import get_python_lib
+        python_lib = get_python_lib()
+    except ImportError:
+        # distutils was removed in Python 3.12 (PEP 632); `purelib` is
+        # the documented sysconfig replacement for get_python_lib().
+        import sysconfig
+        python_lib = sysconfig.get_path('purelib')
+
+    for prefix_path in (_git_project_root() or os.getcwd(), python_lib):
+        try:
+            common_path = os.path.commonpath([prefix_path, absolute_path])
+        except ValueError:
+            # Paths that cannot be compared (e.g. different drives on
+            # Windows) have no common ancestor; try the next prefix.
+            continue
+        if common_path == prefix_path:
+            # absolute_path is a descendant of prefix_path
+            return os.path.relpath(absolute_path, prefix_path)
+    raise RuntimeError(
+        "absolute path {!r} is not a descendant of the current working directory "
+        "or of the system's python library."
+        .format(absolute_path)
+    )
+
+
+@lru_cache()
+def _str_template_fields(template):
+    """
+    List the replacement-field names that occur in the `str.format`
+    template string `template`, in order of appearance. Literal-only
+    segments contribute nothing. Results are memoized per template.
+    """
+    from string import Formatter
+    names = []
+    for parsed in Formatter().parse(template):
+        field_name = parsed[1]
+        if field_name is not None:
+            names.append(field_name)
+    return names