""" Helper functions for HTML output. """ import inspect import os import re import subprocess import traceback from functools import partial, lru_cache from typing import Callable, Match from warnings import warn import markdown from markdown.inlinepatterns import InlineProcessor from markdown.util import AtomicString, etree import pdoc @lru_cache() def minify_css(css: str, _whitespace=partial(re.compile(r'\s*([,{:;}])\s*').sub, r'\1'), _comments=partial(re.compile(r'/\*.*?\*/', flags=re.DOTALL).sub, ''), _trailing_semicolon=partial(re.compile(r';\s*}').sub, '}')): """ Minify CSS by removing extraneous whitespace, comments, and trailing semicolons. """ return _trailing_semicolon(_whitespace(_comments(css))).strip() def minify_html(html: str, _minify=partial( re.compile(r'(.*?)(
)|(.*)', re.IGNORECASE | re.DOTALL).sub, lambda m, _norm_space=partial(re.compile(r'\s\s+').sub, '\n'): ( _norm_space(m.group(1) or '') + (m.group(2) or '') + _norm_space(m.group(3) or '')))): """ Minify HTML by replacing all consecutive whitespace with a single space (or newline) character, except inside `` tags. """ return _minify(html) def glimpse(text: str, max_length=153, *, paragraph=True, _split_paragraph=partial(re.compile(r'\s*\n\s*\n\s*').split, maxsplit=1), _trim_last_word=partial(re.compile(r'\S+$').sub, ''), _remove_titles=partial(re.compile(r'^(#+|-{4,}|={4,})', re.MULTILINE).sub, ' ')): """ Returns a short excerpt (e.g. first paragraph) of text. If `paragraph` is True, the first paragraph will be returned, but never longer than `max_length` characters. """ text = text.lstrip() if paragraph: text, *rest = _split_paragraph(text) if rest: text = text.rstrip('.') text += ' …' text = _remove_titles(text).strip() if len(text) > max_length: text = _trim_last_word(text[:max_length - 2]) if not text.endswith('.') or not paragraph: text = text.rstrip('. ') + ' …' return text _md = markdown.Markdown( output_format='html5', extensions=[ "markdown.extensions.abbr", "markdown.extensions.attr_list", "markdown.extensions.def_list", "markdown.extensions.fenced_code", "markdown.extensions.footnotes", "markdown.extensions.tables", "markdown.extensions.admonition", "markdown.extensions.smarty", "markdown.extensions.toc", ], extension_configs={ "markdown.extensions.smarty": dict( smart_dashes=True, smart_ellipses=True, smart_quotes=False, smart_angled_quotes=False, ), }, ) class _ToMarkdown: """ This class serves as a namespace for methods converting common documentation formats into markdown our Python-Markdown with addons can ingest. If debugging regexs (I can't imagine why that would be necessary — they are all perfect!) an insta-preview tool such as RegEx101.com will come in handy. """ @staticmethod def _deflist(name, type, desc, # Wraps any identifiers and string literals in parameter type spec # in backticks while skipping common "stopwords" such as 'or', 'of', # 'optional' ... See §4 Parameters: # https://numpydoc.readthedocs.io/en/latest/format.html#sections _type_parts=partial( re.compile(r'[\w.\'"]+').sub, lambda m: ('{}' if m.group(0) in ('of', 'or', 'default', 'optional') else '`{}`').format(m.group(0)))): """ Returns `name`, `type`, and `desc` formatted as a Python-Markdown definition list entry. See also: https://python-markdown.github.io/extensions/definition_lists/ """ type = _type_parts(type or '') desc = desc or ' ' assert _ToMarkdown._is_indented_4_spaces(desc) assert name or type ret = "" if name: ret += '**`{}`**'.format(name) if type: ret += ' : {}'.format(type) if ret else type ret += '\n: {}\n\n'.format(desc) return ret @staticmethod def _numpy_params(match, _name_parts=partial(re.compile(', ').sub, '`**, **`')): """ Converts NumpyDoc parameter (etc.) sections into Markdown. """ name, type, desc = match.group("name", "type", "desc") type = type or match.groupdict().get('just_type', None) desc = desc.strip() name = name and _name_parts(name) return _ToMarkdown._deflist(name, type, desc) @staticmethod def _numpy_seealso(match): """ Converts NumpyDoc "See Also" section either into referenced code, optionally within a definition list. """ spec_with_desc, simple_list = match.groups() if spec_with_desc: return '\n\n'.join('`{}`\n: {}'.format(*map(str.strip, line.split(':', 1))) for line in filter(None, spec_with_desc.split('\n'))) return ', '.join('`{}`'.format(i) for i in simple_list.split(', ')) @staticmethod def _numpy_sections(match): """ Convert sections with parameter, return, and see also lists to Markdown lists. """ section, body = match.groups() if section.title() == 'See Also': body = re.sub(r'^((?:\n?[\w.]* ?: .*)+)|(.*\w.*)', _ToMarkdown._numpy_seealso, body) elif section.title() in ('Returns', 'Yields', 'Raises', 'Warns'): body = re.sub(r'^(?:(?P\*{0,2}\w+(?:, \*{0,2}\w+)*)' r'(?: ?: (?P .*))|' r'(?P \w[^\n`*]*))(?(?:\n(?: {4}.*|$))*)', _ToMarkdown._numpy_params, body, flags=re.MULTILINE) else: body = re.sub(r'^(?P \*{0,2}\w+(?:, \*{0,2}\w+)*)' r'(?: ?: (?P .*))?(?(?:\n(?: {4}.*|$))*)', _ToMarkdown._numpy_params, body, flags=re.MULTILINE) return section + '\n-----\n' + body @staticmethod def numpy(text): """ Convert `text` in numpydoc docstring format to Markdown to be further converted later. """ return re.sub(r'^(\w[\w ]+)\n-{3,}\n' r'((?:(?!.+\n-+).*$\n?)*)', _ToMarkdown._numpy_sections, text, flags=re.MULTILINE) @staticmethod def _is_indented_4_spaces(txt, _3_spaces_or_less=re.compile(r'\n\s{0,3}\S').search): return '\n' not in txt or not _3_spaces_or_less(txt) @staticmethod def _fix_indent(name, type, desc): """Maybe fix indent from 2 to 4 spaces.""" if not _ToMarkdown._is_indented_4_spaces(desc): desc = desc.replace('\n', '\n ') return name, type, desc @staticmethod def indent(indent, text, *, clean_first=False): if clean_first: text = inspect.cleandoc(text) return re.sub(r'\n', '\n' + indent, indent + text.rstrip()) @staticmethod def google(text, _googledoc_sections=partial( re.compile(r'^([A-Z]\w+):$\n((?:\n?(?: {2,}.*|$))+)', re.MULTILINE).sub, lambda m, _params=partial( re.compile(r'^([\w*]+)(?: \(([\w.,=\[\] ]+)\))?: ' r'((?:.*)(?:\n(?: {2,}.*|$))*)', re.MULTILINE).sub, lambda m: _ToMarkdown._deflist(*_ToMarkdown._fix_indent(*m.groups()))): ( m.group() if not m.group(2) else '\n{}\n-----\n{}'.format( m.group(1), _params(inspect.cleandoc('\n' + m.group(2))))))): """ Convert `text` in Google-style docstring format to Markdown to be further converted later. """ return _googledoc_sections(text) @staticmethod def _admonition(match, module=None, limit_types=None): indent, type, value, text = match.groups() if limit_types and type not in limit_types: return match.group(0) if type == 'include' and module: try: return _ToMarkdown._include_file(indent, value, _ToMarkdown._directive_opts(text), module) except Exception as e: raise RuntimeError('`.. include:: {}` error in module {!r}: {}' .format(value, module.name, e)) if type in ('image', 'figure'): return '{}![{}]({})\n'.format( indent, text.translate(str.maketrans({'\n': ' ', '[': '\\[', ']': '\\]'})).strip(), value) if type == 'math': return _ToMarkdown.indent(indent, '\\[ ' + text.strip() + ' \\]', clean_first=True) if type == 'versionchanged': title = 'Changed in version: ' + value elif type == 'versionadded': title = 'Added in version: ' + value elif type == 'deprecated' and value: title = 'Deprecated since version: ' + value elif type == 'admonition': title = value elif type.lower() == 'todo': title = 'TODO' text = value + ' ' + text else: title = type.capitalize() if value: title += ': ' + value text = _ToMarkdown.indent(indent + ' ', text, clean_first=True) return '{}!!! {} "{}"\n{}\n'.format(indent, type, title, text) @staticmethod def admonitions(text, module, limit_types=None): """ Process reStructuredText's block directives such as `.. warning::`, `.. deprecated::`, `.. versionadded::`, etc. and turn them into Python-M>arkdown admonitions. `limit_types` is optionally a set of directives to limit processing to. See: https://python-markdown.github.io/extensions/admonition/ """ substitute = partial(re.compile(r'^(?P *)\.\. ?(\w+)::(?: *(.*))?' r'((?:\n(?:(?P=indent) +.*| *$))*)', re.MULTILINE).sub, partial(_ToMarkdown._admonition, module=module, limit_types=limit_types)) # Apply twice for nested (e.g. image inside warning) return substitute(substitute(text)) @staticmethod def _include_file(indent: str, path: str, options: dict, module: pdoc.Module) -> str: start_line = int(options.get('start-line', 0)) end_line = int(options.get('end-line', 0)) or None start_after = options.get('start-after') end_before = options.get('end-before') with open(os.path.join(os.path.dirname(module.obj.__file__), path), encoding='utf-8') as f: text = ''.join(list(f)[start_line:end_line]) if start_after: text = text[text.index(start_after) + len(start_after):] if end_before: text = text[:text.index(end_before)] return _ToMarkdown.indent(indent, text) @staticmethod def _directive_opts(text: str) -> dict: return dict(re.findall(r'^ *:([^:]+): *(.*)', text, re.MULTILINE)) @staticmethod def doctests(text, _indent_doctests=partial( re.compile(r'(?:^(?P ```|~~~).*\n)?' r'(?:^>>>.*' r'(?:\n(?:(?:>>>|\.\.\.).*))*' r'(?:\n.*)?\n\n?)+' r'(?P=fence)?', re.MULTILINE).sub, lambda m: (m.group(0) if m.group('fence') else ('\n ' + '\n '.join(m.group(0).split('\n')) + '\n\n')))): """ Indent non-fenced (`~~~`) top-level (0-indented) doctest blocks so they render as code. """ if not text.endswith('\n'): # Needed for the r'(?:\n.*)?\n\n?)+' line (GH-72) text += '\n' return _indent_doctests(text) @staticmethod def raw_urls(text): """Wrap URLs in Python-Markdown-compatible .""" return re.sub(r'(?)\s]+)(\s*)', r'\1<\2>\3', text) import subprocess class _MathPattern(InlineProcessor): NAME = 'pdoc-math' PATTERN = r'(?'): # CUT was put into its own paragraph toc = toc[:-3].rstrip() return toc def format_git_link(template: str, dobj: pdoc.Doc): """ Interpolate `template` as a formatted string literal using values extracted from `dobj` and the working environment. """ if not template: return None try: if 'commit' in _str_template_fields(template): commit = _git_head_commit() abs_path = inspect.getfile(inspect.unwrap(dobj.obj)) path = _project_relative_path(abs_path) lines, start_line = inspect.getsourcelines(dobj.obj) end_line = start_line + len(lines) - 1 url = template.format(**locals()) return url except Exception: warn('format_git_link for {} failed:\n{}'.format(dobj.obj, traceback.format_exc())) return None @lru_cache() def _git_head_commit(): """ If the working directory is part of a git repository, return the head git commit hash. Otherwise, raise a CalledProcessError. """ process_args = ['git', 'rev-parse', 'HEAD'] try: commit = subprocess.check_output(process_args, universal_newlines=True).strip() return commit except OSError as error: warn("git executable not found on system:\n{}".format(error)) except subprocess.CalledProcessError as error: warn( "Ensure pdoc is run within a git repository.\n" "`{}` failed with output:\n{}" .format(' '.join(process_args), error.output) ) return None @lru_cache() def _git_project_root(): """ Return the path to project root directory or None if indeterminate. """ path = None for cmd in (['git', 'rev-parse', '--show-superproject-working-tree'], ['git', 'rev-parse', '--show-toplevel']): try: path = subprocess.check_output(cmd, universal_newlines=True).rstrip('\r\n') if path: break except (subprocess.CalledProcessError, OSError): pass return path @lru_cache() def _project_relative_path(absolute_path): """ Convert an absolute path of a python source file to a project-relative path. Assumes the project's path is either the current working directory or Python library installation. """ from distutils.sysconfig import get_python_lib for prefix_path in (_git_project_root() or os.getcwd(), get_python_lib()): common_path = os.path.commonpath([prefix_path, absolute_path]) if common_path == prefix_path: # absolute_path is a descendant of prefix_path return os.path.relpath(absolute_path, prefix_path) raise RuntimeError( "absolute path {!r} is not a descendant of the current working directory " "or of the system's python library." .format(absolute_path) ) @lru_cache() def _str_template_fields(template): """ Return a list of `str.format` field names in a template string. """ from string import Formatter return [ field_name for _, field_name, _, _ in Formatter().parse(template) if field_name is not None ]