"""
Helper functions for HTML output.
"""
import inspect
import os
import re
import subprocess
import traceback
from functools import partial, lru_cache
from typing import Callable, Match
from warnings import warn

import markdown
from markdown.inlinepatterns import InlineProcessor
from markdown.util import AtomicString, etree

import pdoc


@lru_cache()
def minify_css(css: str,
               _whitespace=partial(re.compile(r'\s*([,{:;}])\s*').sub, r'\1'),
               _comments=partial(re.compile(r'/\*.*?\*/', flags=re.DOTALL).sub, ''),
               _trailing_semicolon=partial(re.compile(r';\s*}').sub, '}')):
    """
    Minify CSS by removing extraneous whitespace, comments, and trailing semicolons.

    The underscore-prefixed parameters are pre-compiled substitution helpers
    bound once at definition time; callers pass only `css`.
    Results are memoized by `lru_cache`.
    """
    # Order matters: drop comments first so that their contents cannot
    # influence the whitespace and semicolon passes.
    return _trailing_semicolon(_whitespace(_comments(css))).strip()


def minify_html(html: str,
                _minify=partial(
                    # Alternative 1: any text (group 1) followed by a whole
                    # <pre>...</pre> element (group 2).
                    # Alternative 2: the remaining text with no <pre> in it (group 3).
                    re.compile(r'(.*?)(<pre\b.*?</pre\b\s*>)|(.*)',
                               re.IGNORECASE | re.DOTALL).sub,
                    lambda m, _norm_space=partial(re.compile(r'\s\s+').sub, '\n'): (
                        _norm_space(m.group(1) or '') +
                        (m.group(2) or '') +  # <pre> content is kept verbatim
                        _norm_space(m.group(3) or '')))):
    """
    Minify HTML by collapsing every run of two or more whitespace characters
    into a single newline, except inside `<pre>` tags, whose contents are
    left untouched.

    The underscore-prefixed parameter is a pre-built substitution helper
    bound once at definition time; callers pass only `html`.
    """
    return _minify(html)
def glimpse(text: str, max_length=153, *, paragraph=True,
            _split_paragraph=partial(re.compile(r'\s*\n\s*\n\s*').split, maxsplit=1),
            _trim_last_word=partial(re.compile(r'\S+$').sub, ''),
            _remove_titles=partial(re.compile(r'^(#+|-{4,}|={4,})', re.MULTILINE).sub, ' ')):
    """
    Return a short excerpt of `text`.

    With `paragraph` set, only the first paragraph is considered; if more
    paragraphs follow, an ellipsis replaces its trailing period, and
    markdown title markers at line starts are blanked out.
    In either mode, an excerpt longer than `max_length` characters is cut
    back to a word boundary and terminated with an ellipsis.
    """
    excerpt = text.lstrip()

    if paragraph:
        pieces = _split_paragraph(excerpt)
        excerpt = pieces[0]
        if len(pieces) > 1:
            # More text follows the first paragraph; signal the omission.
            excerpt = excerpt.rstrip('.') + ' …'
        excerpt = _remove_titles(excerpt).strip()

    if len(excerpt) <= max_length:
        return excerpt

    # Too long: cut, then drop the (possibly partial) last word.
    excerpt = _trim_last_word(excerpt[:max_length - 2])
    if paragraph and excerpt.endswith('.'):
        # The cut happens to land on a sentence end; no ellipsis needed.
        return excerpt
    return excerpt.rstrip('. ') + ' …'
# Shared Python-Markdown converter instance, configured for HTML5 output
# with the extensions listed below.
_md = markdown.Markdown(
    output_format='html5',
    extensions=[
        'markdown.extensions.' + _extension
        for _extension in (
            'abbr',
            'attr_list',
            'def_list',
            'fenced_code',
            'footnotes',
            'tables',
            'admonition',
            'smarty',
            'toc',
        )
    ],
    extension_configs={
        # Typographic dashes and ellipses only; quote characters stay as typed.
        'markdown.extensions.smarty': {
            'smart_dashes': True,
            'smart_ellipses': True,
            'smart_quotes': False,
            'smart_angled_quotes': False,
        },
    },
)
class _ToMarkdown:
"""
This class serves as a namespace for methods converting common
documentation formats into markdown our Python-Markdown with
addons can ingest.
If debugging regexs (I can't imagine why that would be necessary
— they are all perfect!) an insta-preview tool such as RegEx101.com
will come in handy.
"""
@staticmethod
def _deflist(name, type, desc,
# Wraps any identifiers and string literals in parameter type spec
# in backticks while skipping common "stopwords" such as 'or', 'of',
# 'optional' ... See §4 Parameters:
# https://numpydoc.readthedocs.io/en/latest/format.html#sections
_type_parts=partial(
re.compile(r'[\w.\'"]+').sub,
lambda m: ('{}' if m.group(0) in ('of', 'or', 'default', 'optional') else
'`{}`').format(m.group(0)))):
"""
Returns `name`, `type`, and `desc` formatted as a
Python-Markdown definition list entry. See also:
https://python-markdown.github.io/extensions/definition_lists/
"""
type = _type_parts(type or '')
desc = desc or ' '
assert _ToMarkdown._is_indented_4_spaces(desc)
assert name or type
ret = ""
if name:
ret += '**`{}`**'.format(name)
if type:
ret += ' : {}'.format(type) if ret else type
ret += '\n: {}\n\n'.format(desc)
return ret
@staticmethod
def _numpy_params(match,
_name_parts=partial(re.compile(', ').sub, '`**, **`')):
""" Converts NumpyDoc parameter (etc.) sections into Markdown. """
name, type, desc = match.group("name", "type", "desc")
type = type or match.groupdict().get('just_type', None)
desc = desc.strip()
name = name and _name_parts(name)
return _ToMarkdown._deflist(name, type, desc)
@staticmethod
def _numpy_seealso(match):
"""
Converts NumpyDoc "See Also" section either into referenced code,
optionally within a definition list.
"""
spec_with_desc, simple_list = match.groups()
if spec_with_desc:
return '\n\n'.join('`{}`\n: {}'.format(*map(str.strip, line.split(':', 1)))
for line in filter(None, spec_with_desc.split('\n')))
return ', '.join('`{}`'.format(i) for i in simple_list.split(', '))
@staticmethod
def _numpy_sections(match):
"""
Convert sections with parameter, return, and see also lists to Markdown
lists.
"""
section, body = match.groups()
if section.title() == 'See Also':
body = re.sub(r'^((?:\n?[\w.]* ?: .*)+)|(.*\w.*)',
_ToMarkdown._numpy_seealso, body)
elif section.title() in ('Returns', 'Yields', 'Raises', 'Warns'):
body = re.sub(r'^(?:(?P\*{0,2}\w+(?:, \*{0,2}\w+)*)'
r'(?: ?: (?P.*))|'
r'(?P\w[^\n`*]*))(?(?:\n(?: {4}.*|$))*)',
_ToMarkdown._numpy_params, body, flags=re.MULTILINE)
else:
body = re.sub(r'^(?P\*{0,2}\w+(?:, \*{0,2}\w+)*)'
r'(?: ?: (?P.*))?(?(?:\n(?: {4}.*|$))*)',
_ToMarkdown._numpy_params, body, flags=re.MULTILINE)
return section + '\n-----\n' + body
@staticmethod
def numpy(text):
"""
Convert `text` in numpydoc docstring format to Markdown
to be further converted later.
"""
return re.sub(r'^(\w[\w ]+)\n-{3,}\n'
r'((?:(?!.+\n-+).*$\n?)*)',
_ToMarkdown._numpy_sections, text, flags=re.MULTILINE)
@staticmethod
def _is_indented_4_spaces(txt, _3_spaces_or_less=re.compile(r'\n\s{0,3}\S').search):
return '\n' not in txt or not _3_spaces_or_less(txt)
@staticmethod
def _fix_indent(name, type, desc):
"""Maybe fix indent from 2 to 4 spaces."""
if not _ToMarkdown._is_indented_4_spaces(desc):
desc = desc.replace('\n', '\n ')
return name, type, desc
@staticmethod
def indent(indent, text, *, clean_first=False):
if clean_first:
text = inspect.cleandoc(text)
return re.sub(r'\n', '\n' + indent, indent + text.rstrip())
@staticmethod
def google(text,
_googledoc_sections=partial(
re.compile(r'^([A-Z]\w+):$\n((?:\n?(?: {2,}.*|$))+)', re.MULTILINE).sub,
lambda m, _params=partial(
re.compile(r'^([\w*]+)(?: \(([\w.,=\[\] ]+)\))?: '
r'((?:.*)(?:\n(?: {2,}.*|$))*)', re.MULTILINE).sub,
lambda m: _ToMarkdown._deflist(*_ToMarkdown._fix_indent(*m.groups()))): (
m.group() if not m.group(2) else '\n{}\n-----\n{}'.format(
m.group(1), _params(inspect.cleandoc('\n' + m.group(2))))))):
"""
Convert `text` in Google-style docstring format to Markdown
to be further converted later.
"""
return _googledoc_sections(text)
@staticmethod
def _admonition(match, module=None, limit_types=None):
indent, type, value, text = match.groups()
if limit_types and type not in limit_types:
return match.group(0)
if type == 'include' and module:
try:
return _ToMarkdown._include_file(indent, value,
_ToMarkdown._directive_opts(text), module)
except Exception as e:
raise RuntimeError('`.. include:: {}` error in module {!r}: {}'
.format(value, module.name, e))
if type in ('image', 'figure'):
return '{}\n'.format(
indent, text.translate(str.maketrans({'\n': ' ',
'[': '\\[',
']': '\\]'})).strip(), value)
if type == 'math':
return _ToMarkdown.indent(indent,
'\\[ ' + text.strip() + ' \\]',
clean_first=True)
if type == 'versionchanged':
title = 'Changed in version: ' + value
elif type == 'versionadded':
title = 'Added in version: ' + value
elif type == 'deprecated' and value:
title = 'Deprecated since version: ' + value
elif type == 'admonition':
title = value
elif type.lower() == 'todo':
title = 'TODO'
text = value + ' ' + text
else:
title = type.capitalize()
if value:
title += ': ' + value
text = _ToMarkdown.indent(indent + ' ', text, clean_first=True)
return '{}!!! {} "{}"\n{}\n'.format(indent, type, title, text)
@staticmethod
def admonitions(text, module, limit_types=None):
"""
Process reStructuredText's block directives such as
`.. warning::`, `.. deprecated::`, `.. versionadded::`, etc.
and turn them into Python-M>arkdown admonitions.
`limit_types` is optionally a set of directives to limit processing to.
See: https://python-markdown.github.io/extensions/admonition/
"""
substitute = partial(re.compile(r'^(?P *)\.\. ?(\w+)::(?: *(.*))?'
r'((?:\n(?:(?P=indent) +.*| *$))*)', re.MULTILINE).sub,
partial(_ToMarkdown._admonition, module=module,
limit_types=limit_types))
# Apply twice for nested (e.g. image inside warning)
return substitute(substitute(text))
@staticmethod
def _include_file(indent: str, path: str, options: dict, module: pdoc.Module) -> str:
start_line = int(options.get('start-line', 0))
end_line = int(options.get('end-line', 0)) or None
start_after = options.get('start-after')
end_before = options.get('end-before')
with open(os.path.join(os.path.dirname(module.obj.__file__), path),
encoding='utf-8') as f:
text = ''.join(list(f)[start_line:end_line])
if start_after:
text = text[text.index(start_after) + len(start_after):]
if end_before:
text = text[:text.index(end_before)]
return _ToMarkdown.indent(indent, text)
@staticmethod
def _directive_opts(text: str) -> dict:
return dict(re.findall(r'^ *:([^:]+): *(.*)', text, re.MULTILINE))
@staticmethod
def doctests(text,
_indent_doctests=partial(
re.compile(r'(?:^(?P```|~~~).*\n)?'
r'(?:^>>>.*'
r'(?:\n(?:(?:>>>|\.\.\.).*))*'
r'(?:\n.*)?\n\n?)+'
r'(?P=fence)?', re.MULTILINE).sub,
lambda m: (m.group(0) if m.group('fence') else
('\n ' + '\n '.join(m.group(0).split('\n')) + '\n\n')))):
"""
Indent non-fenced (`~~~`) top-level (0-indented)
doctest blocks so they render as code.
"""
if not text.endswith('\n'): # Needed for the r'(?:\n.*)?\n\n?)+' line (GH-72)
text += '\n'
return _indent_doctests(text)
@staticmethod
def raw_urls(text):
"""Wrap URLs in Python-Markdown-compatible ."""
return re.sub(r'(?)\s]+)(\s*)', r'\1<\2>\3', text)
class _MathPattern(InlineProcessor):
NAME = 'pdoc-math'
PATTERN = r'(?'): # CUT was put into its own paragraph
toc = toc[:-3].rstrip()
return toc
def format_git_link(template: str, dobj: pdoc.Doc):
    """
    Build a source link for `dobj` by filling `template` with `str.format`.

    The fields offered to the template are `template`, `dobj`, `abs_path`,
    `path` (project-relative), `lines`, `start_line`, `end_line` and, only
    if the template references it, `commit`.
    Returns None for an empty template. If anything fails, a warning with
    the traceback is issued and None is returned.
    """
    if not template:
        return None

    try:
        fields = {'template': template, 'dobj': dobj}

        # The git HEAD lookup spawns a process; do it only when needed.
        if 'commit' in _str_template_fields(template):
            fields['commit'] = _git_head_commit()

        source_file = inspect.getfile(inspect.unwrap(dobj.obj))
        fields['abs_path'] = source_file
        fields['path'] = _project_relative_path(source_file)

        source_lines, first_line = inspect.getsourcelines(dobj.obj)
        fields['lines'] = source_lines
        fields['start_line'] = first_line
        fields['end_line'] = first_line + len(source_lines) - 1

        return template.format(**fields)
    except Exception:
        warn('format_git_link for {} failed:\n{}'.format(dobj.obj, traceback.format_exc()))
        return None
@lru_cache()
def _git_head_commit():
    """
    Return the hash of the git HEAD commit as reported by
    `git rev-parse HEAD` run in the working directory.

    If git cannot be executed or the command fails, a warning is issued
    and None is returned. The result is cached.
    """
    process_args = ['git', 'rev-parse', 'HEAD']
    try:
        output = subprocess.check_output(process_args, universal_newlines=True)
    except OSError as error:
        warn("git executable not found on system:\n{}".format(error))
        return None
    except subprocess.CalledProcessError as error:
        failed_command = ' '.join(process_args)
        warn(
            "Ensure pdoc is run within a git repository.\n"
            "`{}` failed with output:\n{}"
            .format(failed_command, error.output)
        )
        return None
    return output.strip()
@lru_cache()
def _git_project_root():
    """
    Return the project root directory as reported by git, or None if
    no git command could be run successfully.

    The superproject working tree is asked for first, then the top-level
    directory; the first non-empty answer wins. The result is cached.
    """
    root = None
    commands = (
        ['git', 'rev-parse', '--show-superproject-working-tree'],
        ['git', 'rev-parse', '--show-toplevel'],
    )
    for command in commands:
        try:
            output = subprocess.check_output(command, universal_newlines=True)
        except (subprocess.CalledProcessError, OSError):
            # Not a repository, or git is unavailable; try the next command.
            continue
        root = output.rstrip('\r\n')
        if root:
            return root
    return root
@lru_cache()
def _project_relative_path(absolute_path):
    """
    Convert an absolute path of a python source file to a project-relative path.
    Assumes the project's path is either the git project root (falling back
    to the current working directory) or the Python library installation
    directory.

    Raises `RuntimeError` if `absolute_path` is below neither of them.
    The result is cached.
    """
    # `distutils` was removed from the standard library in Python 3.12;
    # `sysconfig` provides the equivalent site-packages location.
    import sysconfig

    candidates = (_git_project_root() or os.getcwd(),
                  sysconfig.get_path('purelib'))
    for prefix_path in candidates:
        if not prefix_path:
            continue
        try:
            common_path = os.path.commonpath([prefix_path, absolute_path])
        except ValueError:
            # Raised e.g. for paths on different drives; such a prefix
            # cannot be an ancestor, so move on to the next candidate.
            continue
        if common_path == prefix_path:
            # absolute_path is a descendant of prefix_path
            return os.path.relpath(absolute_path, prefix_path)
    raise RuntimeError(
        "absolute path {!r} is not a descendant of the current working directory "
        "or of the system's python library."
        .format(absolute_path)
    )
@lru_cache()
def _str_template_fields(template):
"""
Return a list of `str.format` field names in a template string.
"""
from string import Formatter
return [
field_name
for _, field_name, _, _ in Formatter().parse(template)
if field_name is not None
]