# Source code for coaster.utils.markdown

"""
Markdown processor
==================

Markdown parser with a number of sane defaults that resemble
GitHub-Flavoured Markdown (GFM).

GFM exists because normal markdown has some vicious gotchas. Further reading:
http://blog.stackoverflow.com/2009/10/markdown-one-year-later/

This Markdown processor is used by :func:`~coaster.sqlalchemy.columns.MarkdownColumn`
to auto-render HTML from Markdown text.
"""

from copy import deepcopy
from typing import Any, Dict, List, Mapping, Optional, Union, cast, overload

from bleach import linkify as linkify_processor
from markdown import Markdown
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor
from markupsafe import Markup
from pymdownx.emoji import to_alt as emoji_to_alt

from .text import (
    LINKIFY_CALLBACKS,
    LINKIFY_SKIP_TAGS,
    VALID_TAGS,
    normalize_spaces_multiline,
    sanitize_html,
)

__all__ = [
    'markdown',
    'MARKDOWN_HTML_TAGS',
    'default_markdown_extensions_html',
    'default_markdown_extensions',
    'default_markdown_extension_configs',
]


# --- Constants ------------------------------------------------------------------------

# Work on a private copy of the generic whitelist so that adding the table
# elements below does not leak back into VALID_TAGS.
MARKDOWN_HTML_TAGS = deepcopy(VALID_TAGS)
cast(Dict, MARKDOWN_HTML_TAGS).update(
    # Table markup and the attributes permitted on each element
    table=['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'width'],
    caption=[],
    col=['align', 'char', 'charoff'],
    colgroup=['align', 'span', 'cols', 'char', 'charoff', 'width'],
    tbody=['align', 'char', 'charoff', 'valign'],
    td=['align', 'char', 'charoff', 'colspan', 'rowspan', 'valign'],
    tfoot=['align', 'char', 'charoff', 'valign'],
    th=['align', 'char', 'charoff', 'colspan', 'rowspan', 'valign'],
    thead=['align', 'char', 'charoff', 'valign'],
    tr=['align', 'char', 'charoff', 'valign'],
)

# --- Extensions -----------------------------------------------------------------------


class EscapeHtml(Extension):
    """
    Markdown extension that turns off raw HTML handling, for use with Markdown().

    Stands in for the deprecated `safe_mode='escape'` keyword.
    Ref: https://python-markdown.github.io/change_log/release-3.0/
    #safe_mode-and-html_replacement_text-keywords-deprecated
    """

    def extendMarkdown(self, md) -> None:  # NOQA: N802
        """Deregister the block-level and inline raw HTML handlers from `md`."""
        block_level, inline_level = md.preprocessors, md.inlinePatterns
        block_level.deregister('html_block')
        inline_level.deregister('html')


class JavascriptProtocolProcessor(Treeprocessor):
    """
    Processor to remove `javascript:` links.

    The scheme test is applied to a normalised copy of the ``href``. Per the WHATWG
    URL parsing rules, browsers drop leading C0 control characters and spaces, and
    remove ASCII tab/newline characters anywhere in the URL, so values such as
    ``' javascript:...'`` or ``'java\\tscript:...'`` still execute script and must
    be caught as well.
    """

    # Characters ignored at the start of a URL: U+0000 through U+0020 (space)
    _leading_ignored = ''.join(map(chr, range(0x21)))
    # Characters ignored wherever they appear in a URL: tab, LF and CR
    _embedded_ignored = {ord(char): None for char in '\t\n\r'}

    def run(self, root):
        """
        Strip the ``href`` attribute from every ``<a>`` that uses `javascript:`.

        :param root: Root element of the parsed document; modified in place
        """
        for anchor in root.iter('a'):
            href = anchor.attrib.get('href')
            if not href:
                continue
            # Compare against what a browser would actually parse as the scheme
            normalized = (
                href.lstrip(self._leading_ignored)
                .translate(self._embedded_ignored)
                .lower()
            )
            if normalized.startswith('javascript:'):
                del anchor.attrib['href']


class JavascriptProtocolExtension(Extension):
    """Markdown extension that installs :class:`JavascriptProtocolProcessor`."""

    def extendMarkdown(self, md) -> None:  # NOQA: N802
        """Add the `javascript:` link scrubber to `md`'s tree processors."""
        scrubber = JavascriptProtocolProcessor(md)
        # Priority 1 is deliberately low so that this processor runs last
        md.treeprocessors.register(scrubber, 'javascript_protocol', 1)
        md.registerExtension(self)


# --- Standard extensions --------------------------------------------------------------

# FIXME: Disable support for custom css classes as described here:
# https://facelessuser.github.io/pymdown-extensions/extensions/superfences/#injecting-classes-ids-and-attributes

# Extensions enabled when raw HTML is allowed in the source text
default_markdown_extensions_html: List[Union[str, Extension]] = [
    'markdown.extensions.abbr',
    'markdown.extensions.footnotes',
    'markdown.extensions.tables',
    'markdown.extensions.nl2br',
    'markdown.extensions.sane_lists',
    'markdown.extensions.smarty',
    'pymdownx.superfences',
    'pymdownx.betterem',
    'pymdownx.caret',  # ^^text^^ becomes <ins>
    'pymdownx.tilde',  # ~~text~~ becomes <del>
    'pymdownx.emoji',  # :emoji: shortcodes
    'pymdownx.mark',  # ==text== becomes <mark>
    'pymdownx.saneheaders',  # Only `# header` is a header, `#header` is not
    'pymdownx.smartsymbols',
    JavascriptProtocolExtension(),
]

# Extensions enabled when raw HTML is not allowed: everything above, plus syntax
# highlighting, task lists and the extension that disables raw HTML handling
default_markdown_extensions: List[Union[str, Extension]] = [
    *default_markdown_extensions_html,
    'markdown.extensions.codehilite',
    'pymdownx.tasklist',
    EscapeHtml(),
]


# Per-extension settings, keyed by the extension's dotted name. This mapping is the
# default for the ``extension_configs`` argument of :func:`markdown`.
default_markdown_extension_configs: Mapping[str, Mapping[str, Any]] = {
    # Highlighted code uses the 'highlight' CSS class; no language guessing
    'markdown.extensions.codehilite': {'css_class': 'highlight', 'guess_lang': False},
    # Fenced blocks share the same CSS class; indented code blocks are turned off
    'pymdownx.superfences': {
        'css_class': 'highlight',
        'disable_indented_code_blocks': True,
    },
    # Symbol replacement: the trademark, copyright, registered and care-of
    # substitutions are switched off, the remaining ones are switched on
    'pymdownx.smartsymbols': {
        'trademark': False,
        'copyright': False,
        'registered': False,
        'care_of': False,
        'plusminus': True,
        'arrows': True,
        'notequal': True,
        'fractions': True,
        'ordinal_numbers': True,
    },
    # Emoji are generated with pymdownx.emoji's `to_alt` generator
    'pymdownx.emoji': {'emoji_generator': emoji_to_alt},
    'pymdownx.mark': {'smart_mark': True},
}


# --- Markdown processor ---------------------------------------------------------------


# Typing overload: when ``text`` is ``None`` the declared result is ``None``.
@overload
def markdown(  # NOQA: D103
    text: None,
    html: bool = False,
    linkify: bool = True,
    valid_tags: Optional[Union[List[str], Mapping[str, List]]] = None,
    extensions: Optional[List[Union[str, Extension]]] = None,
    extension_configs: Optional[Mapping[str, Mapping[str, Any]]] = None,
) -> None:
    ...


# Typing overload: when ``text`` is a string the declared result is ``Markup``.
@overload
def markdown(  # NOQA: D103
    text: str,
    html: bool = False,
    linkify: bool = True,
    valid_tags: Optional[Union[List[str], Mapping[str, List]]] = None,
    extensions: Optional[List[Union[str, Extension]]] = None,
    extension_configs: Optional[Mapping[str, Mapping[str, Any]]] = None,
) -> Markup:
    ...


def markdown(
    text: Optional[str],
    html: bool = False,
    linkify: bool = True,
    valid_tags: Optional[Union[List[str], Mapping[str, List]]] = None,
    extensions: Optional[List[Union[str, Extension]]] = None,
    extension_configs: Optional[Mapping[str, Mapping[str, Any]]] = None,
) -> Optional[Markup]:
    """
    Markdown parser with a number of sane defaults that resemble GFM.

    :param str text: Markdown source text; ``None`` is returned unchanged
    :param bool html: Allow known-safe HTML tags in text (this disables code syntax
        highlighting and task lists)
    :param bool linkify: Whether to convert naked URLs into links
    :param dict valid_tags: Valid tags and attributes if HTML is allowed
    :param list extensions: List of Markdown extensions to be enabled
    :param dict extension_configs: Config for Markdown extensions
    :return: Rendered HTML wrapped in :class:`~markupsafe.Markup`, or ``None`` if
        `text` is ``None``
    """
    if text is None:
        return None
    if valid_tags is None:
        valid_tags = MARKDOWN_HTML_TAGS
    if extensions is None:
        # The default extension set depends on whether raw HTML is allowed
        extensions = (
            default_markdown_extensions_html if html else default_markdown_extensions
        )
    if extension_configs is None:
        extension_configs = default_markdown_extension_configs

    # Replace invisible characters with spaces
    text = normalize_spaces_multiline(text)

    output = Markdown(
        output_format='html',
        extensions=extensions,
        extension_configs=extension_configs,
    ).convert(cast(str, text))

    if html:
        # Raw HTML was let through by the parser, so restrict the result to the
        # whitelisted tags; the sanitizer also takes care of linkification
        output = sanitize_html(output, valid_tags=valid_tags, linkify=linkify)
    elif linkify:
        output = linkify_processor(
            output, callbacks=LINKIFY_CALLBACKS, skip_tags=LINKIFY_SKIP_TAGS
        )
    return Markup(output)