Markdown processor

Markdown parser with a number of sane defaults that resemble
GitHub-Flavoured Markdown (GFM).

GFM exists because normal markdown has some vicious gotchas. Further reading:

This Markdown processor is used by :func:`~coaster.sqlalchemy.columns.MarkdownColumn`
to auto-render HTML from Markdown text.

from copy import deepcopy
from typing import Any, Dict, List, Mapping, Optional, Union, cast, overload

from bleach import linkify as linkify_processor
from markdown import Markdown
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor
from markupsafe import Markup
from pymdownx.emoji import to_alt as emoji_to_alt

from .text import (

__all__ = [

# --- Constants ------------------------------------------------------------------------

cast(Dict, MARKDOWN_HTML_TAGS).update(
        # For tables:
        'table': ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'width'],
        'caption': [],
        'col': ['align', 'char', 'charoff'],
        'colgroup': ['align', 'span', 'cols', 'char', 'charoff', 'width'],
        'tbody': ['align', 'char', 'charoff', 'valign'],
        'td': ['align', 'char', 'charoff', 'colspan', 'rowspan', 'valign'],
        'tfoot': ['align', 'char', 'charoff', 'valign'],
        'th': ['align', 'char', 'charoff', 'colspan', 'rowspan', 'valign'],
        'thead': ['align', 'char', 'charoff', 'valign'],
        'tr': ['align', 'char', 'charoff', 'valign'],

# --- Extensions -----------------------------------------------------------------------

class EscapeHtml(Extension):
    Extension to escape HTML tags to use with Markdown().

    This replaces `safe_mode='escape'`.

    def extendMarkdown(self, md) -> None:  # NOQA: N802

class JavascriptProtocolProcessor(Treeprocessor):
    """
    Processor to remove `javascript:` links.

    Walks every ``<a>`` element in the tree and deletes its ``href`` attribute when
    the URL uses the ``javascript:`` scheme. The scheme test is case-insensitive and
    first discards the characters a browser's URL parser ignores (tab, LF and CR
    anywhere; C0 control characters and space at the start), so hrefs such as
    ``' JavaScript:...'`` or ``'java\\tscript:...'`` are removed as well.
    """

    # Tab, LF and CR are dropped from anywhere in a URL before it is parsed
    _ignored_anywhere = str.maketrans('', '', '\t\n\r')
    # C0 control characters (U+0000 to U+001F) and space are stripped from the start
    _ignored_leading = ''.join(map(chr, range(0x21)))

    def run(self, root):
        """Remove ``href`` from every ``javascript:`` anchor under `root`, in place."""
        for anchor in root.iter('a'):
            href = anchor.attrib.get('href')
            if not href:
                continue
            # Compare against what a browser would actually treat as the scheme
            normalized = href.translate(self._ignored_anywhere).lstrip(
                self._ignored_leading
            )
            if normalized.lower().startswith('javascript:'):
                del anchor.attrib['href']

class JavascriptProtocolExtension(Extension):
    """Markdown extension for :class:`JavascriptProtocolProcessor`."""

    def extendMarkdown(self, md) -> None:  # NOQA: N802
        # Register with low priority so we run last
            JavascriptProtocolProcessor(md), 'javascript_protocol', 1

# --- Standard extensions --------------------------------------------------------------

# FIXME: Disable support for custom css classes as described here:

default_markdown_extensions_html: List[Union[str, Extension]] = [
    'pymdownx.caret',  # Support ^^<ins>^^
    'pymdownx.tilde',  # Support ~~<del>~~
    'pymdownx.emoji',  # Support :emoji:
    'pymdownx.mark',  # Support ==<mark>==
    'pymdownx.saneheaders',  # Disable `#header`, only allow `# header`

default_markdown_extensions = default_markdown_extensions_html + [

default_markdown_extension_configs: Mapping[str, Mapping[str, Any]] = {
    'markdown.extensions.codehilite': {'css_class': 'highlight', 'guess_lang': False},
    'pymdownx.superfences': {
        'css_class': 'highlight',
        'disable_indented_code_blocks': True,
    'pymdownx.smartsymbols': {
        'trademark': False,
        'copyright': False,
        'registered': False,
        'care_of': False,
        'plusminus': True,
        'arrows': True,
        'notequal': True,
        'fractions': True,
        'ordinal_numbers': True,
    'pymdownx.emoji': {'emoji_generator': emoji_to_alt},
    'pymdownx.mark': {'smart_mark': True},

# --- Markdown processor ---------------------------------------------------------------

def markdown(
    text: Optional[str],
    html: bool = False,
    linkify: bool = True,
    valid_tags: Optional[Union[List[str], Mapping[str, List]]] = None,
    extensions: Optional[List[Union[str, Extension]]] = None,
    extension_configs: Optional[Mapping[str, Mapping[str, Any]]] = None,
) -> Optional[Markup]:
    """
    Markdown parser with a number of sane defaults that resemble GFM.

    :param str text: Markdown source to render; ``None`` is returned unchanged
    :param bool html: Allow known-safe HTML tags in text (this disables code syntax
        highlighting and task lists)
    :param bool linkify: Whether to convert naked URLs into links
    :param dict valid_tags: Valid tags and attributes if HTML is allowed
    :param list extensions: List of Markdown extensions to be enabled
    :param dict extension_configs: Config for Markdown extensions
    :returns: Rendered HTML wrapped in ``Markup``, or ``None`` if `text` is ``None``
    """
    if text is None:
        return None

    # Fall back to the module-level defaults for anything the caller left unset
    if valid_tags is None:
        valid_tags = MARKDOWN_HTML_TAGS
    if extensions is None:
        extensions = (
            default_markdown_extensions_html if html else default_markdown_extensions
        )
    if extension_configs is None:
        extension_configs = default_markdown_extension_configs

    # Replace invisible characters with spaces
    text = normalize_spaces_multiline(text)

    # The conversion step is identical in both modes; only post-processing differs
    output = Markdown(
        output_format='html',
        extensions=extensions,
        extension_configs=extension_configs,
    ).convert(cast(str, text))

    if html:
        # HTML mode: sanitize_html receives the permitted tags and the linkify flag
        return Markup(sanitize_html(output, valid_tags=valid_tags, linkify=linkify))

    if linkify:
        output = linkify_processor(
            output, callbacks=LINKIFY_CALLBACKS, skip_tags=LINKIFY_SKIP_TAGS
        )
    return Markup(output)