test_markdown_sanitization.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """Tests for checklist 2.3 — Markdown rendering sanitization. |
| 2 | |
| 3 | Verifies that _markdown() (backed by mistune HTMLRenderer with escape=True) |
| 4 | strips or escapes dangerous HTML before it reaches the browser. All tests |
| 5 | are synchronous and require no fixtures. |
| 6 | """ |
| 7 | from __future__ import annotations |
| 8 | |
| 9 | import pytest |
| 10 | |
| 11 | from musehub.api.routes.musehub.jinja2_filters import _markdown |
| 12 | |
| 13 | |
| 14 | # --------------------------------------------------------------------------- |
| 15 | # XSS / injection payloads that must NOT survive rendering |
| 16 | # --------------------------------------------------------------------------- |
| 17 | |
def test_script_tag_is_not_rendered() -> None:
    """A literal <script> element in the input must be absent from the output."""
    payload = "<script>alert('xss')</script>"
    assert "<script" not in _markdown(payload)
| 22 | |
| 23 | |
def test_iframe_tag_is_not_rendered() -> None:
    """A raw <iframe> opening tag must not survive rendering as live HTML."""
    rendered = _markdown('<iframe src="evil.com">')
    assert "<iframe" not in rendered
| 28 | |
| 29 | |
def test_img_onerror_is_not_rendered() -> None:
    """An <img> with an inline onerror handler must not come out as a live tag.

    The renderer is configured with escape=True (see the module docstring), so
    the whole raw tag is expected to be HTML-encoded. Only the absence of an
    unescaped ``<img`` is checked; the encoded form is deliberately not asserted.
    """
    payload = '<img onerror="alert(1)" src="x">'
    result = _markdown(payload)
    assert "<img" not in result, f"Raw <img> tag must be escaped, got: {result}"
| 38 | |
| 39 | |
def test_javascript_url_in_link_is_stripped() -> None:
    """A Markdown link pointing at a javascript: URL must not leak that scheme."""
    rendered = _markdown("[click me](javascript:alert(1))")
    assert "javascript:" not in rendered
| 44 | |
| 45 | |
def test_style_tag_is_not_rendered() -> None:
    """A raw <style> block in the input must be absent from the rendered HTML."""
    payload = "<style>body{display:none}</style>"
    assert "<style" not in _markdown(payload)
| 50 | |
| 51 | |
def test_on_event_handler_attribute_stripped() -> None:
    """A raw <a> tag carrying an onclick handler must not be emitted unescaped.

    With escape=True the renderer is expected to HTML-encode the entire raw
    tag, so the check is simply that no literal ``<a `` opening remains.
    """
    payload = '<a href="x" onclick="evil()">link</a>'
    result = _markdown(payload)
    assert "<a " not in result, f"Raw <a> tag must be escaped, got: {result}"
| 60 | |
| 61 | |
def test_data_uri_in_img_src() -> None:
    """A <script> smuggled through a data: URI in a raw <img> must not go live."""
    # The payload is a raw-HTML <img> tag, which the renderer is expected to
    # escape as a whole; the data: scheme itself is not what is asserted here,
    # only that no unescaped <script opening reaches the output.
    payload = '<img src="data:text/html,<script>evil()</script>">'
    rendered = _markdown(payload)
    assert "<script" not in rendered
| 68 | |
| 69 | |
| 70 | # --------------------------------------------------------------------------- |
| 71 | # Normal Markdown that must render correctly |
| 72 | # --------------------------------------------------------------------------- |
| 73 | |
def test_bold_renders_strong() -> None:
    """Double-asterisk emphasis must come out as <strong> (or <b>)."""
    rendered = _markdown("**bold**")
    assert any(tag in rendered for tag in ("<strong>", "<b>"))
| 78 | |
| 79 | |
def test_normal_link_renders_href() -> None:
    """An ordinary https link must render with an href that keeps its target."""
    rendered = _markdown("[Google](https://google.com)")
    # Check each expected fragment separately so a failure names the missing one.
    for fragment in ("href=", "google.com"):
        assert fragment in rendered
| 85 | |
| 86 | |
def test_normal_link_has_noopener() -> None:
    """Rendered links must include ``noopener`` in their markup for security."""
    link_markdown = "[Google](https://google.com)"
    assert "noopener" in _markdown(link_markdown)
| 91 | |
| 92 | |
def test_plain_text_unchanged() -> None:
    """Prose with no Markdown or HTML syntax must appear verbatim in the output."""
    prose = "Hello world this is plain text."
    # The output may wrap the text in markup, so check containment, not equality.
    assert prose in _markdown(prose)
| 98 | |
| 99 | |
def test_italic_renders() -> None:
    """Single-asterisk emphasis must come out as <em> (or <i>)."""
    rendered = _markdown("*italic*")
    assert any(tag in rendered for tag in ("<em>", "<i>"))
| 103 | |
| 104 | |
def test_heading_renders() -> None:
    """A top-level ``#`` heading must be demoted one level and emitted as h2."""
    rendered = _markdown("# Title")
    # Two separate assertions so a failure shows which expectation broke.
    assert "<h2" in rendered
    assert "Title" in rendered
| 109 | |
| 110 | |
| 111 | # --------------------------------------------------------------------------- |
| 112 | # Fenced code block — script inside code must be escaped |
| 113 | # --------------------------------------------------------------------------- |
| 114 | |
def test_script_in_fenced_code_block_is_escaped() -> None:
    """<script> inside a fenced code block must be rendered as &lt;script&gt;.

    Code-block content is shown to the reader verbatim, so the tag has to
    appear in the output in its entity-encoded form rather than as live HTML.
    """
    md = "```html\n<script>alert('xss')</script>\n```"
    result = _markdown(md)
    # The raw, executable tag must not appear.
    assert "<script>" not in result
    # The entity-encoded form must be present. Asserting the raw "<script>"
    # here would contradict the line above and make the test unsatisfiable.
    assert "&lt;script&gt;" in result
| 123 | |
| 124 | |
def test_angle_brackets_in_inline_code_are_escaped() -> None:
    """Inline code containing angle brackets must be HTML-escaped.

    The literal ``<b>`` inside backticks must reach the output as
    ``&lt;b&gt;`` so it is displayed as text instead of being interpreted
    as a bold element.
    """
    result = _markdown("`<b>not bold</b>`")
    # No live <b> element may be produced from the code span.
    assert "<b>" not in result
    # The entity-encoded form must be present. Asserting the raw "<b>" here
    # would contradict the line above and make the test unsatisfiable.
    assert "&lt;b&gt;" in result
| 130 | |
| 131 | |
| 132 | # --------------------------------------------------------------------------- |
| 133 | # Edge cases |
| 134 | # --------------------------------------------------------------------------- |
| 135 | |
def test_none_returns_empty_string() -> None:
    """Passing None must yield an empty string rather than raising."""
    rendered = _markdown(None)
    assert rendered == ""
| 139 | |
| 140 | |
def test_empty_string_returns_empty_string() -> None:
    """An empty input string must render to an empty string."""
    rendered = _markdown("")
    assert rendered == ""
| 144 | |
| 145 | |
def test_whitespace_only_returns_empty_string() -> None:
    """Whitespace-only input must render without raising or injecting markup.

    Despite the test name, this does NOT assert an empty result. A
    whitespace-only string is truthy in Python, so it is not caught by an
    emptiness guard and is handed to the renderer. The test documents that
    behaviour and only checks that nothing dangerous comes out.
    """
    # NOTE(review): _markdown is understood to guard with `if not value`,
    # which catches "" and None but not " "; confirm against its source.
    result = _markdown(" ")
    # Must not raise; output is either empty or whitespace — never raw HTML injection.
    assert "<script" not in result