gabriel / musehub public
test_markdown_sanitization.py python
153 lines 5.8 KB
Raw
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago
1 """Tests for checklist 2.3 — Markdown rendering sanitization.
2
3 Verifies that _markdown() (backed by mistune HTMLRenderer with escape=True)
4 strips or escapes dangerous HTML before it reaches the browser. All tests
5 are synchronous and require no fixtures.
6 """
7 from __future__ import annotations
8
9 import pytest
10
11 from musehub.api.routes.musehub.jinja2_filters import _markdown
12
13
14 # ---------------------------------------------------------------------------
15 # XSS / injection payloads that must NOT survive rendering
16 # ---------------------------------------------------------------------------
17
def test_script_tag_is_not_rendered() -> None:
    """A raw ``<script>`` element in the input must never surface as a live tag."""
    payload = "<script>alert('xss')</script>"
    rendered = _markdown(payload)
    # Only the tag opener is checked, so attribute variants are covered too.
    assert "<script" not in rendered
22
23
def test_iframe_tag_is_not_rendered() -> None:
    """A raw ``<iframe>`` opener in the input must not survive rendering."""
    payload = '<iframe src="evil.com">'
    rendered = _markdown(payload)
    assert "<iframe" not in rendered
28
29
def test_img_onerror_is_not_rendered() -> None:
    """An ``<img>`` with an inline ``onerror`` handler must not come out as a raw tag.

    Only the unescaped ``<img`` opener is checked. An HTML-encoded copy of the
    tag in the output is acceptable, because the browser shows it as text
    instead of executing the handler.
    """
    payload = '<img onerror="alert(1)" src="x">'
    result = _markdown(payload)
    assert "<img" not in result, f"Raw <img> tag must be escaped, got: {result}"
38
39
def test_javascript_url_in_link_is_stripped() -> None:
    """A Markdown link targeting the ``javascript:`` scheme must be neutralised."""
    hostile_link = "[click me](javascript:alert(1))"
    rendered = _markdown(hostile_link)
    # The scheme must be absent from the whole output, href included.
    assert "javascript:" not in rendered
44
45
def test_style_tag_is_not_rendered() -> None:
    """A raw ``<style>`` block in the input must not be emitted as a live tag."""
    payload = "<style>body{display:none}</style>"
    rendered = _markdown(payload)
    assert "<style" not in rendered
50
51
def test_on_event_handler_attribute_stripped() -> None:
    """A raw anchor carrying an ``on*`` event handler must not be emitted verbatim.

    The assertion looks for an unescaped ``<a `` opener. If the renderer
    HTML-encodes the raw tag, the handler never becomes a live attribute.
    """
    payload = '<a href="x" onclick="evil()">link</a>'
    result = _markdown(payload)
    assert "<a " not in result, f"Raw <a> tag must be escaped, got: {result}"
60
61
def test_data_uri_in_img_src() -> None:
    """A ``data:`` URI that smuggles ``<script>`` inside a raw ``<img>`` must not yield a live script tag."""
    # The payload is a raw-HTML tag, not a Markdown image. The test checks only
    # that no unescaped "<script" opener appears, whatever happens to the URI.
    payload = '<img src="data:text/html,<script>evil()</script>">'
    rendered = _markdown(payload)
    assert "<script" not in rendered
68
69
70 # ---------------------------------------------------------------------------
71 # Normal Markdown that must render correctly
72 # ---------------------------------------------------------------------------
73
def test_bold_renders_strong() -> None:
    """``**bold**`` must render as a bold element (``<strong>`` or ``<b>``)."""
    rendered = _markdown("**bold**")
    accepted_tags = ("<strong>", "<b>")
    assert any(tag in rendered for tag in accepted_tags)
78
79
def test_normal_link_renders_href() -> None:
    """An ordinary https link must render with an ``href`` that keeps its target."""
    rendered = _markdown("[Google](https://google.com)")
    # Both fragments must be present: the attribute and the destination host.
    for expected in ("href=", "google.com"):
        assert expected in rendered
85
86
def test_normal_link_has_noopener() -> None:
    """Rendered links must include ``noopener`` in their output for security."""
    link_markdown = "[Google](https://google.com)"
    rendered = _markdown(link_markdown)
    assert "noopener" in rendered
91
92
def test_plain_text_unchanged() -> None:
    """Prose with no Markdown or HTML metacharacters must appear verbatim in the output."""
    prose = "Hello world this is plain text."
    rendered = _markdown(prose)
    # Containment rather than equality: the renderer may wrap the text in markup.
    assert prose in rendered
98
99
def test_italic_renders() -> None:
    """``*italic*`` must render as an emphasis element (``<em>`` or ``<i>``)."""
    rendered = _markdown("*italic*")
    accepted_tags = ("<em>", "<i>")
    assert any(tag in rendered for tag in accepted_tags)
103
104
def test_heading_renders() -> None:
    """A level-1 Markdown heading must render as ``<h2`` and keep its text (h1 → h2 shift)."""
    rendered = _markdown("# Title")
    # Separate asserts so a failure shows which expectation broke.
    assert "<h2" in rendered
    assert "Title" in rendered
109
110
111 # ---------------------------------------------------------------------------
112 # Fenced code block — script inside code must be escaped
113 # ---------------------------------------------------------------------------
114
def test_script_in_fenced_code_block_is_escaped() -> None:
    """``<script>`` inside a fenced code block must appear only in HTML-escaped form."""
    fenced_source = "```html\n<script>alert('xss')</script>\n```"
    rendered = _markdown(fenced_source)
    # The escaped form must be present (the code is shown to the reader)...
    assert "&lt;script&gt;" in rendered
    # ...and the live tag must be absent.
    assert "<script>" not in rendered
123
124
def test_angle_brackets_in_inline_code_are_escaped() -> None:
    """Angle brackets inside an inline code span must be HTML-escaped, not rendered as tags."""
    inline_code = "`<b>not bold</b>`"
    rendered = _markdown(inline_code)
    assert "&lt;b&gt;" in rendered
    assert "<b>" not in rendered
130
131
132 # ---------------------------------------------------------------------------
133 # Edge cases
134 # ---------------------------------------------------------------------------
135
def test_none_returns_empty_string() -> None:
    """Passing ``None`` must yield an empty string rather than raising."""
    rendered = _markdown(None)
    assert rendered == ""
139
140
def test_empty_string_returns_empty_string() -> None:
    """Passing an empty string must yield an empty string."""
    rendered = _markdown("")
    assert rendered == ""
144
145
def test_whitespace_only_returns_empty_string() -> None:
    """Whitespace-only input must render without raising and without injecting markup.

    Despite the test's name, an empty result is NOT asserted. A whitespace-only
    string is truthy in Python, so a plain ``if not value`` guard (which the
    original author's note says ``_markdown`` uses — not verifiable from this
    file) would not short-circuit it, and the input would reach the renderer.
    The test therefore documents the weaker guarantee that actually holds.
    """
    result = _markdown(" ")
    # Output may be empty or whitespace; either way no script tag may be present.
    assert "<script" not in result
File History 3 commits
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago
sha256:6b1949fc2797ca4c1936a637a4cbfec828ef56cf52398a2e74ca3c4f494e728f fix: use wire_bytes not mpack_bytes_raw in compute_object_b… Sonnet 4.6 patch 10 days ago
sha256:4aed3d8601c8dd3ed37074de35f11f4a9699a0a4b99d43727048fd3f8e6fd13d chore: doc sweep, ignore wrangler build state, misc fixes Sonnet 4.6 minor 13 days ago