Skip to content

Commit 75037c6

Browse files
committed
Put escape sequences into separate token
1 parent 750a954 commit 75037c6

File tree

11 files changed

+162
-31
lines changed

11 files changed

+162
-31
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
9+
## [13.0.0] - WIP
10+
### Added
11+
- Added a new token type `text_special` to store escaped characters, same as `text` but
12+
unaffected by replacement plugins (smartquotes, typographer, linkifier, etc.).
13+
- Added a new rule `text_join` in `core` ruler. Text replacement plugins may choose to
14+
insert themselves before it.
15+
16+
### Changed
17+
- `text_collapse` rule is renamed to `fragments_join`.
18+
19+
### Fixed
20+
- Smartquotes, typographic replacements and plain text links can now be escaped
21+
with backslash (e.g. `\(c)` or `google\.com` are no longer replaced).
22+
23+
824
## [12.3.2] - 2022-01-08
925
### Security
1026
- Fix possible ReDOS in newline rule. Thanks to @MakeNowJust.
@@ -592,6 +608,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
592608
- Renamed presets folder (configs -> presets).
593609

594610

611+
[13.0.0]: https://github.com/markdown-it/markdown-it/compare/12.3.2...13.0.0
595612
[12.3.2]: https://github.com/markdown-it/markdown-it/compare/12.3.1...12.3.2
596613
[12.3.1]: https://github.com/markdown-it/markdown-it/compare/12.3.0...12.3.1
597614
[12.3.0]: https://github.com/markdown-it/markdown-it/compare/12.2.0...12.3.0

lib/parser_core.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ var _rules = [
1616
[ 'inline', require('./rules_core/inline') ],
1717
[ 'linkify', require('./rules_core/linkify') ],
1818
[ 'replacements', require('./rules_core/replacements') ],
19-
[ 'smartquotes', require('./rules_core/smartquotes') ]
19+
[ 'smartquotes', require('./rules_core/smartquotes') ],
20+
// `text_join` finds `text_special` tokens (for escape sequences)
21+
// and joins them with the rest of the text
22+
[ 'text_join', require('./rules_core/text_join') ]
2023
];
2124

2225

lib/parser_inline.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,18 @@ var _rules = [
2626
[ 'entity', require('./rules_inline/entity') ]
2727
];
2828

29+
// `rules2` ruleset was created specifically for emphasis/strikethrough
30+
// post-processing and may be changed in the future.
31+
//
32+
// Don't use this for anything except pairs (plugins working with `balance_pairs`).
33+
//
2934
var _rules2 = [
3035
[ 'balance_pairs', require('./rules_inline/balance_pairs') ],
3136
[ 'strikethrough', require('./rules_inline/strikethrough').postProcess ],
3237
[ 'emphasis', require('./rules_inline/emphasis').postProcess ],
33-
[ 'text_collapse', require('./rules_inline/text_collapse') ]
38+
// Rules for pairs split '**' into separate text tokens, some of which may be left unused;
39+
// the rule below merges those unused segments back with the rest of the text.
40+
[ 'fragments_join', require('./rules_inline/fragments_join') ]
3441
];
3542

3643

lib/presets/commonmark.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ module.exports = {
3838
rules: [
3939
'normalize',
4040
'block',
41-
'inline'
41+
'inline',
42+
'text_join'
4243
]
4344
},
4445

@@ -73,7 +74,7 @@ module.exports = {
7374
rules2: [
7475
'balance_pairs',
7576
'emphasis',
76-
'text_collapse'
77+
'fragments_join'
7778
]
7879
}
7980
}

lib/presets/zero.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ module.exports = {
3939
rules: [
4040
'normalize',
4141
'block',
42-
'inline'
42+
'inline',
43+
'text_join'
4344
]
4445
},
4546

@@ -55,7 +56,7 @@ module.exports = {
5556
],
5657
rules2: [
5758
'balance_pairs',
58-
'text_collapse'
59+
'fragments_join'
5960
]
6061
}
6162
}

lib/rules_core/text_join.js

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Join `text_special` tokens (escape sequences) with the rest of the text
2+
//
3+
// This is set as a separate rule so that plugins can run text replacements after
4+
// inline fragments are joined, but before escaped text (`text_special`) is merged in.
5+
//
6+
// For example, `\:)` shouldn't be replaced with an emoji.
7+
//
8+
'use strict';
9+
10+
11+
module.exports = function text_join(state) {
12+
var j, l, tokens, curr, max, last,
13+
blockTokens = state.tokens;
14+
15+
for (j = 0, l = blockTokens.length; j < l; j++) {
16+
if (blockTokens[j].type !== 'inline') continue;
17+
18+
tokens = blockTokens[j].children;
19+
max = tokens.length;
20+
21+
for (curr = 0; curr < max; curr++) {
22+
if (tokens[curr].type === 'text_special') {
23+
tokens[curr].type = 'text';
24+
}
25+
}
26+
27+
for (curr = last = 0; curr < max; curr++) {
28+
if (tokens[curr].type === 'text' &&
29+
curr + 1 < max &&
30+
tokens[curr + 1].type === 'text') {
31+
32+
// collapse two adjacent text nodes
33+
tokens[curr + 1].content = tokens[curr].content + tokens[curr + 1].content;
34+
} else {
35+
if (curr !== last) { tokens[last] = tokens[curr]; }
36+
37+
last++;
38+
}
39+
}
40+
41+
if (curr !== last) {
42+
tokens.length = last;
43+
}
44+
}
45+
};

lib/rules_inline/escape.js

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,40 +13,59 @@ for (var i = 0; i < 256; i++) { ESCAPED.push(0); }
1313

1414

1515
module.exports = function escape(state, silent) {
16-
var ch, pos = state.pos, max = state.posMax;
17-
18-
if (state.src.charCodeAt(pos) !== 0x5C/* \ */) { return false; }
16+
var ch1, ch2, origStr, escapedStr, token, pos = state.pos, max = state.posMax;
1917

18+
if (state.src.charCodeAt(pos) !== 0x5C/* \ */) return false;
2019
pos++;
2120

22-
if (pos < max) {
23-
ch = state.src.charCodeAt(pos);
21+
// '\' at the end of the inline block
22+
if (pos >= max) return false;
23+
24+
ch1 = state.src.charCodeAt(pos);
2425

25-
if (ch < 256 && ESCAPED[ch] !== 0) {
26-
if (!silent) { state.pending += state.src[pos]; }
27-
state.pos += 2;
28-
return true;
26+
if (ch1 === 0x0A) {
27+
if (!silent) {
28+
state.push('hardbreak', 'br', 0);
2929
}
3030

31-
if (ch === 0x0A) {
32-
if (!silent) {
33-
state.push('hardbreak', 'br', 0);
34-
}
31+
pos++;
32+
// skip leading whitespaces from next line
33+
while (pos < max) {
34+
ch1 = state.src.charCodeAt(pos);
35+
if (!isSpace(ch1)) break;
36+
pos++;
37+
}
38+
39+
state.pos = pos;
40+
return true;
41+
}
42+
43+
escapedStr = state.src[pos];
3544

45+
if (ch1 >= 0xD800 && ch1 <= 0xDBFF && pos + 1 < max) {
46+
ch2 = state.src.charCodeAt(pos + 1);
47+
48+
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
49+
escapedStr += state.src[pos + 1];
3650
pos++;
37-
// skip leading whitespaces from next line
38-
while (pos < max) {
39-
ch = state.src.charCodeAt(pos);
40-
if (!isSpace(ch)) { break; }
41-
pos++;
42-
}
43-
44-
state.pos = pos;
45-
return true;
4651
}
4752
}
4853

49-
if (!silent) { state.pending += '\\'; }
50-
state.pos++;
54+
origStr = '\\' + escapedStr;
55+
56+
if (!silent) {
57+
token = state.push('text_special', '', 0);
58+
59+
if (ch1 < 256 && ESCAPED[ch1] !== 0) {
60+
token.content = escapedStr;
61+
} else {
62+
token.content = origStr;
63+
}
64+
65+
token.markup = origStr;
66+
token.info = 'escape';
67+
}
68+
69+
state.pos = pos + 1;
5170
return true;
5271
};

lib/rules_inline/text_collapse.js renamed to lib/rules_inline/fragments_join.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
'use strict';
1010

1111

12-
module.exports = function text_collapse(state) {
12+
module.exports = function fragments_join(state) {
1313
var curr, last,
1414
level = 0,
1515
tokens = state.tokens,

test/fixtures/markdown-it/smartquotes.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,16 @@ Should parse quotes adjacent to inline html, #677:
164164
<p>“test <br>”</p>
165165
<p>“<br> test”</p>
166166
.
167+
168+
Should be escapable:
169+
.
170+
"foo"
171+
172+
\"foo"
173+
174+
"foo\"
175+
.
176+
<p>“foo”</p>
177+
<p>&quot;foo&quot;</p>
178+
<p>&quot;foo&quot;</p>
179+
.

test/fixtures/markdown-it/typographer.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ dupes
6060
<p>!!! ??? ,</p>
6161
.
6262

63+
copyright should be escapable
64+
.
65+
\(c)
66+
.
67+
<p>(c)</p>
68+
.
69+
6370

6471
dashes
6572
.
@@ -80,6 +87,16 @@ markdownit--awesome
8087
<p>markdownit–awesome</p>
8188
.
8289

90+
dashes should be escapable
91+
.
92+
foo \-- bar
93+
94+
foo -\- bar
95+
.
96+
<p>foo -- bar</p>
97+
<p>foo -- bar</p>
98+
.
99+
83100
regression tests for #624
84101
.
85102
1---2---3

test/misc.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,14 @@ describe('Misc', function () {
254254
md.render('# test\n\n - hello\n - world\n')
255255
);
256256
});
257+
258+
it('Should escape surrogate pairs (coverage)', function () {
259+
var md = markdownit();
260+
261+
assert.strictEqual(md.render('\\\uD835\uDC9C'), '<p>\\\uD835\uDC9C</p>\n');
262+
assert.strictEqual(md.render('\\\uD835x'), '<p>\\\uD835x</p>\n');
263+
assert.strictEqual(md.render('\\\uD835'), '<p>\\\uD835</p>\n');
264+
});
257265
});
258266

259267

0 commit comments

Comments
 (0)