diff --git a/.changeset/empty-rules-double.md b/.changeset/empty-rules-double.md new file mode 100644 index 000000000000..d057b0fb9e83 --- /dev/null +++ b/.changeset/empty-rules-double.md @@ -0,0 +1,7 @@ +--- +swc_core: patch +swc_ecma_codegen: patch +swc_ecma_minifier: patch +--- + +fix(es/codegen): Fix escape of unicode characters diff --git a/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.js b/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.js index d450bbfd1a3d..7b9a8674ad85 100644 --- a/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.js +++ b/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.js @@ -1 +1 @@ -export default{a:"֑-ۯۺ-ࣿ‏\ud802-\ud803\ud83a-\ud83bיִ-﷿ﹰ-ﻼ",b:"A-Za-z\xc0-\xd6\xd8-\xf6\xf8-ʸ̀-֐ऀ-῿‎Ⰰ-\ud801\ud804-\ud839\ud83c-\udbff豈-﬜︀-﹯﻽-￿"}; +export default{a:"֑-ۯۺ-ࣿ‏\\ud802-\\ud803\\ud83a-\\ud83bיִ-﷿ﹰ-ﻼ",b:"A-Za-z\xc0-\xd6\xd8-\xf6\xf8-ʸ̀-֐ऀ-῿‎Ⰰ-\\ud801\\ud804-\\ud839\\ud83c-\\udbff豈-﬜︀-﹯﻽-￿"}; diff --git a/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.map b/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.map index f30ccd455831..7622e6fc9982 100644 --- a/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.map +++ b/crates/swc/tests/fixture/issues-4xxx/4120/1/output/index.map @@ -1,5 +1,5 @@ { - "mappings": "AAUA,cAAe,CAAEA,EATb,0CASgBC,EALhB,kFAKkB,CAAE", + "mappings": "AAUA,cAAe,CAAEA,EATb,8CASgBC,EALhB,uFAKkB,CAAE", "names": [ "a", "b" diff --git a/crates/swc/tests/fixture/issues-7xxx/7678/output/1.js b/crates/swc/tests/fixture/issues-7xxx/7678/output/1.js index 4581a3820115..90f649fa88aa 100644 --- a/crates/swc/tests/fixture/issues-7xxx/7678/output/1.js +++ b/crates/swc/tests/fixture/issues-7xxx/7678/output/1.js @@ -1 +1 @@ -let str="\uD83D\uDC68\\u200D\uD83D\uDE80";let obj={"\uD83D\uDC68\\u200D\uD83D\uDE80":"wrong"}; +let str="\\uD83D\\uDC68\\u200D\\uD83D\\uDE80";let obj={"\\uD83D\\uDC68\\u200D\\uD83D\\uDE80":"wrong"}; diff --git 
a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings10_ES5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings10_ES5.1.normal.js index 9842e56d882e..341dae94b6ee 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings10_ES5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings10_ES5.1.normal.js @@ -3,4 +3,4 @@ // 2. Let cu1 be floor((cp – 65536) / 1024) + 0xD800. // Although we should just get back a single code point value of 0xD800, // this is a useful edge-case test. -var x = "\u{D800}"; +var x = "\\u{D800}"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings11_ES5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings11_ES5.1.normal.js index 2e2d5e00cd6d..27eb65f81854 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings11_ES5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInStrings11_ES5.1.normal.js @@ -3,4 +3,4 @@ // 2. Let cu2 be ((cp – 65536) modulo 1024) + 0xDC00. // Although we should just get back a single code point value of 0xDC00, // this is a useful edge-case test. -var x = "\u{DC00}"; +var x = "\\u{DC00}"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates10_ES5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates10_ES5.1.normal.js index 1e3e0fa950b4..74c0626b990c 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates10_ES5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates10_ES5.1.normal.js @@ -3,4 +3,4 @@ // 2. Let cu1 be floor((cp – 65536) / 1024) + 0xD800. // Although we should just get back a single code point value of 0xD800, // this is a useful edge-case test. 
-var x = "\u{D800}"; +var x = "\\u{D800}"; diff --git a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates11_ES5.1.normal.js b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates11_ES5.1.normal.js index db96092c3f5f..cff5726064c8 100644 --- a/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates11_ES5.1.normal.js +++ b/crates/swc/tests/tsc-references/unicodeExtendedEscapesInTemplates11_ES5.1.normal.js @@ -3,4 +3,4 @@ // 2. Let cu2 be ((cp – 65536) modulo 1024) + 0xDC00. // Although we should just get back a single code point value of 0xDC00, // this is a useful edge-case test. -var x = "\u{DC00}"; +var x = "\\u{DC00}"; diff --git a/crates/swc_ecma_codegen/src/lit.rs b/crates/swc_ecma_codegen/src/lit.rs index 1ebe6497faf6..9e5749ad3bb3 100644 --- a/crates/swc_ecma_codegen/src/lit.rs +++ b/crates/swc_ecma_codegen/src/lit.rs @@ -397,89 +397,7 @@ pub fn get_quoted_utf16(v: &str, ascii_only: bool, target: EsVersion) -> (AsciiC '\r' => buf.push_str("\\r"), '\u{000b}' => buf.push_str("\\v"), '\t' => buf.push('\t'), - '\\' => { - let next = iter.peek(); - match next { - Some('u') => { - let mut inner_iter = iter.clone(); - inner_iter.next(); - - let mut is_curly = false; - let mut next = inner_iter.peek(); - - if next == Some(&'{') { - is_curly = true; - inner_iter.next(); - next = inner_iter.peek(); - } else if next != Some(&'D') && next != Some(&'d') { - buf.push('\\'); - } - - if let Some(c @ 'D' | c @ 'd') = next { - let mut inner_buf = String::with_capacity(8); - inner_buf.push('\\'); - inner_buf.push('u'); - - if is_curly { - inner_buf.push('{'); - } - - inner_buf.push(*c); - inner_iter.next(); - - let mut is_valid = true; - for _ in 0..3 { - match inner_iter.next() { - Some(c @ '0'..='9') | Some(c @ 'a'..='f') - | Some(c @ 'A'..='F') => { - inner_buf.push(c); - } - _ => { - is_valid = false; - break; - } - } - } - - if is_curly { - inner_buf.push('}'); - } - - let range = if is_curly { - 3..(inner_buf.len() - 1) - } else { - 
2..6 - }; - - if is_valid { - let val_str = &inner_buf[range]; - if let Ok(v) = u32::from_str_radix(val_str, 16) { - if v > 0xffff { - buf.push_str(&inner_buf); - let end = if is_curly { 7 } else { 5 }; - for _ in 0..end { - iter.next(); - } - } else if (0xd800..=0xdfff).contains(&v) { - buf.push('\\'); - } else { - buf.push_str("\\\\"); - } - } else { - buf.push_str("\\\\"); - } - } else { - buf.push_str("\\\\"); - } - } else if is_curly { - buf.push_str("\\\\"); - } else { - buf.push('\\'); - } - } - _ => buf.push_str("\\\\"), - } - } + '\\' => buf.push_str("\\\\"), c if c == escape_char => { buf.push('\\'); buf.push(c); diff --git a/crates/swc_ecma_codegen/src/tests.rs b/crates/swc_ecma_codegen/src/tests.rs index 108b89cf80e9..bf28ae61744a 100644 --- a/crates/swc_ecma_codegen/src/tests.rs +++ b/crates/swc_ecma_codegen/src/tests.rs @@ -988,6 +988,36 @@ fn issue_9630() { ); } +#[test] +fn issue_10353_1() { + test_from_to_custom_config( + r#"console.log("\\uD83D");"#, + r#"console.log("\\uD83D")"#, + Config { + ascii_only: false, + target: EsVersion::Es2020, + minify: true, + ..Default::default() + }, + Syntax::default(), + ); +} + +#[test] +fn issue_10353_2() { + test_from_to_custom_config( + r#"console.log("\\uD83D\\uDE42");"#, + r#"console.log("\\uD83D\\uDE42")"#, + Config { + ascii_only: false, + target: EsVersion::Es2020, + minify: true, + ..Default::default() + }, + Syntax::default(), + ); +} + #[testing::fixture("tests/str-lits/**/*.txt")] fn test_str_lit(input: PathBuf) { test_str_lit_inner(input) diff --git a/crates/swc_ecma_codegen/tests/fixture/string/output.min.js b/crates/swc_ecma_codegen/tests/fixture/string/output.min.js index c500cab45a4b..145b06fee213 100644 --- a/crates/swc_ecma_codegen/tests/fixture/string/output.min.js +++ b/crates/swc_ecma_codegen/tests/fixture/string/output.min.js @@ -1,3 +1,3 @@ import*as commonjsHelpers from"\0commonjsHelpers.js";const string1="test";const string2="test";const string3='te"st';const string4="te'st";const 
string5="test\ntest\ntest";const string6=`Yet another string primitive`;const string7="This is a very long string which needs to wrap across multiple lines because otherwise my code is unreadable.";const string8="中文 español English हिन्दी العربية português বাংলা русский 日本語 ਪੰਜਾਬੀ 한국어 தமிழ்";const string9=``;const string10=`xx\`x`;const string11=`${foo+2}`;const string12=` foo ${bar+`baz ${qux}`}`;const string13=String.raw`foo`;const string14=foo`bar`;const string15=`foo bar -ↂωↂ`;const string16=`\``;const string17=`${4+4} equals 4 + 4`;const string18=`This is ${undefined}`;const string19=`This is ${NaN}`;const string20=`This is ${null}`;const string21=`This is ${Infinity}`;const string22="This is ${1/0}";const string23="This is ${1/0}";const string24="This is ${NaN}";const string25="This is ${null}";const string26=`This is ${1/0}`;const string27=`This is ${0/0}`;const string28="This is ${0/0}";const string29="This is ${0/0}";const string30=`${4**11}`;const string31=`${4**12}`;const string32=`${4**14}`;const string33="";const string34="\b";const string35="\f";const string36=" ";const string37="\v";const string38="\n";const string39="\\n";const string40="\\";const string41='\\"';const string42="'\"";const string43="\\\\";const string44="\0";const string45="\0!";const string46="\x001";const string47="\\0";const string48="\\0!";const string49="\x07";const string50="\x07!";const string51="\x071";const string52="\x07";const string53="\\7";const string54="\\7!";const string55="\\01";const string56="\x10";const string57="\\x10";const string58="\x1b";const string59="\\x1B";const string60="ꯍ";const string61="ꯍ";const string62="U000123AB";const string63="𒎫";const string64="\uD808\uDFAB";const string65="\uD808";const string66="\uD808X";const string67="\uDFAB";const string68="\uDFABX";const string69="€";const string70="ÿ";const string71="🍕";const string72="\uD801\uDC02\uDC03\uD804";const string73="π";const 貓="🐈";const 貓abc="🐈";const abc貓="🐈";const string74="\u2028";const 
string75="\u2029";const string76="\uFEFF";const string77="\x10";const string78=" ";const string79=" ";const string80="2";const string81="\x16";const string82="\x06";const string83="\0a";const string84='"test"test"test';const string85="\"test'test'test";const string86='"test"test"test';const string87="'test'test'test";const string88="😄";const string89=new RegExp("\r").test("\r");const string90=new RegExp(" ").test(" ");const string91=new RegExp("\x1b").test("["+"\x1b"+"]");const string92=new RegExp("\\x1b").test("\x1b");const string93=new RegExp("\x1b").test("\x1b");const string94="퟿";const string95="ퟻ";const string96=sql`'#ERROR'`;const string97=" ";const string98="\ud83d\ude00";const string99="\ud83d@\ude00";const string100="a";const string101="\u2028";const string102="\uD800";const string103="\u{D800}";const string104="\uDBFF";const string105="\u{DBFF}";const string106="\uDC00";const string107="\u{DC00}";const string108="\uDFFF";const string109="\u{DFFF}";const string110="￿";const string111="￿";const string112="\ud800";const string113="\uD800";React.createElement("div",null,"this should not parse as unicode: \\u00a0");const a="֑-ۯۺ-ࣿ‏\ud802-\ud803\ud83a-\ud83bיִ-﷿ﹰ-ﻼ";const b="A-Za-zÀ-ÖØ-öø-ʸ̀-֐ऀ-῿‎Ⰰ-\ud801\ud804-\ud839\ud83c-\udbff豈-﬜︀-﹯﻽-￿";var x="\u{D800}";var x2="\u{D800}";var x3="\u{D800}\u{D800}";const zzz="\0a"; +ↂωↂ`;const string16=`\``;const string17=`${4+4} equals 4 + 4`;const string18=`This is ${undefined}`;const string19=`This is ${NaN}`;const string20=`This is ${null}`;const string21=`This is ${Infinity}`;const string22="This is ${1/0}";const string23="This is ${1/0}";const string24="This is ${NaN}";const string25="This is ${null}";const string26=`This is ${1/0}`;const string27=`This is ${0/0}`;const string28="This is ${0/0}";const string29="This is ${0/0}";const string30=`${4**11}`;const string31=`${4**12}`;const string32=`${4**14}`;const string33="";const string34="\b";const string35="\f";const string36=" ";const string37="\v";const 
string38="\n";const string39="\\n";const string40="\\";const string41='\\"';const string42="'\"";const string43="\\\\";const string44="\0";const string45="\0!";const string46="\x001";const string47="\\0";const string48="\\0!";const string49="\x07";const string50="\x07!";const string51="\x071";const string52="\x07";const string53="\\7";const string54="\\7!";const string55="\\01";const string56="\x10";const string57="\\x10";const string58="\x1b";const string59="\\x1B";const string60="ꯍ";const string61="ꯍ";const string62="U000123AB";const string63="𒎫";const string64="\\uD808\\uDFAB";const string65="\\uD808";const string66="\\uD808X";const string67="\\uDFAB";const string68="\\uDFABX";const string69="€";const string70="ÿ";const string71="🍕";const string72="\\uD801\\uDC02\\uDC03\\uD804";const string73="π";const 貓="🐈";const 貓abc="🐈";const abc貓="🐈";const string74="\u2028";const string75="\u2029";const string76="\uFEFF";const string77="\x10";const string78=" ";const string79=" ";const string80="2";const string81="\x16";const string82="\x06";const string83="\0a";const string84='"test"test"test';const string85="\"test'test'test";const string86='"test"test"test';const string87="'test'test'test";const string88="😄";const string89=new RegExp("\r").test("\r");const string90=new RegExp(" ").test(" ");const string91=new RegExp("\x1b").test("["+"\x1b"+"]");const string92=new RegExp("\\x1b").test("\x1b");const string93=new RegExp("\x1b").test("\x1b");const string94="퟿";const string95="ퟻ";const string96=sql`'#ERROR'`;const string97=" ";const string98="\\ud83d\\ude00";const string99="\\ud83d@\\ude00";const string100="a";const string101="\u2028";const string102="\\uD800";const string103="\\u{D800}";const string104="\\uDBFF";const string105="\\u{DBFF}";const string106="\\uDC00";const string107="\\u{DC00}";const string108="\\uDFFF";const string109="\\u{DFFF}";const string110="￿";const string111="￿";const string112="\\ud800";const string113="\\uD800";React.createElement("div",null,"this 
should not parse as unicode: \\u00a0");const a="֑-ۯۺ-ࣿ‏\\ud802-\\ud803\\ud83a-\\ud83bיִ-﷿ﹰ-ﻼ";const b="A-Za-zÀ-ÖØ-öø-ʸ̀-֐ऀ-῿‎Ⰰ-\\ud801\\ud804-\\ud839\\ud83c-\\udbff豈-﬜︀-﹯﻽-￿";var x="\\u{D800}";var x2="\\u{D800}";var x3="\\u{D800}\\u{D800}";const zzz="\0a"; diff --git a/crates/swc_ecma_codegen/tests/fixture/template-literal/output.min.js b/crates/swc_ecma_codegen/tests/fixture/template-literal/output.min.js index c73000026c7e..ab4a15ef73b5 100644 --- a/crates/swc_ecma_codegen/tests/fixture/template-literal/output.min.js +++ b/crates/swc_ecma_codegen/tests/fixture/template-literal/output.min.js @@ -1,5 +1,5 @@ const template_literal1=`test${"test"}test${"test"}`;const template_literal2=``;const template_literal3=` `;const template_literal4=`string text`;const template_literal5=`string text line 1 - string text line 2`;const template_literal6=`string text ${expression} string text`;const templateFn=expression=>`string text ${expression} string text`;const template_literal7=example`string text ${expression} string text`;const template_literal8=`header ${isLargeScreen()?"":`icon-${item.isCollapsed?"expander":"collapser"}`}`;const template_literal9=`test \u00A9`;const template_literal10=`test \u{2F804}`;const template_literal11=`test \xa9`;const template_literal12=`test \0o251`;function latex(str){return{"cooked":str[0],"raw":str.raw[0]}}const template_literal14=latex`\unicode`;const template_literal15=`"test"test"test`;const template_literal16=`"test'test'test`;const template_literal17=`"test"test"test`;const template_literal18=`'test'test'test`;const template_literal19=`\0`;const template_literal20=`\x01`;const template_literal21=`\0${0}`;const template_literal22=`\x01${0}`;const template_literal23=`${0}\0`;const template_literal24=`${0}\x01`;const template_literal25=`${0}\0${1}`;const template_literal26=`${0}\x01${1}`;const template_literal27=String.raw`\1`;const template_literal28=String.raw`\\x01`;const template_literal29=String.raw`\\1${0}`;const 
template_literal30=String.raw`\\x01${0}`;const template_literal31=String.raw`${0}\\1`;const template_literal32=String.raw`${0}\\x01`;const template_literal33=String.raw`${0}\\1${1}`;const template_literal34=String.raw`${0}\\x01${1}`;const template_literal35=`${y}`;const template_literal36=`$(y)`;const template_literal37=`{y}$`;const template_literal38=`$}y{`;const template_literal39=`\\${y}`;const template_literal40=`$\\{y}`;await tag`x`;await (tag`x`);(await tag)`x`;await tag`${x}`;await (tag`${x}`);(await tag)`${x}`;new tag`x`;new(tag`x`);new tag()`x`;(new tag)`x`;new tag`${x}`;new(tag`${x}`);new tag()`${x}`;(new tag)`${x}`;new tag`${x}`;new(tag`${x}`);new tag()`${x}`;(new tag)`${x}`;const template_literal41=`${"test`"}${'test"'}${"test'''"}`;const template_literal42="֑-ۯۺ-ࣿ‏\ud802-\ud803\ud83a-\ud83bיִ-﷿ﹰ-ﻼ";const template_literal43="A-Za-zÀ-ÖØ-öø-ʸ̀-֐ऀ-῿‎Ⰰ-\ud801\ud804-\ud839\ud83c-\udbff豈-﬜︀-﹯﻽-￿";const template_literal45=`xx\`x`;const template_literal46=`${foo+2}`;const template_literal47=` foo ${bar+`baz ${qux}`}`;const template_literal48=`foo + string text line 2`;const template_literal6=`string text ${expression} string text`;const templateFn=expression=>`string text ${expression} string text`;const template_literal7=example`string text ${expression} string text`;const template_literal8=`header ${isLargeScreen()?"":`icon-${item.isCollapsed?"expander":"collapser"}`}`;const template_literal9=`test \u00A9`;const template_literal10=`test \u{2F804}`;const template_literal11=`test \xa9`;const template_literal12=`test \0o251`;function latex(str){return{"cooked":str[0],"raw":str.raw[0]}}const template_literal14=latex`\unicode`;const template_literal15=`"test"test"test`;const template_literal16=`"test'test'test`;const template_literal17=`"test"test"test`;const template_literal18=`'test'test'test`;const template_literal19=`\0`;const template_literal20=`\x01`;const template_literal21=`\0${0}`;const template_literal22=`\x01${0}`;const template_literal23=`${0}\0`;const 
template_literal24=`${0}\x01`;const template_literal25=`${0}\0${1}`;const template_literal26=`${0}\x01${1}`;const template_literal27=String.raw`\1`;const template_literal28=String.raw`\\x01`;const template_literal29=String.raw`\\1${0}`;const template_literal30=String.raw`\\x01${0}`;const template_literal31=String.raw`${0}\\1`;const template_literal32=String.raw`${0}\\x01`;const template_literal33=String.raw`${0}\\1${1}`;const template_literal34=String.raw`${0}\\x01${1}`;const template_literal35=`${y}`;const template_literal36=`$(y)`;const template_literal37=`{y}$`;const template_literal38=`$}y{`;const template_literal39=`\\${y}`;const template_literal40=`$\\{y}`;await tag`x`;await (tag`x`);(await tag)`x`;await tag`${x}`;await (tag`${x}`);(await tag)`${x}`;new tag`x`;new(tag`x`);new tag()`x`;(new tag)`x`;new tag`${x}`;new(tag`${x}`);new tag()`${x}`;(new tag)`${x}`;new tag`${x}`;new(tag`${x}`);new tag()`${x}`;(new tag)`${x}`;const template_literal41=`${"test`"}${'test"'}${"test'''"}`;const template_literal42="֑-ۯۺ-ࣿ‏\\ud802-\\ud803\\ud83a-\\ud83bיִ-﷿ﹰ-ﻼ";const template_literal43="A-Za-zÀ-ÖØ-öø-ʸ̀-֐ऀ-῿‎Ⰰ-\\ud801\\ud804-\\ud839\\ud83c-\\udbff豈-﬜︀-﹯﻽-￿";const template_literal45=`xx\`x`;const template_literal46=`${foo+2}`;const template_literal47=` foo ${bar+`baz ${qux}`}`;const template_literal48=`foo bar ↂωↂ`;const template_literal48=`This is ${undefined}`;const template_literal49=`This is ${NaN}`;const template_literal50=`This is ${null}`;const template_literal51=`This is ${Infinity}`;const template_literal60=`${4**11}`;const template_literal61=`Hello ${guest()}, welcome to ${location()}${"."}`;const template_literal62=`${1}${2}${3}${4}${5}${6}${7}${8}${9}${0}`;const template_literal63=`${foobar()}${foobar()}${foobar()}${foobar()}`;const template_literal64=`${1}${foobar()}${2}${foobar()}${3}${foobar()}`;const template_literal65="Decimals "+`${1}${2}${3}${4}${5}${6}${7}${8}${9}${0}`;const template_literal66=`${`${`${`foo`}`}`}`;const template_literal67=`before 
${`innerBefore ${any} innerAfter`} after`;const template_literal68=`1 ${2+`3 ${any} 4`+5} 6`;const template_literal69=`${content}`;const template_literal70=``;const template_literal72=`\u0020\u{20}\u{00020} `;console.log(`\\n\\r\\u2028\\u2029 \r\u2028\u2029`);function a(){return`\ diff --git a/crates/swc_ecma_codegen/tests/fixture/vercel/2/output.min.js b/crates/swc_ecma_codegen/tests/fixture/vercel/2/output.min.js index 7715f1f01994..1a1d6888ed7c 100644 --- a/crates/swc_ecma_codegen/tests/fixture/vercel/2/output.min.js +++ b/crates/swc_ecma_codegen/tests/fixture/vercel/2/output.min.js @@ -5,4 +5,4 @@ function isUpdateAvailable(){// __webpack_hash__ is the hash of the current comp function canApplyUpdates(){return module.hot.status()==="idle"}// This function reads code updates on the fly and hard // reloads the page when it has changed. async function tryApplyUpdates(){if(!isUpdateAvailable()||!canApplyUpdates()){return}try{const res=await fetch(typeof __webpack_runtime_id__!=="undefined"?`${hotUpdatePath}${curHash}.${__webpack_runtime_id__}.hot-update.json`:`${hotUpdatePath}${curHash}.hot-update.json`);const jsonData=await res.json();const curPage=page==="/"?"index":page;// webpack 5 uses an array instead -const pageUpdated=(Array.isArray(jsonData.c)?jsonData.c:Object.keys(jsonData.c)).some(mod=>{return(mod.indexOf(`pages${curPage.startsWith("/")?curPage:`/${curPage}`}`)!==-1||mod.indexOf(`pages${curPage.startsWith("/")?curPage:`/${curPage}`}`.replace(/\//g,"\\"))!==-1)});if(pageUpdated){document.location.reload(true)}else{curHash=mostRecentHash}}catch(err){console.error("Error occurred checking for update",err);document.location.reload(true)}}addMessageListener(event=>{if(event.data==="\uD83D\uDC93"){return}try{const message=JSON.parse(event.data);if(message.action==="sync"||message.action==="built"){if(!message.hash){return}mostRecentHash=message.hash;tryApplyUpdates()}else 
if(message.action==="reloadPage"){document.location.reload(true)}}catch(ex){console.warn("Invalid HMR message: "+event.data+"\n"+ex)}});connectHMR({assetPrefix,path:"/_next/webpack-hmr"});displayContent();initOnDemandEntries(data.page); +const pageUpdated=(Array.isArray(jsonData.c)?jsonData.c:Object.keys(jsonData.c)).some(mod=>{return(mod.indexOf(`pages${curPage.startsWith("/")?curPage:`/${curPage}`}`)!==-1||mod.indexOf(`pages${curPage.startsWith("/")?curPage:`/${curPage}`}`.replace(/\//g,"\\"))!==-1)});if(pageUpdated){document.location.reload(true)}else{curHash=mostRecentHash}}catch(err){console.error("Error occurred checking for update",err);document.location.reload(true)}}addMessageListener(event=>{if(event.data==="\\uD83D\\uDC93"){return}try{const message=JSON.parse(event.data);if(message.action==="sync"||message.action==="built"){if(!message.hash){return}mostRecentHash=message.hash;tryApplyUpdates()}else if(message.action==="reloadPage"){document.location.reload(true)}}catch(ex){console.warn("Invalid HMR message: "+event.data+"\n"+ex)}});connectHMR({assetPrefix,path:"/_next/webpack-hmr"});displayContent();initOnDemandEntries(data.page); diff --git a/crates/swc_ecma_lexer/src/common/lexer/mod.rs b/crates/swc_ecma_lexer/src/common/lexer/mod.rs index 5196fea33037..736abe9468cf 100644 --- a/crates/swc_ecma_lexer/src/common/lexer/mod.rs +++ b/crates/swc_ecma_lexer/src/common/lexer/mod.rs @@ -1112,26 +1112,31 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { op(self, &mut buf) } - fn read_unicode_escape(&mut self) -> LexResult> { + fn read_unicode_escape(&mut self) -> LexResult { debug_assert_eq!(self.cur(), Some('u')); - let mut chars = Vec::with_capacity(4); - let mut is_curly = false; - self.bump(); // 'u' - if self.eat(b'{') { - is_curly = true; - } + let is_curly = self.eat(b'{'); - let state = self.input().cur_pos(); let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) { Ok(Some(val)) => { - if 0x0010_ffff >= val { - char::from_u32(val) + 
if val <= 0x0010_ffff { + char::from_u32(val).ok_or_else(|| { + let start = self.cur_pos(); + crate::error::Error::new( + pos_span(start), + SyntaxError::BadCharacterEscapeSequence { + expected: if is_curly { + "1-6 hex characters in the range 0 to 10FFFF." + } else { + "4 hex characters" + }, + }, + ) + })? } else { let start = self.cur_pos(); - self.error( start, SyntaxError::BadCharacterEscapeSequence { @@ -1146,7 +1151,6 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } _ => { let start = self.cur_pos(); - self.error( start, SyntaxError::BadCharacterEscapeSequence { @@ -1160,54 +1164,12 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } }; - match c { - Some(c) => { - chars.push(c.into()); - } - _ => { - unsafe { - // Safety: state is valid position because we got it from cur_pos() - self.input_mut().reset_to(state); - } - - chars.push(Char::from('\\')); - chars.push(Char::from('u')); - - if is_curly { - chars.push(Char::from('{')); - - for _ in 0..6 { - if let Some(c) = self.input().cur() { - if c == '}' { - break; - } - - self.bump(); - - chars.push(Char::from(c)); - } else { - break; - } - } - - chars.push(Char::from('}')); - } else { - for _ in 0..4 { - if let Some(c) = self.input().cur() { - self.bump(); - - chars.push(Char::from(c)); - } - } - } - } - } - if is_curly && !self.eat(b'}') { - self.error(state, SyntaxError::InvalidUnicodeEscape)? + let start = self.cur_pos(); + self.error(start, SyntaxError::InvalidUnicodeEscape)? } - Ok(chars) + Ok(c.into()) } #[cold] @@ -1337,9 +1299,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { match self.read_escaped_char(true) { Ok(Some(chars)) => { if let Ok(ref mut cooked) = cooked { - for c in chars { - cooked.extend(c); - } + cooked.extend(chars); } } Ok(None) => {} @@ -1358,7 +1318,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// Read an escaped character for string literal. /// /// In template literal, we should preserve raw string. 
- fn read_escaped_char(&mut self, in_template: bool) -> LexResult>> { + fn read_escaped_char(&mut self, in_template: bool) -> LexResult> { debug_assert_eq!(self.cur(), Some('\\')); let start = self.cur_pos(); @@ -1396,7 +1356,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { self.bump(); // 'x' match self.read_int_u32::<16>(2)? { - Some(val) => return Ok(Some(vec![Char::from(val)])), + Some(val) => return Ok(Some(Char::from(val))), None => self.error( start, SyntaxError::BadCharacterEscapeSequence { @@ -1407,10 +1367,10 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } // read unicode escape sequences - 'u' => match self.read_unicode_escape() { - Ok(chars) => return Ok(Some(chars)), - Err(err) => self.error(start, err.into_kind())?, - }, + 'u' => { + let c = self.read_unicode_escape()?; + return Ok(Some(c)); + } // octal escape sequences '0'..='7' => { @@ -1420,7 +1380,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { match self.cur() { Some(next) if next.is_digit(8) => c, // \0 is not an octal literal nor decimal literal. 
- _ => return Ok(Some(vec!['\u{0000}'.into()])), + _ => return Ok(Some('\u{0000}'.into())), } } else { c @@ -1447,7 +1407,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { .and_then(|value| value.checked_add(v as u8)); match new_val { Some(val) => val, - None => return Ok(Some(vec![Char::from(value as char)])), + None => return Ok(Some(Char::from(value as char))), } } else { value * 8 + v as u8 @@ -1455,7 +1415,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { self.bump(); } - _ => return Ok(Some(vec![Char::from(value as u32)])), + _ => return Ok(Some(Char::from(value as u32))), } }}; } @@ -1463,7 +1423,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { one!(false); one!(true); - return Ok(Some(vec![Char::from(value as char)])); + return Ok(Some(Char::from(value as char))); } _ => c, }; @@ -1473,7 +1433,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { self.input_mut().bump(); } - Ok(Some(vec![c.into()])) + Ok(Some(c.into())) } /// Expects current char to be '/' @@ -1688,23 +1648,19 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } } - let chars = l.read_unicode_escape()?; + let c = l.read_unicode_escape()?; - if let Some(c) = chars.first() { - let valid = if first { - c.is_ident_start() - } else { - c.is_ident_part() - }; + let valid = if first { + c.is_ident_start() + } else { + c.is_ident_part() + }; - if !valid { - l.emit_error(start, SyntaxError::InvalidIdentChar); - } + if !valid { + l.emit_error(start, SyntaxError::InvalidIdentChar); } - for c in chars { - buf.extend(c); - } + buf.extend(c); slice_start = l.cur_pos(); continue; @@ -2063,9 +2019,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } if let Some(chars) = l.read_escaped_char(false)? 
/// Decodes `escape` as one complete `\uXXXX` escape: a backslash, a `u`, and
/// exactly four ASCII hex digits.
///
/// Returns the encoded UTF-16 code unit, or `None` if `escape` has any other
/// shape (wrong length, missing prefix, non-hex digits).
fn parse_bmp_escape(escape: &str) -> Option<u32> {
    let hex = escape.strip_prefix("\\u")?;
    // Checking the digits explicitly matters: `from_str_radix` alone would
    // also accept a leading `+`.
    if hex.len() != 4 || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(hex, 16).ok()
}

/// Concatenates two strings with Unicode surrogate pair awareness.
///
/// If `left` ends with the six-character text `\uXXXX` naming a high
/// surrogate (`D800..=DBFF`) and `right` starts with the six-character text
/// `\uXXXX` naming a low surrogate (`DC00..=DFFF`), both escapes are replaced
/// by the single character they encode together. In every other case the
/// result is plain `left + right`.
///
/// All slicing is done with checked `str::get`, so inputs whose last/first six
/// bytes straddle a multi-byte UTF-8 character simply fall back to plain
/// concatenation instead of panicking.
///
/// NOTE(review): this matches on the literal *text* `\uXXXX` inside the
/// string values. Confirm that callers still represent lone surrogates in that
/// textual form; otherwise a genuine backslash-u sequence split across two
/// literals would be merged as well.
fn concat_unicode_aware(left: &str, right: &str) -> String {
    /// Byte length of a `\uXXXX` escape.
    const ESCAPE_LEN: usize = 6;

    // Fast path for empty strings
    if left.is_empty() {
        return right.to_string();
    }
    if right.is_empty() {
        return left.to_string();
    }

    // `split` is the byte offset in `left` where its trailing escape starts.
    // `get` yields `None` when the string is too short or when the offset is
    // not a char boundary, which is exactly when no escape can be present.
    let pair = left.len().checked_sub(ESCAPE_LEN).and_then(|split| {
        let high = parse_bmp_escape(left.get(split..)?)?;
        let low = parse_bmp_escape(right.get(..ESCAPE_LEN)?)?;
        Some((split, high, low))
    });

    if let Some((split, high, low)) = pair {
        // Only a high surrogate followed by a low surrogate forms a pair.
        if (0xd800..=0xdbff).contains(&high) && (0xdc00..=0xdfff).contains(&low) {
            // Standard UTF-16 decoding of a surrogate pair.
            let code_point = 0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00);

            if let Some(combined) = char::from_u32(code_point) {
                // Both offsets are char boundaries: the `get` calls above
                // succeeded on them.
                let tail = &right[ESCAPE_LEN..];
                let mut result =
                    String::with_capacity(split + combined.len_utf8() + tail.len());
                result.push_str(&left[..split]);
                result.push(combined);
                result.push_str(tail);
                return result;
            }
        }
    }

    // Fallback to simple concatenation
    format!("{left}{right}")
}
b/crates/swc_ecma_minifier/tests/libs-size.snapshot.md @@ -4,7 +4,7 @@ | d3.js | 542.74 KiB | 261.63 KiB | 85.57 KiB | | echarts.js | 3.41 MiB | 977.52 KiB | 314.19 KiB | | jquery.js | 280.89 KiB | 87.80 KiB | 30.21 KiB | -| lodash.js | 531.35 KiB | 68.91 KiB | 24.60 KiB | +| lodash.js | 531.35 KiB | 68.92 KiB | 24.60 KiB | | moment.js | 169.83 KiB | 57.39 KiB | 18.26 KiB | | react.js | 70.45 KiB | 22.44 KiB | 8.04 KiB | | terser.js | 1.08 MiB | 446.68 KiB | 120.49 KiB | diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index fe0748fbfd2f..92ee22986d08 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -405,9 +405,7 @@ impl Lexer<'_> { match self.read_escaped_char(true) { Ok(Some(chars)) => { if let Ok(ref mut cooked) = cooked { - for c in chars { - cooked.extend(c); - } + cooked.extend(chars); } } Ok(None) => {} diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index 5cd1f0380fa2..f2dde6b4c18f 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -521,13 +521,11 @@ impl Lexer<'_> { continue; } self.bump(); // bump 'u' - let Ok(chars) = self.read_unicode_escape() else { + let Ok(c) = self.read_unicode_escape() else { self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape); break; }; - for c in chars { - v.extend(c); - } + v.extend(c); self.token_flags |= swc_ecma_lexer::lexer::TokenFlags::UNICODE; } else { break;