Skip to content

Commit f3fe504

Browse files
authored
refactor: remove RawDataUrl and extract to deno_media_type (#564)
1 parent 9d26d04 commit f3fe504

File tree

6 files changed

+18
-374
lines changed

6 files changed

+18
-374
lines changed

Cargo.lock

+3-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ async-trait = "0.1.68"
4545
capacity_builder = "0.5.0"
4646
data-url = "0.3.0"
4747
deno_ast = { version = "0.44.0", features = ["dep_analysis", "emit"] }
48-
deno_media_type = "0.2.3"
48+
deno_media_type = { version = "0.2.4", features = ["decoding", "data_url", "module_specifier"] }
4949
deno_unsync.workspace = true
5050
deno_path_util = "0.3.0"
5151
deno_semver = "0.7.1"

src/graph.rs

+11-5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use crate::source::*;
3333
use deno_ast::dep::DynamicDependencyKind;
3434
use deno_ast::dep::ImportAttributes;
3535
use deno_ast::dep::StaticDependencyKind;
36+
use deno_ast::encoding::detect_charset;
3637
use deno_ast::LineAndColumnIndex;
3738
use deno_ast::MediaType;
3839
use deno_ast::ParseDiagnostic;
@@ -2377,10 +2378,12 @@ pub(crate) async fn parse_module_source_and_info(
23772378
Some("json")
23782379
))
23792380
{
2380-
return match crate::source::decode_source(
2381-
&opts.specifier,
2381+
let charset = maybe_charset.unwrap_or_else(|| {
2382+
detect_charset(&opts.specifier, opts.content.as_ref())
2383+
});
2384+
return match deno_media_type::encoding::decode_arc_source(
2385+
charset,
23822386
opts.content,
2383-
maybe_charset,
23842387
) {
23852388
Ok(text) => Ok(ModuleSourceAndInfo::Json {
23862389
specifier: opts.specifier,
@@ -5395,10 +5398,13 @@ impl<'a> NpmSpecifierResolver<'a> {
53955398

53965399
fn new_source_with_text(
53975400
specifier: &ModuleSpecifier,
5398-
text: Arc<[u8]>,
5401+
bytes: Arc<[u8]>,
53995402
maybe_charset: Option<&str>,
54005403
) -> Result<Arc<str>, Box<ModuleError>> {
5401-
crate::source::decode_source(specifier, text, maybe_charset).map_err(|err| {
5404+
let charset = maybe_charset.unwrap_or_else(|| {
5405+
deno_media_type::encoding::detect_charset(specifier, bytes.as_ref())
5406+
});
5407+
deno_media_type::encoding::decode_arc_source(charset, bytes).map_err(|err| {
54025408
Box::new(ModuleError::LoadingErr(
54035409
specifier.clone(),
54045410
None,

src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ pub mod symbols;
1818
mod fast_check;
1919
pub mod packages;
2020
pub mod source;
21-
mod text_encoding;
2221

2322
use source::FileSystem;
2423
use source::JsrUrlProvider;

src/source/mod.rs

+3-253
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
// Copyright 2018-2024 the Deno authors. MIT license.
22

3-
use std::borrow::Cow;
43
use std::collections::HashMap;
54
use std::fmt;
65
use std::path::Path;
76
use std::path::PathBuf;
87
use std::sync::Arc;
98

109
use async_trait::async_trait;
11-
use data_url::DataUrl;
10+
use deno_ast::data_url::RawDataUrl;
1211
use deno_ast::MediaType;
1312
use deno_ast::ModuleSpecifier;
1413
use deno_error::JsErrorClass;
@@ -30,7 +29,6 @@ use crate::graph::Range;
3029
use crate::module_specifier::resolve_import;
3130
use crate::packages::JsrPackageInfo;
3231
use crate::packages::JsrPackageVersionInfo;
33-
use crate::text_encoding;
3432
use crate::ModuleInfo;
3533
use crate::NpmLoadError;
3634
use crate::SpecifierError;
@@ -551,75 +549,15 @@ pub fn load_data_url(
551549
specifier: &ModuleSpecifier,
552550
) -> Result<Option<LoadResponse>, std::io::Error> {
553551
let data_url = RawDataUrl::parse(specifier)?;
554-
let (bytes, headers) = data_url.into_bytes_and_headers();
552+
let (bytes, mime_type) = data_url.into_bytes_and_mime_type();
553+
let headers = HashMap::from([("content-type".to_string(), mime_type)]);
555554
Ok(Some(LoadResponse::Module {
556555
specifier: specifier.clone(),
557556
maybe_headers: Some(headers),
558557
content: Arc::from(bytes),
559558
}))
560559
}
561560

562-
#[derive(Debug, Clone)]
563-
pub struct RawDataUrl {
564-
pub mime_type: String,
565-
pub bytes: Vec<u8>,
566-
}
567-
568-
impl RawDataUrl {
569-
pub fn parse(specifier: &ModuleSpecifier) -> Result<Self, std::io::Error> {
570-
use std::io::Error;
571-
use std::io::ErrorKind;
572-
573-
fn unable_to_decode() -> Error {
574-
Error::new(ErrorKind::InvalidData, "Unable to decode data url.")
575-
}
576-
577-
let url =
578-
DataUrl::process(specifier.as_str()).map_err(|_| unable_to_decode())?;
579-
let (bytes, _) = url.decode_to_vec().map_err(|_| unable_to_decode())?;
580-
Ok(RawDataUrl {
581-
mime_type: url.mime_type().to_string(),
582-
bytes,
583-
})
584-
}
585-
586-
pub fn charset(&self) -> Option<&str> {
587-
get_mime_type_charset(&self.mime_type)
588-
}
589-
590-
pub fn media_type(&self) -> MediaType {
591-
let mut content_types = self.mime_type.split(';');
592-
let Some(content_type) = content_types.next() else {
593-
return MediaType::Unknown;
594-
};
595-
MediaType::from_content_type(
596-
// this data url will be ignored when resolving the MediaType
597-
// as in this rare case the MediaType is determined solely based
598-
// on the provided content type
599-
&ModuleSpecifier::parse("data:image/png;base64,").unwrap(),
600-
content_type,
601-
)
602-
}
603-
604-
pub fn decode(self) -> Result<String, std::io::Error> {
605-
let charset = get_mime_type_charset(&self.mime_type).unwrap_or("utf-8");
606-
decode_owned_source_with_charset(self.bytes, charset)
607-
}
608-
609-
pub fn into_bytes_and_headers(self) -> (Vec<u8>, HashMap<String, String>) {
610-
let headers = HashMap::from([("content-type".to_string(), self.mime_type)]);
611-
(self.bytes, headers)
612-
}
613-
}
614-
615-
fn get_mime_type_charset(mime_type: &str) -> Option<&str> {
616-
mime_type
617-
.split(';')
618-
.skip(1)
619-
.map(str::trim)
620-
.find_map(|s| s.strip_prefix("charset="))
621-
}
622-
623561
/// An implementation of the loader attribute where the responses are provided
624562
/// ahead of time. This is useful for testing.
625563
#[derive(Default)]
@@ -838,100 +776,6 @@ pub fn resolve_media_type_and_charset_from_content_type<'a>(
838776
}
839777
}
840778

841-
/// Decodes the source bytes into a string handling any encoding rules
842-
/// where the bytes may be from a remote module, file module, or other.
843-
pub fn decode_owned_source(
844-
specifier: &ModuleSpecifier,
845-
bytes: Vec<u8>,
846-
maybe_charset: Option<&str>,
847-
) -> Result<String, std::io::Error> {
848-
let charset = maybe_charset.unwrap_or_else(|| {
849-
if specifier.scheme() == "file" {
850-
text_encoding::detect_charset(&bytes)
851-
} else {
852-
"utf-8"
853-
}
854-
});
855-
decode_owned_source_with_charset(bytes, charset)
856-
}
857-
858-
/// Decodes the source bytes into a string handling any encoding rules
859-
/// where the source is a `file:` specifier.
860-
pub fn decode_owned_file_source(
861-
bytes: Vec<u8>,
862-
) -> Result<String, std::io::Error> {
863-
let charset = text_encoding::detect_charset(&bytes);
864-
decode_owned_source_with_charset(bytes, charset)
865-
}
866-
867-
fn decode_owned_source_with_charset(
868-
bytes: Vec<u8>,
869-
charset: &str,
870-
) -> Result<String, std::io::Error> {
871-
match text_encoding::convert_to_utf8(&bytes, charset)? {
872-
Cow::Borrowed(text) => {
873-
if text.starts_with(text_encoding::BOM_CHAR) {
874-
Ok(text[text_encoding::BOM_CHAR.len_utf8()..].to_string())
875-
} else {
876-
Ok(
877-
// SAFETY: we know it's a valid utf-8 string at this point
878-
unsafe { String::from_utf8_unchecked(bytes) },
879-
)
880-
}
881-
}
882-
Cow::Owned(mut text) => {
883-
text_encoding::strip_bom_mut(&mut text);
884-
Ok(text)
885-
}
886-
}
887-
}
888-
889-
/// Decodes the source bytes into a string handling any encoding rules
890-
/// for local vs remote files and dealing with the charset.
891-
pub fn decode_source(
892-
specifier: &ModuleSpecifier,
893-
bytes: Arc<[u8]>,
894-
maybe_charset: Option<&str>,
895-
) -> Result<Arc<str>, std::io::Error> {
896-
let charset = maybe_charset.unwrap_or_else(|| {
897-
if specifier.scheme() == "file" {
898-
text_encoding::detect_charset(bytes.as_ref())
899-
} else {
900-
"utf-8"
901-
}
902-
});
903-
decode_with_charset(bytes, charset)
904-
}
905-
906-
fn decode_with_charset(
907-
bytes: Arc<[u8]>,
908-
charset: &str,
909-
) -> Result<Arc<str>, std::io::Error> {
910-
let text = match text_encoding::convert_to_utf8(bytes.as_ref(), charset)? {
911-
Cow::Borrowed(text) => {
912-
if text.starts_with(text_encoding::BOM_CHAR) {
913-
text[text_encoding::BOM_CHAR.len_utf8()..].to_string()
914-
} else {
915-
return Ok(
916-
// SAFETY: we know it's a valid utf-8 string at this point
917-
unsafe {
918-
let raw_ptr = Arc::into_raw(bytes);
919-
Arc::from_raw(std::mem::transmute::<*const [u8], *const str>(
920-
raw_ptr,
921-
))
922-
},
923-
);
924-
}
925-
}
926-
Cow::Owned(mut text) => {
927-
text_encoding::strip_bom_mut(&mut text);
928-
text
929-
}
930-
};
931-
let text: Arc<str> = Arc::from(text);
932-
Ok(text)
933-
}
934-
935779
#[cfg(test)]
936780
pub mod tests {
937781
use super::*;
@@ -1014,98 +858,4 @@ pub mod tests {
1014858
}
1015859
);
1016860
}
1017-
1018-
#[test]
1019-
fn test_parse_valid_data_url() {
1020-
let valid_data_url = "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==";
1021-
let specifier = ModuleSpecifier::parse(valid_data_url).unwrap();
1022-
let raw_data_url = RawDataUrl::parse(&specifier).unwrap();
1023-
assert_eq!(raw_data_url.mime_type, "text/plain");
1024-
assert_eq!(raw_data_url.bytes, b"Hello, World!");
1025-
}
1026-
1027-
#[test]
1028-
fn test_charset_with_valid_mime_type() {
1029-
let raw_data_url = RawDataUrl {
1030-
mime_type: "text/plain; charset=utf-8".to_string(),
1031-
bytes: vec![],
1032-
};
1033-
assert_eq!(raw_data_url.charset(), Some("utf-8"));
1034-
}
1035-
1036-
#[test]
1037-
fn test_charset_with_no_charset_in_mime_type() {
1038-
let raw_data_url = RawDataUrl {
1039-
mime_type: "text/plain".to_string(),
1040-
bytes: vec![],
1041-
};
1042-
assert_eq!(raw_data_url.charset(), None);
1043-
}
1044-
1045-
#[test]
1046-
fn test_media_type_with_known_type() {
1047-
let raw_data_url = RawDataUrl {
1048-
mime_type: "application/javascript;charset=utf-8".to_string(),
1049-
bytes: vec![],
1050-
};
1051-
assert_eq!(raw_data_url.media_type(), MediaType::JavaScript);
1052-
}
1053-
1054-
#[test]
1055-
fn test_media_type_with_unknown_type() {
1056-
let raw_data_url = RawDataUrl {
1057-
mime_type: "unknown/unknown".to_string(),
1058-
bytes: vec![],
1059-
};
1060-
assert_eq!(raw_data_url.media_type(), MediaType::Unknown);
1061-
}
1062-
1063-
#[test]
1064-
fn test_decode_with_valid_charset() {
1065-
let raw_data_url = RawDataUrl {
1066-
mime_type: "text/plain; charset=utf-8".to_string(),
1067-
bytes: "Hello, World!".as_bytes().to_vec(),
1068-
};
1069-
assert_eq!(raw_data_url.decode().unwrap(), "Hello, World!");
1070-
}
1071-
1072-
#[test]
1073-
fn test_decode_with_invalid_charset() {
1074-
let raw_data_url = RawDataUrl {
1075-
mime_type: "text/plain; charset=invalid-charset".to_string(),
1076-
bytes: vec![],
1077-
};
1078-
assert!(raw_data_url.decode().is_err());
1079-
}
1080-
1081-
#[test]
1082-
fn test_into_bytes_and_headers() {
1083-
let raw_data_url = RawDataUrl {
1084-
mime_type: "text/plain; charset=utf-8".to_string(),
1085-
bytes: "Hello, World!".as_bytes().to_vec(),
1086-
};
1087-
let (bytes, headers) = raw_data_url.into_bytes_and_headers();
1088-
assert_eq!(bytes, "Hello, World!".as_bytes());
1089-
assert_eq!(
1090-
headers.get("content-type").unwrap(),
1091-
"text/plain; charset=utf-8"
1092-
);
1093-
}
1094-
1095-
#[test]
1096-
fn test_decode_owned_with_bom() {
1097-
let text = decode_owned_file_source(
1098-
format!("{}{}", text_encoding::BOM_CHAR, "Hello").into_bytes(),
1099-
)
1100-
.unwrap();
1101-
assert_eq!(text, "Hello");
1102-
}
1103-
1104-
#[test]
1105-
fn test_decode_with_charset_with_bom() {
1106-
let bytes = format!("{}{}", text_encoding::BOM_CHAR, "Hello").into_bytes();
1107-
let charset = "utf-8";
1108-
let text = decode_with_charset(Arc::from(bytes), charset).unwrap();
1109-
assert_eq!(text.as_ref(), "Hello");
1110-
}
1111861
}

0 commit comments

Comments
 (0)