@@ -306,20 +306,71 @@ pub trait NpmResolver: fmt::Debug {
306
306
pub fn load_data_url (
307
307
specifier : & ModuleSpecifier ,
308
308
) -> Result < Option < LoadResponse > , anyhow:: Error > {
309
- let url = DataUrl :: process ( specifier. as_str ( ) )
310
- . map_err ( |_| anyhow ! ( "Unable to decode data url." ) ) ?;
311
- let ( bytes, _) = url
312
- . decode_to_vec ( )
313
- . map_err ( |_| anyhow ! ( "Unable to decode data url." ) ) ?;
314
- let mut headers: HashMap < String , String > = HashMap :: with_capacity ( 1 ) ;
315
- headers. insert ( "content-type" . to_string ( ) , url. mime_type ( ) . to_string ( ) ) ;
309
+ let data_url = RawDataUrl :: parse ( specifier) ?;
310
+ let ( bytes, headers) = data_url. into_bytes_and_headers ( ) ;
316
311
Ok ( Some ( LoadResponse :: Module {
317
312
specifier : specifier. clone ( ) ,
318
313
maybe_headers : Some ( headers) ,
319
314
content : Arc :: from ( bytes) ,
320
315
} ) )
321
316
}
322
317
318
/// A `data:` URL split into its MIME type string and its decoded payload.
#[derive(Clone, Debug)]
pub struct RawDataUrl {
  /// The MIME type text of the data URL, including any parameters
  /// (for example `text/plain; charset=utf-8`).
  pub mime_type: String,
  /// The decoded body of the data URL.
  pub bytes: Vec<u8>,
}
323
+
324
+ impl RawDataUrl {
325
+ pub fn parse ( specifier : & ModuleSpecifier ) -> Result < Self , Error > {
326
+ let url = DataUrl :: process ( specifier. as_str ( ) )
327
+ . map_err ( |_| anyhow ! ( "Unable to decode data url." ) ) ?;
328
+ let ( bytes, _) = url
329
+ . decode_to_vec ( )
330
+ . map_err ( |_| anyhow ! ( "Unable to decode data url." ) ) ?;
331
+ Ok ( RawDataUrl {
332
+ mime_type : url. mime_type ( ) . to_string ( ) ,
333
+ bytes,
334
+ } )
335
+ }
336
+
337
+ pub fn charset ( & self ) -> Option < & str > {
338
+ get_mime_type_charset ( & self . mime_type )
339
+ }
340
+
341
+ pub fn media_type ( & self ) -> MediaType {
342
+ let mut content_types = self . mime_type . split ( ';' ) ;
343
+ let Some ( content_type) = content_types. next ( ) else {
344
+ return MediaType :: Unknown ;
345
+ } ;
346
+ MediaType :: from_content_type (
347
+ // this data url will be ignored when resolving the MediaType
348
+ // as in this rare case the MediaType is determined solely based
349
+ // on the provided content type
350
+ & ModuleSpecifier :: parse ( "data:image/png;base64," ) . unwrap ( ) ,
351
+ content_type,
352
+ )
353
+ }
354
+
355
+ pub fn decode ( self ) -> Result < String , std:: io:: Error > {
356
+ let charset = get_mime_type_charset ( & self . mime_type ) . unwrap_or ( "utf-8" ) ;
357
+ decode_owned_source_with_charset ( self . bytes , charset)
358
+ }
359
+
360
+ pub fn into_bytes_and_headers ( self ) -> ( Vec < u8 > , HashMap < String , String > ) {
361
+ let headers = HashMap :: from ( [ ( "content-type" . to_string ( ) , self . mime_type ) ] ) ;
362
+ ( self . bytes , headers)
363
+ }
364
+ }
365
+
366
/// Extracts the value of the `charset` parameter from a MIME type string.
///
/// The text before the first `;` (the type/subtype) is skipped. Each
/// remaining `;`-separated parameter is trimmed, and the first one that
/// starts with `charset=` yields the text following the `=`. The parameter
/// name is matched ASCII case-insensitively (`Charset=` and `CHARSET=` also
/// match) because MIME parameter names are case-insensitive. The value is
/// returned exactly as written: it is neither unquoted nor case-normalized.
///
/// Returns `None` when no `charset` parameter is present.
fn get_mime_type_charset(mime_type: &str) -> Option<&str> {
  const CHARSET_PREFIX: &str = "charset=";
  mime_type
    .split(';')
    .skip(1)
    .map(str::trim)
    .find_map(|param| {
      // `get` yields `None` when the parameter is shorter than the prefix
      // or the cut would land inside a multi-byte character.
      let name = param.get(..CHARSET_PREFIX.len())?;
      name
        .eq_ignore_ascii_case(CHARSET_PREFIX)
        .then(|| &param[CHARSET_PREFIX.len()..])
    })
}
373
+
323
374
/// An implementation of the loader attribute where the responses are provided
324
375
/// ahead of time. This is useful for testing or
325
376
#[ derive( Default ) ]
@@ -500,8 +551,10 @@ pub fn resolve_media_type_and_charset_from_content_type<'a>(
500
551
) -> ( MediaType , Option < & ' a str > ) {
501
552
if let Some ( content_type) = maybe_content_type {
502
553
let mut content_types = content_type. split ( ';' ) ;
503
- let content_type = content_types. next ( ) . unwrap ( ) ;
504
- let media_type = MediaType :: from_content_type ( specifier, content_type) ;
554
+ let media_type = content_types
555
+ . next ( )
556
+ . map ( |content_type| MediaType :: from_content_type ( specifier, content_type) )
557
+ . unwrap_or ( MediaType :: Unknown ) ;
505
558
let charset = content_types
506
559
. map ( str:: trim)
507
560
. find_map ( |s| s. strip_prefix ( "charset=" ) ) ;
@@ -512,6 +565,54 @@ pub fn resolve_media_type_and_charset_from_content_type<'a>(
512
565
}
513
566
}
514
567
568
+ /// Decodes the source bytes into a string handling any encoding rules
569
+ /// where the bytes may be from a remote module, file module, or other.
570
+ pub fn decode_owned_source (
571
+ specifier : & ModuleSpecifier ,
572
+ bytes : Vec < u8 > ,
573
+ maybe_charset : Option < & str > ,
574
+ ) -> Result < String , std:: io:: Error > {
575
+ let charset = maybe_charset. unwrap_or_else ( || {
576
+ if specifier. scheme ( ) == "file" {
577
+ text_encoding:: detect_charset ( & bytes)
578
+ } else {
579
+ "utf-8"
580
+ }
581
+ } ) ;
582
+ decode_owned_source_with_charset ( bytes, charset)
583
+ }
584
+
585
+ /// Decodes the source bytes into a string handling any encoding rules
586
+ /// where the source is a `file:` specifier.
587
+ pub fn decode_owned_file_source (
588
+ bytes : Vec < u8 > ,
589
+ ) -> Result < String , std:: io:: Error > {
590
+ let charset = text_encoding:: detect_charset ( & bytes) ;
591
+ decode_owned_source_with_charset ( bytes, charset)
592
+ }
593
+
594
+ fn decode_owned_source_with_charset (
595
+ bytes : Vec < u8 > ,
596
+ charset : & str ,
597
+ ) -> Result < String , std:: io:: Error > {
598
+ match text_encoding:: convert_to_utf8 ( & bytes, charset) ? {
599
+ Cow :: Borrowed ( text) => {
600
+ if text. starts_with ( text_encoding:: BOM_CHAR ) {
601
+ Ok ( text[ text_encoding:: BOM_CHAR . len_utf8 ( ) ..] . to_string ( ) )
602
+ } else {
603
+ Ok (
604
+ // SAFETY: we know it's a valid utf-8 string at this point
605
+ unsafe { String :: from_utf8_unchecked ( bytes) } ,
606
+ )
607
+ }
608
+ }
609
+ Cow :: Owned ( mut text) => {
610
+ text_encoding:: strip_bom_mut ( & mut text) ;
611
+ Ok ( text)
612
+ }
613
+ }
614
+ }
615
+
515
616
/// Decodes the source bytes into a string handling any encoding rules
516
617
/// for local vs remote files and dealing with the charset.
517
618
pub fn decode_source (
@@ -828,4 +929,81 @@ pub mod tests {
828
929
) ;
829
930
}
830
931
}
932
+
933
/// A base64 `text/plain` data URL parses into its MIME type and payload.
#[test]
fn test_parse_valid_data_url() {
  let specifier =
    ModuleSpecifier::parse("data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==")
      .unwrap();
  let RawDataUrl { mime_type, bytes } =
    RawDataUrl::parse(&specifier).unwrap();
  assert_eq!(mime_type, "text/plain");
  assert_eq!(bytes, b"Hello, World!");
}
941
+
942
/// `charset()` reports the charset parameter carried by the MIME type.
#[test]
fn test_charset_with_valid_mime_type() {
  let mime_type = "text/plain; charset=utf-8".to_string();
  let data_url = RawDataUrl {
    mime_type,
    bytes: vec![],
  };
  assert_eq!(data_url.charset(), Some("utf-8"));
}
950
+
951
/// `charset()` is `None` when the MIME type has no charset parameter.
#[test]
fn test_charset_with_no_charset_in_mime_type() {
  let mime_type = "text/plain".to_string();
  let data_url = RawDataUrl {
    mime_type,
    bytes: vec![],
  };
  assert_eq!(data_url.charset(), None);
}
959
+
960
/// A JavaScript content type resolves to `MediaType::JavaScript`, even
/// with a trailing charset parameter.
#[test]
fn test_media_type_with_known_type() {
  let mime_type = "application/javascript;charset=utf-8".to_string();
  let data_url = RawDataUrl {
    mime_type,
    bytes: vec![],
  };
  assert_eq!(data_url.media_type(), MediaType::JavaScript);
}
968
+
969
/// An unrecognized content type resolves to `MediaType::Unknown`.
#[test]
fn test_media_type_with_unknown_type() {
  let mime_type = "unknown/unknown".to_string();
  let data_url = RawDataUrl {
    mime_type,
    bytes: vec![],
  };
  assert_eq!(data_url.media_type(), MediaType::Unknown);
}
977
+
978
/// UTF-8 bytes with an explicit `utf-8` charset decode to the same text.
#[test]
fn test_decode_with_valid_charset() {
  let data_url = RawDataUrl {
    mime_type: "text/plain; charset=utf-8".to_string(),
    bytes: "Hello, World!".as_bytes().to_vec(),
  };
  let decoded = data_url.decode().unwrap();
  assert_eq!(decoded, "Hello, World!");
}
986
+
987
/// Decoding fails when the MIME type names a charset that is not valid.
#[test]
fn test_decode_with_invalid_charset() {
  let data_url = RawDataUrl {
    mime_type: "text/plain; charset=invalid-charset".to_string(),
    bytes: vec![],
  };
  let outcome = data_url.decode();
  assert!(outcome.is_err());
}
995
+
996
/// `into_bytes_and_headers()` returns the payload unchanged and exposes the
/// full MIME type under the `content-type` header.
#[test]
fn test_into_bytes_and_headers() {
  let (bytes, headers) = RawDataUrl {
    mime_type: "text/plain; charset=utf-8".to_string(),
    bytes: "Hello, World!".as_bytes().to_vec(),
  }
  .into_bytes_and_headers();
  assert_eq!(bytes, "Hello, World!".as_bytes());
  assert_eq!(
    headers.get("content-type").unwrap(),
    "text/plain; charset=utf-8"
  );
}
831
1009
}
0 commit comments