@@ -19,6 +19,7 @@ use imara_diff::{
1919} ; 
2020use  pulldown_cmark_escape:: FmtWriter ; 
2121use  regex:: Regex ; 
22+ use  unicode_segmentation:: UnicodeSegmentation ; 
2223
2324use  crate :: github:: GithubCompare ; 
2425use  crate :: utils:: is_repo_autorized; 
@@ -249,18 +250,34 @@ fn process_old_new(
249250      background-color: rgba(150, 255, 150, 1); 
250251      white-space: pre; 
251252    }} 
252-     .removed- line-after {{ 
253+     .line-removed -after {{ 
253254      color: rgb(220, 0, 0) 
254255    }} 
255-     .added- line-after {{ 
256+     .line-added -after {{ 
256257      color: rgb(0, 73, 0) 
257258    }} 
258-     .removed- line-before {{ 
259+     .line-removed -before {{ 
259260      color: rgb(192, 78, 76) 
260261    }} 
261-     .added- line-before {{ 
262+     .line-added -before {{ 
262263      color: rgb(63, 128, 94) 
263264    }} 
265+     .word-removed-after {{ 
266+       color: white; 
267+       background-color: rgb(220, 0, 0); 
268+     }} 
269+     .word-added-after {{ 
270+       color: white; 
271+       background-color: rgb(0, 73, 0); 
272+     }} 
273+     .word-removed-before {{ 
274+       color: white; 
275+       background-color: rgb(192, 78, 76); 
276+     }} 
277+     .word-added-before {{ 
278+       color: white; 
279+       background-color: rgb(63, 128, 94); 
280+     }} 
264281    @media (prefers-color-scheme: dark) {{ 
265282      body {{ 
266283        background: #0C0C0C; 
@@ -277,18 +294,34 @@ fn process_old_new(
277294        background-color: rgba(70, 120, 70, 1); 
278295        white-space: pre; 
279296      }} 
280-       .removed- line-after {{ 
297+       .line-removed -after {{ 
281298        color: rgba(255, 0, 0, 1); 
282299      }} 
283-       .added- line-after {{ 
300+       .line-added -after {{ 
284301        color: rgba(0, 255, 0, 1); 
285302      }} 
286-       .removed- line-before {{ 
303+       .line-removed -before {{ 
287304        color: rgba(100, 0, 0, 1); 
288305      }} 
289-       .added- line-before {{ 
306+       .line-added -before {{ 
290307        color: rgba(0, 100, 0, 1); 
291308      }} 
309+       .word-removed-after {{ 
310+         color: black; 
311+         background-color: rgba(255, 0, 0, 1); 
312+       }} 
313+       .word-added-after {{ 
314+         color: black; 
315+         background-color: rgba(0, 255, 0, 1); 
316+       }} 
317+       .word-removed-before {{ 
318+         color: black; 
319+         background-color: rgba(100, 0, 0, 1); 
320+       }} 
321+       .word-added-before {{ 
322+         color: black; 
323+         background-color: rgba(0, 100, 0, 1); 
324+       }} 
292325    }} 
293326    </style> 
294327</head> 
@@ -400,6 +433,7 @@ fn process_old_new(
400433const  REMOVED_BLOCK_SIGN :  & str  = r#"<span class="removed-block"> - </span>"# ; 
401434const  ADDED_BLOCK_SIGN :  & str  = r#"<span class="added-block"> + </span>"# ; 
402435
436+ #[ derive( Copy ,  Clone ) ]  
403437enum  HunkTokenStatus  { 
404438    Added , 
405439    Removed , 
@@ -408,39 +442,56 @@ enum HunkTokenStatus {
408442struct  HtmlDiffPrinter < ' a > ( pub  & ' a  Interner < & ' a  str > ) ; 
409443
410444impl  HtmlDiffPrinter < ' _ >  { 
411-     fn  handle_hunk_token ( 
445+     fn  handle_hunk_line < ' a > ( 
412446        & self , 
413447        mut  f :  impl  fmt:: Write , 
414448        hunk_token_status :  HunkTokenStatus , 
415-         token :   & str , 
449+         words :   impl   Iterator < Item  =  ( & ' a   str ,   bool ) > , 
416450    )  -> fmt:: Result  { 
417451        // Show the hunk status 
418452        match  hunk_token_status { 
419453            HunkTokenStatus :: Added  => write ! ( f,  "{ADDED_BLOCK_SIGN} " ) ?, 
420454            HunkTokenStatus :: Removed  => write ! ( f,  "{REMOVED_BLOCK_SIGN} " ) ?, 
421455        } ; 
422456
423-         let  is_add = token. starts_with ( '+' ) ; 
424-         let  is_remove = token. starts_with ( '-' ) ; 
457+         let  mut  words = words. peekable ( ) ; 
458+ 
459+         let  first_word = words. peek ( ) ; 
460+         let  is_add = first_word. map ( |w| w. 0 . starts_with ( '+' ) ) . unwrap_or_default ( ) ; 
461+         let  is_remove = first_word. map ( |w| w. 0 . starts_with ( '-' ) ) . unwrap_or_default ( ) ; 
425462
426463        // Highlight in the same was as `git range-diff` does for diff-lines 
427-         // that changed. (Contrary to `git range-diff` we don't color unchanged 
464+         // that changed. In addition we also do word highlighting. 
465+         // 
466+         // (Contrary to `git range-diff` we don't color unchanged 
428467        // diff lines though, since then the coloring distracts from what is 
429468        // relevant.) 
430469        if  is_add || is_remove { 
431-             let  class  = match  ( hunk_token_status,  is_add)  { 
432-                 ( HunkTokenStatus :: Removed ,  true )  => "added-line- before" , 
433-                 ( HunkTokenStatus :: Removed ,  false )  => "removed-line- before" , 
434-                 ( HunkTokenStatus :: Added ,  true )  => "added-line- after" , 
435-                 ( HunkTokenStatus :: Added ,  false )  => "removed-line- after" , 
470+             let  prefix_class  = match  ( hunk_token_status,  is_add)  { 
471+                 ( HunkTokenStatus :: Removed ,  true )  => "added-before" , 
472+                 ( HunkTokenStatus :: Removed ,  false )  => "removed-before" , 
473+                 ( HunkTokenStatus :: Added ,  true )  => "added-after" , 
474+                 ( HunkTokenStatus :: Added ,  false )  => "removed-after" , 
436475            } ; 
476+             write ! ( f,  r#"<span class="line-{prefix_class}">"# ) ?; 
477+ 
478+             for  ( word,  changed)  in  words { 
479+                 if  changed { 
480+                     write ! ( f,  r#"<span class="word-{prefix_class}">"# ) ?; 
481+                     pulldown_cmark_escape:: escape_html ( FmtWriter ( & mut  f) ,  word) ?; 
482+                     write ! ( f,  "</span>" ) ?; 
483+                 }  else  { 
484+                     pulldown_cmark_escape:: escape_html ( FmtWriter ( & mut  f) ,  word) ?; 
485+                 } 
486+             } 
437487
438-             write ! ( f,  r#"<span class="{class}">"# ) ?; 
439-             pulldown_cmark_escape:: escape_html ( FmtWriter ( & mut  f) ,  token) ?; 
440488            write ! ( f,  "</span>" ) ?; 
441489        }  else  { 
442-             pulldown_cmark_escape:: escape_html ( FmtWriter ( & mut  f) ,  token) ?; 
490+             for  ( word,  _status)  in  words { 
491+                 pulldown_cmark_escape:: escape_html ( FmtWriter ( & mut  f) ,  word) ?; 
492+             } 
443493        } 
494+ 
444495        Ok ( ( ) ) 
445496    } 
446497} 
@@ -474,23 +525,82 @@ impl UnifiedDiffPrinter for HtmlDiffPrinter<'_> {
474525        before :  & [ Token ] , 
475526        after :  & [ Token ] , 
476527    )  -> fmt:: Result  { 
477-         if  let  Some ( & last)  = before. last ( )  { 
478-             for  & token in  before { 
479-                 let  token = self . 0 [ token] ; 
480-                 self . handle_hunk_token ( & mut  f,  HunkTokenStatus :: Removed ,  token) ?; 
528+         // To improve on the line-by-line diff we also want to do a sort of `git diff --word-diff` 
529+         // (aka word highlighting). To achieve word highlighting, we only consider hunks that 
530+         // have the same number of lines removed and added, otherwise it's much more complex 
531+         // to link the changes together. 
532+ 
533+         if  before. len ( )  == after. len ( )  { 
534+             // Same number of lines before and after, can do word-highlighting. 
535+ 
536+             // Diff the individual lines together. 
537+             let  diffs_and_inputs:  Vec < _ >  = before
538+                 . into_iter ( ) 
539+                 . zip ( after. into_iter ( ) ) 
540+                 . map ( |( b_token,  a_token) | { 
541+                     // Split both lines by words and intern them. 
542+                     let  input:  InternedInput < & str >  = InternedInput :: new ( 
543+                         SplitWordBoundaries ( self . 0 [ * b_token] ) , 
544+                         SplitWordBoundaries ( self . 0 [ * a_token] ) , 
545+                     ) ; 
546+ 
547+                     // Compute the (word) diff 
548+                     let  diff = Diff :: compute ( Algorithm :: Histogram ,  & input) ; 
549+ 
550+                     ( diff,  input) 
551+                 } ) 
552+                 . collect ( ) ; 
553+ 
554+             // Process all before lines first 
555+             for  ( diff,  input)  in  diffs_and_inputs. iter ( )  { 
556+                 self . handle_hunk_line ( 
557+                     & mut  f, 
558+                     HunkTokenStatus :: Removed , 
559+                     input. before . iter ( ) . enumerate ( ) . map ( |( b_pos,  b_token) | { 
560+                         ( input. interner [ * b_token] ,  diff. is_removed ( b_pos as  u32 ) ) 
561+                     } ) , 
562+                 ) ?; 
481563            } 
482-             if  !self . 0 [ last] . ends_with ( '\n' )  { 
483-                 writeln ! ( f) ?; 
484-             } 
485-         } 
486564
487-         if  let  Some ( & last)  = after. last ( )  { 
488-             for  & token in  after { 
489-                 let  token = self . 0 [ token] ; 
490-                 self . handle_hunk_token ( & mut  f,  HunkTokenStatus :: Added ,  token) ?; 
565+             // Then process all after lines 
566+             for  ( diff,  input)  in  diffs_and_inputs. iter ( )  { 
567+                 self . handle_hunk_line ( 
568+                     & mut  f, 
569+                     HunkTokenStatus :: Added , 
570+                     input. after . iter ( ) . enumerate ( ) . map ( |( a_pos,  a_token) | { 
571+                         ( input. interner [ * a_token] ,  diff. is_added ( a_pos as  u32 ) ) 
572+                     } ) , 
573+                 ) ?; 
574+             } 
575+         }  else  { 
576+             // Can't do word-highlighting, simply print each line. 
577+ 
578+             if  let  Some ( & last)  = before. last ( )  { 
579+                 for  & token in  before { 
580+                     let  token = self . 0 [ token] ; 
581+                     self . handle_hunk_line ( 
582+                         & mut  f, 
583+                         HunkTokenStatus :: Removed , 
584+                         std:: iter:: once ( ( token,  false ) ) , 
585+                     ) ?; 
586+                 } 
587+                 if  !self . 0 [ last] . ends_with ( '\n' )  { 
588+                     writeln ! ( f) ?; 
589+                 } 
491590            } 
492-             if  !self . 0 [ last] . ends_with ( '\n' )  { 
493-                 writeln ! ( f) ?; 
591+ 
592+             if  let  Some ( & last)  = after. last ( )  { 
593+                 for  & token in  after { 
594+                     let  token = self . 0 [ token] ; 
595+                     self . handle_hunk_line ( 
596+                         & mut  f, 
597+                         HunkTokenStatus :: Added , 
598+                         std:: iter:: once ( ( token,  false ) ) , 
599+                     ) ?; 
600+                 } 
601+                 if  !self . 0 [ last] . ends_with ( '\n' )  { 
602+                     writeln ! ( f) ?; 
603+                 } 
494604            } 
495605        } 
496606        Ok ( ( ) ) 
@@ -514,3 +624,20 @@ fn bookmarklet(host: &str) -> String {
514624}})();" 
515625    ) 
516626} 
627+ 
628+ // Simple abstraction over `unicode_segmentation::split_word_bounds` for `imara_diff::TokenSource` 
629+ struct  SplitWordBoundaries < ' a > ( & ' a  str ) ; 
630+ 
631+ impl < ' a >  imara_diff:: TokenSource  for  SplitWordBoundaries < ' a >  { 
632+     type  Token  = & ' a  str ; 
633+     type  Tokenizer  = unicode_segmentation:: UWordBounds < ' a > ; 
634+ 
635+     fn  tokenize ( & self )  -> Self :: Tokenizer  { 
636+         self . 0 . split_word_bounds ( ) 
637+     } 
638+ 
639+     fn  estimate_tokens ( & self )  -> u32  { 
640+         // https://www.wyliecomm.com/2021/11/whats-the-best-length-of-a-word-online/ 
641+         ( self . 0 . len ( )  as  f32  / 4.7f32 )  as  u32 
642+     } 
643+ } 
0 commit comments