@@ -88,6 +88,35 @@ diff_match_patch.Diff.prototype.toString = function() {
88
88
return this [ 0 ] + ',' + this [ 1 ] ;
89
89
} ;
90
90
91
/**
 * Determine whether a string begins with a UTF-16 high (leading) surrogate.
 * @param {string} c String whose first code unit is examined; callers
 *     normally pass a single character obtained by indexing into a text.
 * @return {boolean} True if the first code unit lies in 0xD800..0xDBFF.
 *     False for an empty string and for any non-string value, such as the
 *     undefined produced by indexing past the end of a string.
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  // Indexing one past the end of a string yields undefined; answer "not a
  // surrogate" instead of throwing on charCodeAt.
  if (typeof c !== 'string') {
    return false;
  }
  // charCodeAt(0) of '' is NaN, which fails both comparisons below.
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
95
+
96
/**
 * Determine whether a string begins with a UTF-16 low (trailing) surrogate.
 * @param {string} c String whose first code unit is examined; callers
 *     normally pass a single character obtained by indexing into a text.
 * @return {boolean} True if the first code unit lies in 0xDC00..0xDFFF.
 *     False for an empty string and for any non-string value, such as the
 *     undefined produced by indexing past the end of a string.
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  // An out-of-range index yields undefined; answer "not a surrogate" instead
  // of throwing on charCodeAt.
  if (typeof c !== 'string') {
    return false;
  }
  // charCodeAt(0) of '' is NaN, which fails both comparisons below.
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
100
+
101
/**
 * Split a string into an array of its characters, keeping each well-formed
 * surrogate pair together as one two-code-unit element.
 * @param {string} str Text to split. A null or undefined value is treated as
 *     empty text.
 * @return {!Array.<string>} One element per character, in order. A high
 *     surrogate immediately followed by a low surrogate is emitted as a
 *     single element; any other code unit, including an unpaired surrogate,
 *     is emitted on its own, unaltered.
 */
diff_match_patch.prototype.scalarValues = function(str) {
  // Treat a missing text as empty rather than throwing on `.length`.
  if (str === null || str === undefined) {
    return [];
  }

  var length = str.length;
  var scalars = [];

  for (var i = 0; i < length; i++) {
    var scalar = str[i];

    // Proper surrogate pairs come through as the whole scalar value, but
    // broken pairs are passed through unaltered. The bounds check ensures
    // str[i + 1] exists before it is inspected.
    if (i < length - 1 &&
        this.isHighSurrogate(scalar) &&
        this.isLowSurrogate(str[i + 1])) {
      scalar += str[i + 1];
      i++;  // The low surrogate has been consumed as part of this element.
    }

    scalars.push(scalar);
  }

  return scalars;
};
91
120
92
121
/**
93
122
* Find the differences between two texts. Simplifies the problem by stripping
@@ -134,12 +163,18 @@ diff_match_patch.prototype.diff_main = function(text1, text2, opt_checklines,
134
163
135
164
// Trim off common prefix (speedup).
136
165
var commonlength = this . diff_commonPrefix ( text1 , text2 ) ;
166
+ if ( commonlength > 0 && this . isHighSurrogate ( text1 [ commonlength - 1 ] ) ) {
167
+ commonlength -- ;
168
+ }
137
169
var commonprefix = text1 . substring ( 0 , commonlength ) ;
138
170
text1 = text1 . substring ( commonlength ) ;
139
171
text2 = text2 . substring ( commonlength ) ;
140
172
141
173
// Trim off common suffix (speedup).
142
174
commonlength = this . diff_commonSuffix ( text1 , text2 ) ;
175
+ if ( commonlength > 0 && this . isLowSurrogate ( text1 [ text1 . length - commonlength ] ) ) {
176
+ commonlength -- ;
177
+ }
143
178
var commonsuffix = text1 . substring ( text1 . length - commonlength ) ;
144
179
text1 = text1 . substring ( 0 , text1 . length - commonlength ) ;
145
180
text2 = text2 . substring ( 0 , text2 . length - commonlength ) ;
@@ -187,13 +222,23 @@ diff_match_patch.prototype.diff_compute_ = function(text1, text2, checklines,
187
222
188
223
var longtext = text1 . length > text2 . length ? text1 : text2 ;
189
224
var shorttext = text1 . length > text2 . length ? text2 : text1 ;
225
+ var shortlength = shorttext . length ;
190
226
var i = longtext . indexOf ( shorttext ) ;
191
227
if ( i != - 1 ) {
228
+ // skip leading unpaired surrogate
229
+ if ( this . isLowSurrogate ( longtext [ i ] ) ) {
230
+ shortlength -- ;
231
+ i ++ ;
232
+ }
233
+ // skip trailing unpaired surrogate
234
+ if ( this . isHighSurrogate ( longtext [ i + shortlength ] ) ) {
235
+ shortlength -- ;
236
+ }
192
237
// Shorter text is inside the longer text (speedup).
193
238
diffs = [ new diff_match_patch . Diff ( DIFF_INSERT , longtext . substring ( 0 , i ) ) ,
194
239
new diff_match_patch . Diff ( DIFF_EQUAL , shorttext ) ,
195
240
new diff_match_patch . Diff ( DIFF_INSERT ,
196
- longtext . substring ( i + shorttext . length ) ) ] ;
241
+ longtext . substring ( i + shortlength ) ) ] ;
197
242
// Swap insertions for deletions if diff is reversed.
198
243
if ( text1 . length > text2 . length ) {
199
244
diffs [ 0 ] [ 0 ] = diffs [ 2 ] [ 0 ] = DIFF_DELETE ;
@@ -439,6 +484,15 @@ diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) {
439
484
*/
440
485
diff_match_patch . prototype . diff_bisectSplit_ = function ( text1 , text2 , x , y ,
441
486
deadline ) {
487
+ // backup if we split a surrogate
488
+ if (
489
+ x > 0 && x < text1 . length && this . isLowSurrogate ( text1 [ x ] ) &&
490
+ y > 0 && y < text2 . length && this . isLowSurrogate ( text2 [ y ] )
491
+ ) {
492
+ x -- ;
493
+ y -- ;
494
+ }
495
+
442
496
var text1a = text1 . substring ( 0 , x ) ;
443
497
var text2a = text2 . substring ( 0 , y ) ;
444
498
var text1b = text1 . substring ( x ) ;
@@ -569,6 +623,12 @@ diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) {
569
623
}
570
624
pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
571
625
}
626
+
627
+ // shorten the prefix if it splits a surrogate
628
+ if ( pointermid > 0 && this . isHighSurrogate ( text1 [ pointermid - 1 ] ) ) {
629
+ pointermid -- ;
630
+ }
631
+
572
632
return pointermid ;
573
633
} ;
574
634
@@ -601,6 +661,12 @@ diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) {
601
661
}
602
662
pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
603
663
}
664
+
665
+ // shorten the suffix if it splits a surrogate
666
+ if ( pointermid < length - 1 && this . isLowSurrogate ( text1 [ pointermid ] ) ) {
667
+ pointermid ++ ;
668
+ }
669
+
604
670
return pointermid ;
605
671
} ;
606
672
@@ -749,6 +815,24 @@ diff_match_patch.prototype.diff_halfMatch_ = function(text1, text2) {
749
815
text1_b = hm [ 3 ] ;
750
816
}
751
817
var mid_common = hm [ 4 ] ;
818
+
819
+ // move forward to prevent splitting a surrogate pair
820
+ if ( mid_common . length > 0 && this . isLowSurrogate ( mid_common [ 0 ] ) ) {
821
+ text1_a = text1_a + mid_common [ 0 ] ;
822
+ text2_a = text2_a + mid_common [ 0 ] ;
823
+ mid_common = mid_common . substring ( 1 ) ;
824
+ }
825
+
826
+ // back up to prevent splitting a surrogate pair
827
+ if (
828
+ text1_b . length > 0 && this . isLowSurrogate ( text1_b [ 0 ] ) &&
829
+ text2_b . length > 0 && this . isLowSurrogate ( text2_b [ 0 ] )
830
+ ) {
831
+ text1_b = mid_common [ mid_common . length - 1 ] + text1_b ;
832
+ text2_b = mid_common [ mid_common . length - 1 ] + text2_b ;
833
+ mid_common = mid_common . substring ( 0 , - 1 ) ;
834
+ }
835
+
752
836
return [ text1_a , text1_b , text2_a , text2_b , mid_common ] ;
753
837
} ;
754
838
0 commit comments