@@ -874,17 +874,17 @@ diff_match_patch.prototype.diff_cleanupSemantic = function(diffs) {
874
874
*/
875
875
diff_match_patch . prototype . diff_cleanupSemanticLossless = function ( diffs ) {
876
876
/**
877
- * Given two strings , compute a score representing whether the internal
877
+ * Given a string and a boundary , compute a score representing whether the
878
878
* boundary falls on logical boundaries.
879
879
* Scores range from 6 (best) to 0 (worst).
880
880
* Closure, but does not reference any external variables.
881
- * @param {string } one First string .
882
- * @param {string } two Second string .
881
+ * @param {string } buffer String containing the boundary and surrounding text .
882
+ * @param {number } index Index of the boundary .
883
883
* @return {number } The score.
884
884
* @private
885
885
*/
886
- function diff_cleanupSemanticScore_ ( one , two ) {
887
- if ( ! one || ! two ) {
886
+ function diff_cleanupSemanticScore_ ( buffer , index ) {
887
+ if ( index === 0 || index === buffer . length ) {
888
888
// Edges are the best.
889
889
return 6 ;
890
890
}
@@ -894,8 +894,8 @@ diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
894
894
// 'whitespace'. Since this function's purpose is largely cosmetic,
895
895
// the choice has been made to use each language's native features
896
896
// rather than force total conformity.
897
- var char1 = one . charAt ( one . length - 1 ) ;
898
- var char2 = two . charAt ( 0 ) ;
897
+ var char1 = buffer . charAt ( index - 1 ) ;
898
+ var char2 = buffer . charAt ( index ) ;
899
899
var nonAlphaNumeric1 = char1 . match ( diff_match_patch . nonAlphaNumericRegex_ ) ;
900
900
var nonAlphaNumeric2 = char2 . match ( diff_match_patch . nonAlphaNumericRegex_ ) ;
901
901
var whitespace1 = nonAlphaNumeric1 &&
@@ -907,9 +907,11 @@ diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
907
907
var lineBreak2 = whitespace2 &&
908
908
char2 . match ( diff_match_patch . linebreakRegex_ ) ;
909
909
var blankLine1 = lineBreak1 &&
910
- one . match ( diff_match_patch . blanklineEndRegex_ ) ;
910
+ buffer . substring ( index - diff_match_patch . blanklineEndRegexMaxLength_ , index )
911
+ . match ( diff_match_patch . blanklineEndRegex_ ) ;
911
912
var blankLine2 = lineBreak2 &&
912
- two . match ( diff_match_patch . blanklineStartRegex_ ) ;
913
+ buffer . substring ( index , index + diff_match_patch . blanklineStartRegexMaxLength_ )
914
+ . match ( diff_match_patch . blanklineStartRegex_ ) ;
913
915
914
916
if ( blankLine1 || blankLine2 ) {
915
917
// Five points for blank lines.
@@ -939,48 +941,45 @@ diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
939
941
var equality1 = diffs [ pointer - 1 ] [ 1 ] ;
940
942
var edit = diffs [ pointer ] [ 1 ] ;
941
943
var equality2 = diffs [ pointer + 1 ] [ 1 ] ;
944
+ var buffer = equality1 + edit + equality2 ;
942
945
943
946
// First, shift the edit as far left as possible.
944
- var commonOffset = this . diff_commonSuffix ( equality1 , edit ) ;
945
- if ( commonOffset ) {
946
- var commonString = edit . substring ( edit . length - commonOffset ) ;
947
- equality1 = equality1 . substring ( 0 , equality1 . length - commonOffset ) ;
948
- edit = commonString + edit . substring ( 0 , edit . length - commonOffset ) ;
949
- equality2 = commonString + equality2 ;
950
- }
947
+ var offsetLeft = this . diff_commonSuffix ( equality1 , edit ) ;
948
+ var offsetRight = this . diff_commonPrefix ( edit , equality2 ) ;
949
+ var originalEditStart = equality1 . length ;
950
+ var editStart = originalEditStart - offsetLeft ;
951
+ var maxEditStart = originalEditStart + offsetRight ;
952
+ var editEnd = editStart + edit . length ;
951
953
952
954
// Second, step character by character right, looking for the best fit.
953
- var bestEquality1 = equality1 ;
954
- var bestEdit = edit ;
955
- var bestEquality2 = equality2 ;
956
- var bestScore = diff_cleanupSemanticScore_ ( equality1 , edit ) +
957
- diff_cleanupSemanticScore_ ( edit , equality2 ) ;
958
- while ( edit . charAt ( 0 ) === equality2 . charAt ( 0 ) ) {
959
- equality1 += edit . charAt ( 0 ) ;
960
- edit = edit . substring ( 1 ) + equality2 . charAt ( 0 ) ;
961
- equality2 = equality2 . substring ( 1 ) ;
962
- var score = diff_cleanupSemanticScore_ ( equality1 , edit ) +
963
- diff_cleanupSemanticScore_ ( edit , equality2 ) ;
955
+ var bestEditStart = editStart ;
956
+ var bestEditEnd = editEnd ;
957
+ var bestScore = diff_cleanupSemanticScore_ ( buffer , editStart ) +
958
+ diff_cleanupSemanticScore_ ( buffer , editEnd ) ;
959
+ while ( editStart < maxEditStart ) {
960
+ editStart += 1 ;
961
+ editEnd += 1 ;
962
+ var score = diff_cleanupSemanticScore_ ( buffer , editStart ) +
963
+ diff_cleanupSemanticScore_ ( buffer , editEnd ) ;
964
964
// The >= encourages trailing rather than leading whitespace on edits.
965
965
if ( score >= bestScore ) {
966
966
bestScore = score ;
967
- bestEquality1 = equality1 ;
968
- bestEdit = edit ;
969
- bestEquality2 = equality2 ;
967
+ bestEditStart = editStart ;
968
+ bestEditEnd = editEnd ;
970
969
}
971
970
}
972
971
973
- if ( diffs [ pointer - 1 ] [ 1 ] != bestEquality1 ) {
972
+ if ( bestEditStart != originalEditStart ) {
974
973
// We have an improvement, save it back to the diff.
975
- if ( bestEquality1 ) {
976
- diffs [ pointer - 1 ] [ 1 ] = bestEquality1 ;
974
+ if ( bestEditStart > 0 ) {
975
+ diffs [ pointer - 1 ] [ 1 ] = buffer . substring ( 0 , bestEditStart ) ;
977
976
} else {
978
977
diffs . splice ( pointer - 1 , 1 ) ;
979
978
pointer -- ;
980
979
}
981
- diffs [ pointer ] [ 1 ] = bestEdit ;
982
- if ( bestEquality2 ) {
983
- diffs [ pointer + 1 ] [ 1 ] = bestEquality2 ;
980
+ diffs [ pointer ] [ 1 ] = buffer . substring ( bestEditStart , bestEditEnd ) ;
981
+ if ( bestEditEnd < buffer . length ) {
982
+ diffs [ pointer + 1 ] [ 1 ] = buffer . substring ( bestEditEnd ) ;
984
983
} else {
985
984
diffs . splice ( pointer + 1 , 1 ) ;
986
985
pointer -- ;
@@ -998,6 +997,10 @@ diff_match_patch.linebreakRegex_ = /[\r\n]/;
998
997
diff_match_patch . blanklineEndRegex_ = / \n \r ? \n $ / ;
999
998
diff_match_patch . blanklineStartRegex_ = / ^ \r ? \n \r ? \n / ;
1000
999
1000
+ // Maximum number of characters that each blank line regex above can match; used to bound the substring that is tested.
1001
+ diff_match_patch . blanklineEndRegexMaxLength_ = 3 ;
1002
+ diff_match_patch . blanklineStartRegexMaxLength_ = 4 ;
1003
+
1001
1004
/**
1002
1005
* Reduce the number of edits by eliminating operationally trivial equalities.
1003
1006
* @param {!Array.<!diff_match_patch.Diff> } diffs Array of diff tuples.
0 commit comments