Skip to content
This repository was archived by the owner on Aug 5, 2024. It is now read-only.

Commit 0bf9b68

Browse files
committed
Speed up the semantic alignment loop in JavaScript
Some diffs result in the semantic alignment loop being run many times. This happens when comparing a file containing a long chunk of characters with a similar file containing the same long chunk of characters twice in succession. Manipulating indexes rather than creating new strings at each iteration makes the loop run much more quickly.
1 parent 62f2e68 commit 0bf9b68

File tree

1 file changed

+39
-36
lines changed

1 file changed

+39
-36
lines changed

javascript/diff_match_patch_uncompressed.js

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -874,17 +874,17 @@ diff_match_patch.prototype.diff_cleanupSemantic = function(diffs) {
874874
*/
875875
diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
876876
/**
877-
* Given two strings, compute a score representing whether the internal
877+
* Given a string and a boundary, compute a score representing whether the
878878
* boundary falls on logical boundaries.
879879
* Scores range from 6 (best) to 0 (worst).
880880
* Closure, but does not reference any external variables.
881-
* @param {string} one First string.
882-
* @param {string} two Second string.
881+
* @param {string} buffer String containing the boundary and surrounding text.
882+
* @param {number} index Index of the boundary.
883883
* @return {number} The score.
884884
* @private
885885
*/
886-
function diff_cleanupSemanticScore_(one, two) {
887-
if (!one || !two) {
886+
function diff_cleanupSemanticScore_(buffer, index) {
887+
if (index === 0 || index === buffer.length) {
888888
// Edges are the best.
889889
return 6;
890890
}
@@ -894,8 +894,8 @@ diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
894894
// 'whitespace'. Since this function's purpose is largely cosmetic,
895895
// the choice has been made to use each language's native features
896896
// rather than force total conformity.
897-
var char1 = one.charAt(one.length - 1);
898-
var char2 = two.charAt(0);
897+
var char1 = buffer.charAt(index - 1);
898+
var char2 = buffer.charAt(index);
899899
var nonAlphaNumeric1 = char1.match(diff_match_patch.nonAlphaNumericRegex_);
900900
var nonAlphaNumeric2 = char2.match(diff_match_patch.nonAlphaNumericRegex_);
901901
var whitespace1 = nonAlphaNumeric1 &&
@@ -907,9 +907,11 @@ diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
907907
var lineBreak2 = whitespace2 &&
908908
char2.match(diff_match_patch.linebreakRegex_);
909909
var blankLine1 = lineBreak1 &&
910-
one.match(diff_match_patch.blanklineEndRegex_);
910+
buffer.substring(index - diff_match_patch.blanklineEndRegexMaxLength_, index)
911+
.match(diff_match_patch.blanklineEndRegex_);
911912
var blankLine2 = lineBreak2 &&
912-
two.match(diff_match_patch.blanklineStartRegex_);
913+
buffer.substring(index, index + diff_match_patch.blanklineStartRegexMaxLength_)
914+
.match(diff_match_patch.blanklineStartRegex_);
913915

914916
if (blankLine1 || blankLine2) {
915917
// Five points for blank lines.
@@ -939,48 +941,45 @@ diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
939941
var equality1 = diffs[pointer - 1][1];
940942
var edit = diffs[pointer][1];
941943
var equality2 = diffs[pointer + 1][1];
944+
var buffer = equality1 + edit + equality2;
942945

943946
// First, shift the edit as far left as possible.
944-
var commonOffset = this.diff_commonSuffix(equality1, edit);
945-
if (commonOffset) {
946-
var commonString = edit.substring(edit.length - commonOffset);
947-
equality1 = equality1.substring(0, equality1.length - commonOffset);
948-
edit = commonString + edit.substring(0, edit.length - commonOffset);
949-
equality2 = commonString + equality2;
950-
}
947+
var offsetLeft = this.diff_commonSuffix(equality1, edit);
948+
var offsetRight = this.diff_commonPrefix(edit, equality2);
949+
var originalEditStart = equality1.length;
950+
var editStart = originalEditStart - offsetLeft;
951+
var maxEditStart = originalEditStart + offsetRight;
952+
var editEnd = editStart + edit.length;
951953

952954
// Second, step character by character right, looking for the best fit.
953-
var bestEquality1 = equality1;
954-
var bestEdit = edit;
955-
var bestEquality2 = equality2;
956-
var bestScore = diff_cleanupSemanticScore_(equality1, edit) +
957-
diff_cleanupSemanticScore_(edit, equality2);
958-
while (edit.charAt(0) === equality2.charAt(0)) {
959-
equality1 += edit.charAt(0);
960-
edit = edit.substring(1) + equality2.charAt(0);
961-
equality2 = equality2.substring(1);
962-
var score = diff_cleanupSemanticScore_(equality1, edit) +
963-
diff_cleanupSemanticScore_(edit, equality2);
955+
var bestEditStart = editStart;
956+
var bestEditEnd = editEnd;
957+
var bestScore = diff_cleanupSemanticScore_(buffer, editStart) +
958+
diff_cleanupSemanticScore_(buffer, editEnd);
959+
while (editStart < maxEditStart) {
960+
editStart += 1;
961+
editEnd += 1;
962+
var score = diff_cleanupSemanticScore_(buffer, editStart) +
963+
diff_cleanupSemanticScore_(buffer, editEnd);
964964
// The >= encourages trailing rather than leading whitespace on edits.
965965
if (score >= bestScore) {
966966
bestScore = score;
967-
bestEquality1 = equality1;
968-
bestEdit = edit;
969-
bestEquality2 = equality2;
967+
bestEditStart = editStart;
968+
bestEditEnd = editEnd;
970969
}
971970
}
972971

973-
if (diffs[pointer - 1][1] != bestEquality1) {
972+
if (bestEditStart != originalEditStart) {
974973
// We have an improvement, save it back to the diff.
975-
if (bestEquality1) {
976-
diffs[pointer - 1][1] = bestEquality1;
974+
if (bestEditStart > 0) {
975+
diffs[pointer - 1][1] = buffer.substring(0, bestEditStart);
977976
} else {
978977
diffs.splice(pointer - 1, 1);
979978
pointer--;
980979
}
981-
diffs[pointer][1] = bestEdit;
982-
if (bestEquality2) {
983-
diffs[pointer + 1][1] = bestEquality2;
980+
diffs[pointer][1] = buffer.substring(bestEditStart, bestEditEnd);
981+
if (bestEditEnd < buffer.length) {
982+
diffs[pointer + 1][1] = buffer.substring(bestEditEnd);
984983
} else {
985984
diffs.splice(pointer + 1, 1);
986985
pointer--;
@@ -998,6 +997,10 @@ diff_match_patch.linebreakRegex_ = /[\r\n]/;
998997
diff_match_patch.blanklineEndRegex_ = /\n\r?\n$/;
999998
diff_match_patch.blanklineStartRegex_ = /^\r?\n\r?\n/;
1000999

1000+
// Maximum number of characters each blank line regex can match; used to
// limit the substring of the buffer that is tested against the regex.
1001+
diff_match_patch.blanklineEndRegexMaxLength_ = 3;
1002+
diff_match_patch.blanklineStartRegexMaxLength_ = 4;
1003+
10011004
/**
10021005
* Reduce the number of edits by eliminating operationally trivial equalities.
10031006
* @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.

0 commit comments

Comments
 (0)