19
19
package name .fraser .neil .plaintext ;
20
20
21
21
import java .io .UnsupportedEncodingException ;
22
+ import java .lang .Character ;
22
23
import java .net .URLDecoder ;
23
24
import java .net .URLEncoder ;
24
25
import java .util .*;
@@ -1429,7 +1430,31 @@ public int diff_levenshtein(List<Diff> diffs) {
1429
1430
*/
1430
1431
public String diff_toDelta (List <Diff > diffs ) {
1431
1432
StringBuilder text = new StringBuilder ();
1433
+ char lastEnd = 0 ;
1434
+ boolean isFirst = true ;
1432
1435
for (Diff aDiff : diffs ) {
1436
+ if (aDiff .text .isEmpty ()) {
1437
+ continue ;
1438
+ }
1439
+
1440
+ char thisTop = aDiff .text .charAt (0 );
1441
+ char thisEnd = aDiff .text .charAt (aDiff .text .length () - 1 );
1442
+
1443
+ if (Character .isHighSurrogate (thisEnd )) {
1444
+ lastEnd = thisEnd ;
1445
+ aDiff .text = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1446
+ }
1447
+
1448
+ if (! isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1449
+ aDiff .text = lastEnd + aDiff .text ;
1450
+ }
1451
+
1452
+ isFirst = false ;
1453
+
1454
+ if ( aDiff .text .isEmpty () ) {
1455
+ continue ;
1456
+ }
1457
+
1433
1458
switch (aDiff .operation ) {
1434
1459
case INSERT :
1435
1460
try {
@@ -1457,6 +1482,103 @@ public String diff_toDelta(List<Diff> diffs) {
1457
1482
return delta ;
1458
1483
}
1459
1484
1485
+ private int digit16 (char b ) throws IllegalArgumentException {
1486
+ switch (b ) {
1487
+ case '0' : return 0 ;
1488
+ case '1' : return 1 ;
1489
+ case '2' : return 2 ;
1490
+ case '3' : return 3 ;
1491
+ case '4' : return 4 ;
1492
+ case '5' : return 5 ;
1493
+ case '6' : return 6 ;
1494
+ case '7' : return 7 ;
1495
+ case '8' : return 8 ;
1496
+ case '9' : return 9 ;
1497
+ case 'A' : case 'a' : return 10 ;
1498
+ case 'B' : case 'b' : return 11 ;
1499
+ case 'C' : case 'c' : return 12 ;
1500
+ case 'D' : case 'd' : return 13 ;
1501
+ case 'E' : case 'e' : return 14 ;
1502
+ case 'F' : case 'f' : return 15 ;
1503
+ default :
1504
+ throw new IllegalArgumentException ();
1505
+ }
1506
+ }
1507
+
1508
+ private String decodeURI (String text ) throws IllegalArgumentException {
1509
+ int i = 0 ;
1510
+ StringBuilder decoded = new StringBuilder (text .length ());
1511
+
1512
+ while (i < text .length ()) {
1513
+ if (text .charAt (i ) != '%' ) {
1514
+ decoded .append (text .charAt (i ++));
1515
+ continue ;
1516
+ }
1517
+
1518
+ // start a percent-sequence
1519
+ int byte1 = (digit16 (text .charAt (i + 1 )) << 4 ) + digit16 (text .charAt (i + 2 ));
1520
+ if ((byte1 & 0x80 ) == 0 ) {
1521
+ decoded .append (Character .toChars (byte1 ));
1522
+ i += 3 ;
1523
+ continue ;
1524
+ }
1525
+
1526
+ if ( text .charAt (i + 3 ) != '%' ) {
1527
+ throw new IllegalArgumentException ();
1528
+ }
1529
+
1530
+ int byte2 = (digit16 (text .charAt (i + 4 )) << 4 ) + digit16 (text .charAt (i + 5 ));
1531
+ if ((byte2 & 0xC0 ) != 0x80 ) {
1532
+ throw new IllegalArgumentException ();
1533
+ }
1534
+ byte2 = byte2 & 0x3F ;
1535
+ if ((byte1 & 0xE0 ) == 0xC0 ) {
1536
+ decoded .append (Character .toChars (((byte1 & 0x1F ) << 6 ) | byte2 ));
1537
+ i += 6 ;
1538
+ continue ;
1539
+ }
1540
+
1541
+ if (text .charAt (i + 6 ) != '%' ) {
1542
+ throw new IllegalArgumentException ();
1543
+ }
1544
+
1545
+ int byte3 = (digit16 (text .charAt (i + 7 )) << 4 ) + digit16 (text .charAt (i + 8 ));
1546
+ if ((byte3 & 0xC0 ) != 0x80 ) {
1547
+ throw new IllegalArgumentException ();
1548
+ }
1549
+ byte3 = byte3 & 0x3F ;
1550
+ if ((byte1 & 0xF0 ) == 0xE0 ) {
1551
+ // unpaired surrogate are fine here
1552
+ decoded .append (Character .toChars (((byte1 & 0x0F ) << 12 ) | (byte2 << 6 ) | byte3 ));
1553
+ i += 9 ;
1554
+ continue ;
1555
+ }
1556
+
1557
+ if (text .charAt (i + 9 ) != '%' ) {
1558
+ throw new IllegalArgumentException ();
1559
+ }
1560
+
1561
+ int byte4 = (digit16 (text .charAt (i + 10 )) << 4 ) + digit16 (text .charAt (i + 11 ));
1562
+ if ((byte4 & 0xC0 ) != 0x80 ) {
1563
+ throw new IllegalArgumentException ();
1564
+ }
1565
+ byte4 = byte4 & 0x3F ;
1566
+ if ((byte1 & 0xF8 ) == 0xF0 ) {
1567
+ int codePoint = ((byte1 & 0x07 ) << 0x12 ) | (byte2 << 0x0C ) | (byte3 << 0x06 ) | byte4 ;
1568
+ if (codePoint >= 0x010000 && codePoint <= 0x10FFFF ) {
1569
+ decoded .append (Character .toChars ((codePoint & 0xFFFF ) >>> 10 & 0x3FF | 0xD800 ));
1570
+ decoded .append (Character .toChars (0xDC00 | (codePoint & 0xFFFF ) & 0x3FF ));
1571
+ i += 12 ;
1572
+ continue ;
1573
+ }
1574
+ }
1575
+
1576
+ throw new IllegalArgumentException ();
1577
+ }
1578
+
1579
+ return decoded .toString ();
1580
+ }
1581
+
1460
1582
/**
1461
1583
* Given the original text1, and an encoded string which describes the
1462
1584
* operations required to transform text1 into text2, compute the full diff.
@@ -1483,10 +1605,7 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
1483
1605
// decode would change all "+" to " "
1484
1606
param = param .replace ("+" , "%2B" );
1485
1607
try {
1486
- param = URLDecoder .decode (param , "UTF-8" );
1487
- } catch (UnsupportedEncodingException e ) {
1488
- // Not likely on modern system.
1489
- throw new Error ("This system does not support UTF-8." , e );
1608
+ param = this .decodeURI (param );
1490
1609
} catch (IllegalArgumentException e ) {
1491
1610
// Malformed URI sequence.
1492
1611
throw new IllegalArgumentException (
@@ -2269,10 +2388,7 @@ public List<Patch> patch_fromText(String textline)
2269
2388
line = text .getFirst ().substring (1 );
2270
2389
line = line .replace ("+" , "%2B" ); // decode would change all "+" to " "
2271
2390
try {
2272
- line = URLDecoder .decode (line , "UTF-8" );
2273
- } catch (UnsupportedEncodingException e ) {
2274
- // Not likely on modern system.
2275
- throw new Error ("This system does not support UTF-8." , e );
2391
+ line = this .decodeURI (line );
2276
2392
} catch (IllegalArgumentException e ) {
2277
2393
// Malformed URI sequence.
2278
2394
throw new IllegalArgumentException (
0 commit comments