-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathWebUtil.pas
677 lines (637 loc) · 27.9 KB
/
WebUtil.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
unit WebUtil;
{------------------------------------------------------------------------------}
{ (Very) efficient Pascal implementations of : }
{ }
{ -URLEncode/URLDecode (Following the RFC2396 specification) }
{ -HTMLEncode (See: http://www.w3.org/TR/WD-html40-970708/sgml/entities.html) }
{ -XHTML named chars (See: http://www.redstart.com/XHTML/entities.htm) }
{ }
{ Supporting the complete ISO-LATIN1 (8859-1) character set }
{ }
{ (c) 2001, Danny Heijl ([email protected]) }
{ Provided as freeware. Use at your own risk. No strings attached. }
{ (c) 2003, Zdravko Stoychev ([email protected]) }
{ Added XML named character apostrophe (&apos;) support }
{ }
{------------------------------------------------------------------------------}
{
*******************************************************************************
* Descriptions: Main Unit for FMA
* $Source: /cvsroot/fma/fma/components/WebUtil.pas,v $
* $Locker: $
*
* Todo:
* - add cyrillic named chars support
*
* Change Log:
* $Log: WebUtil.pas,v $
* Revision 1.6 2005/02/17 14:00:16 z_stoichev
* Fixed space encode/decode issue.
*
* Revision 1.5 2005/02/14 10:22:31 z_stoichev
* Encode Space too.
*
* Revision 1.4 2005/02/08 15:39:09 voxik
* Merged with L10N branch
*
* Revision 1.3.12.1 2005/01/07 17:57:48 expertone
* Merge with MAIN branch
*
* Revision 1.3 2004/03/26 14:27:00 z_stoichev
* Optional encoding of Non US Asci chars.
*
* Revision 1.2 2003/11/28 09:38:08 z_stoichev
* Merged with branch-release-1-1 (Fma 0.10.28c)
*
* Revision 1.1.2.1 2003/11/07 16:45:56 z_stoichev
* Initial checkin.
*
*
*
}
interface
uses SysUtils;
type
{ Raised by URLDecode for malformed input: a '%' escape that is cut short by }
{ the end of the string, or whose two following characters are not both }
{ hexadecimal digits. }
EURLDecode = class(Exception);
{ WEB Utility functions }
{ Returns Src with every character percent-escaped ("%XX") except letters, }
{ digits and the RFC 2396 "mark" characters - _ . ! ~ * ' ( ) }
function URLEncode(Src: string): string;
{ Returns Src with each "%XX" escape replaced by the character it encodes; }
{ all other characters are copied unchanged. Raises EURLDecode on bad input. }
function URLDecode(Src: string): string;
{ Returns Src with the quote, ampersand, apostrophe, less-than and }
{ greater-than characters replaced by entity references and each space }
{ replaced by '%20'. When EncodeNonUSASCII is True (the default), characters }
{ 160..255 are also replaced by their named entity from HTML_Entities. }
function HTMLEncode(Src: string; EncodeNonUSASCII: boolean = True): string;
{ Returns Src with '%20' and the quot/apos/amp/lt/gt entity references turned }
{ back into single characters. Named entities for characters 160..255 are }
{ not decoded (see the TODO in the implementation). }
function HTMLDecode(Src: string): string;
implementation
{ URLEncode / URLDecode }
(*
********************
* RFC 2396 states: *
********************
alpha = lowalpha | upalpha
lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
"j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
"s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
"J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
"S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
"8" | "9"
alphanum = alpha | digit
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
"$" | ","
unreserved = alphanum | mark
mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
escaped = "%" hex hex
hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
"a" | "b" | "c" | "d" | "e" | "f"
UNSAFE :
control = <US-ASCII coded characters 00-1F and 7F hexadecimal>
space = <US-ASCII coded character 20 hexadecimal>
delims = "<" | ">" | "#" | "%" | <">
unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
*)
{ URLEncode }
{ Percent-encodes Src for use in a URL.

  Letters, digits and the RFC 2396 "mark" characters ( - _ . ! ~ * ' ( ) )
  are copied unchanged; every other character is written as '%' followed by
  two upper-case hexadecimal digits.

  Only the low 8 bits of each character are encoded, so with a 2-byte Char
  type code points above 255 are reduced to their low byte.

  Returns the encoded string ('' for empty input). }
function URLEncode(Src: string) : string;
const
  Hex : Array[0..15] of Char = (
    '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' // do not localize
  );
var
  i: Integer;
  Code: Integer;
  Encoded: string;
  P: PChar;
begin
  Result := '';
  if Length(Src) = 0 then
    Exit;

  { Worst case is three output characters per input character. SetLength
    counts in characters, so the buffer is correctly sized whatever
    SizeOf(Char) is (a byte-sized GetMem would overflow with 2-byte Chars). }
  SetLength(Encoded, Length(Src) * 3);
  P := PChar(Encoded);

  for i := 1 to Length(Src) do begin
    case Src[i] of
      'a'..'z',                                   {lowalpha}
      'A'..'Z',                                   {upalpha}
      '0'..'9',                                   {digit}
      '-', '_', '.', '!', '~', '*', '''', '(', ')': {mark}
        begin
          P^ := Src[i]; Inc(P);
        end;
    else
      begin
        { hand-coded IntToHex to avoid string handling overhead per character }
        Code := Ord(Src[i]);
        P^ := '%'; Inc(P);
        P^ := Hex[(Code shr 4) and $0f]; Inc(P);
        P^ := Hex[Code and $0f]; Inc(P);
      end;
    end;
  end; { for }

  { Trim the buffer to the number of characters actually written. }
  SetLength(Encoded, P - PChar(Encoded));
  Result := Encoded;
end;
{ URLDecode }
{ Decodes a percent-encoded string.

  Each "%XX" sequence (XX = two hexadecimal digits, either case) is replaced
  by the character with that code; every other character is copied as is.

  Raises EURLDecode when a '%' is not followed by two more characters, or
  when either of those characters is not a hexadecimal digit.

  Returns the decoded string ('' for empty input). }
function URLDecode(src: string): string;
var
  i, SrcLen: Integer;
  Decoded: string;
  P: PChar;

  { Convert one hexadecimal digit character to its value 0..15. }
  function HexDigit(ch: Char): Integer;
  begin
    case ch of
      '0'..'9': Result := Ord(ch) - Ord('0');      // do not localize
      'A'..'F': Result := Ord(ch) - Ord('A') + 10; // do not localize
      'a'..'f': Result := Ord(ch) - Ord('a') + 10; // do not localize
    else
      raise EURLDecode.Create('Invalid HEX digits in string passed to URLDecode.');
    end;
  end;

begin
  Result := '';
  SrcLen := Length(src);
  if SrcLen = 0 then
    Exit;

  { The output is never longer than the input. The work is done in a local
    string (sized in characters, not bytes) so that nothing has to be freed
    by hand when an exception is raised, and Result is only assigned once
    decoding has fully succeeded. }
  SetLength(Decoded, SrcLen);
  P := PChar(Decoded);

  i := 1;
  while i <= SrcLen do begin
    if src[i] = '%' then begin // do not localize
      { both hex digits must lie inside the string }
      if (i + 2) > SrcLen then
        raise EURLDecode.Create('Invalid URL-encoded string passed to URLDecode.');
      { hand-coded HexToInt to avoid string handling overhead per character }
      P^ := Char((HexDigit(src[i + 1]) shl 4) or HexDigit(src[i + 2]));
      Inc(i, 3);
    end else begin
      P^ := src[i];
      Inc(i);
    end;
    Inc(P);
  end;

  { Trim the buffer to the number of characters actually written. }
  SetLength(Decoded, P - PChar(Decoded));
  Result := Decoded;
end;
{ HTML_Entities }
(*
From http://www.w3.org/TR/WD-html40-970708/sgml/entities.html :
ISO-Latin1 HTML entities :
<!-- Portions © International Organization for Standardization 1986
Permission to copy in any form is granted for use with
conforming SGML systems and applications as defined in
ISO 8879, provided this notice is included in all copies.
-->
<!-- C0 Controls and Basic Latin -->
<!ENTITY quot CDATA "&#34;" -- quotation mark, =apl quote, u+0022 ISOnum -->
<!ENTITY amp CDATA "&#38;" -- ampersand, u+0026 ISOnum -->
<!ENTITY lt CDATA "&#60;" -- less-than sign, u+003C ISOnum -->
<!ENTITY gt CDATA "&#62;" -- greater-than sign, u+003E ISOnum -->
<!-- Character entity set. Typical invocation:
<!ENTITY % HTMLlat1 PUBLIC
"-//W3C//ENTITIES Full Latin 1//EN//HTML">
%HTMLlat1;
-->
<!ENTITY nbsp CDATA " " -- no-break space -->
<!ENTITY iexcl CDATA "¡" -- inverted exclamation mark -->
<!ENTITY cent CDATA "¢" -- cent sign -->
<!ENTITY pound CDATA "£" -- pound sterling sign -->
<!ENTITY curren CDATA "¤" -- general currency sign -->
<!ENTITY yen CDATA "¥" -- yen sign -->
<!ENTITY brvbar CDATA "¦" -- broken (vertical) bar -->
<!ENTITY sect CDATA "§" -- section sign -->
<!ENTITY uml CDATA "¨" -- umlaut (dieresis) -->
<!ENTITY copy CDATA "©" -- copyright sign -->
<!ENTITY ordf CDATA "ª" -- ordinal indicator, feminine -->
<!ENTITY laquo CDATA "«" -- angle quotation mark, left -->
<!ENTITY not CDATA "¬" -- not sign -->
<!ENTITY shy CDATA "­" -- soft hyphen -->
<!ENTITY reg CDATA "®" -- registered sign -->
<!ENTITY macr CDATA "¯" -- macron -->
<!ENTITY deg CDATA "°" -- degree sign -->
<!ENTITY plusmn CDATA "±" -- plus-or-minus sign -->
<!ENTITY sup2 CDATA "²" -- superscript two -->
<!ENTITY sup3 CDATA "³" -- superscript three -->
<!ENTITY acute CDATA "´" -- acute accent -->
<!ENTITY micro CDATA "µ" -- micro sign -->
<!ENTITY para CDATA "¶" -- pilcrow (paragraph sign) -->
<!ENTITY middot CDATA "·" -- middle dot -->
<!ENTITY cedil CDATA "¸" -- cedilla -->
<!ENTITY sup1 CDATA "¹" -- superscript one -->
<!ENTITY ordm CDATA "º" -- ordinal indicator, masculine -->
<!ENTITY raquo CDATA "»" -- angle quotation mark, right -->
<!ENTITY frac14 CDATA "¼" -- fraction one-quarter -->
<!ENTITY frac12 CDATA "½" -- fraction one-half -->
<!ENTITY frac34 CDATA "¾" -- fraction three-quarters -->
<!ENTITY iquest CDATA "¿" -- inverted question mark -->
<!ENTITY Agrave CDATA "À" -- capital A, grave accent -->
<!ENTITY Aacute CDATA "Á" -- capital A, acute accent -->
<!ENTITY Acirc CDATA "Â" -- capital A, circumflex accent -->
<!ENTITY Atilde CDATA "Ã" -- capital A, tilde -->
<!ENTITY Auml CDATA "Ä" -- capital A, dieresis or umlaut mark -->
<!ENTITY Aring CDATA "Å" -- capital A, ring -->
<!ENTITY AElig CDATA "Æ" -- capital AE diphthong (ligature) -->
<!ENTITY Ccedil CDATA "Ç" -- capital C, cedilla -->
<!ENTITY Egrave CDATA "È" -- capital E, grave accent -->
<!ENTITY Eacute CDATA "É" -- capital E, acute accent -->
<!ENTITY Ecirc CDATA "Ê" -- capital E, circumflex accent -->
<!ENTITY Euml CDATA "Ë" -- capital E, dieresis or umlaut mark -->
<!ENTITY Igrave CDATA "Ì" -- capital I, grave accent -->
<!ENTITY Iacute CDATA "Í" -- capital I, acute accent -->
<!ENTITY Icirc CDATA "Î" -- capital I, circumflex accent -->
<!ENTITY Iuml CDATA "Ï" -- capital I, dieresis or umlaut mark -->
<!ENTITY ETH CDATA "Ð" -- capital Eth, Icelandic -->
<!ENTITY Ntilde CDATA "Ñ" -- capital N, tilde -->
<!ENTITY Ograve CDATA "Ò" -- capital O, grave accent -->
<!ENTITY Oacute CDATA "Ó" -- capital O, acute accent -->
<!ENTITY Ocirc CDATA "Ô" -- capital O, circumflex accent -->
<!ENTITY Otilde CDATA "Õ" -- capital O, tilde -->
<!ENTITY Ouml CDATA "Ö" -- capital O, dieresis or umlaut mark -->
<!ENTITY times CDATA "×" -- multiply sign -->
<!ENTITY Oslash CDATA "Ø" -- capital O, slash -->
<!ENTITY Ugrave CDATA "Ù" -- capital U, grave accent -->
<!ENTITY Uacute CDATA "Ú" -- capital U, acute accent -->
<!ENTITY Ucirc CDATA "Û" -- capital U, circumflex accent -->
<!ENTITY Uuml CDATA "Ü" -- capital U, dieresis or umlaut mark -->
<!ENTITY Yacute CDATA "Ý" -- capital Y, acute accent -->
<!ENTITY THORN CDATA "Þ" -- capital THORN, Icelandic -->
<!ENTITY szlig CDATA "ß" -- small sharp s, German (sz ligature) -->
<!ENTITY agrave CDATA "à" -- small a, grave accent -->
<!ENTITY aacute CDATA "á" -- small a, acute accent -->
<!ENTITY acirc CDATA "â" -- small a, circumflex accent -->
<!ENTITY atilde CDATA "ã" -- small a, tilde -->
<!ENTITY auml CDATA "ä" -- small a, dieresis or umlaut mark -->
<!ENTITY aring CDATA "å" -- small a, ring -->
<!ENTITY aelig CDATA "æ" -- small ae diphthong (ligature) -->
<!ENTITY ccedil CDATA "ç" -- small c, cedilla -->
<!ENTITY egrave CDATA "è" -- small e, grave accent -->
<!ENTITY eacute CDATA "é" -- small e, acute accent -->
<!ENTITY ecirc CDATA "ê" -- small e, circumflex accent -->
<!ENTITY euml CDATA "ë" -- small e, dieresis or umlaut mark -->
<!ENTITY igrave CDATA "ì" -- small i, grave accent -->
<!ENTITY iacute CDATA "í" -- small i, acute accent -->
<!ENTITY icirc CDATA "î" -- small i, circumflex accent -->
<!ENTITY iuml CDATA "ï" -- small i, dieresis or umlaut mark -->
<!ENTITY eth CDATA "ð" -- small eth, Icelandic -->
<!ENTITY ntilde CDATA "ñ" -- small n, tilde -->
<!ENTITY ograve CDATA "ò" -- small o, grave accent -->
<!ENTITY oacute CDATA "ó" -- small o, acute accent -->
<!ENTITY ocirc CDATA "ô" -- small o, circumflex accent -->
<!ENTITY otilde CDATA "õ" -- small o, tilde -->
<!ENTITY ouml CDATA "ö" -- small o, dieresis or umlaut mark -->
<!ENTITY divide CDATA "÷" -- divide sign -->
<!ENTITY oslash CDATA "ø" -- small o, slash -->
<!ENTITY ugrave CDATA "ù" -- small u, grave accent -->
<!ENTITY uacute CDATA "ú" -- small u, acute accent -->
<!ENTITY ucirc CDATA "û" -- small u, circumflex accent -->
<!ENTITY uuml CDATA "ü" -- small u, dieresis or umlaut mark -->
<!ENTITY yacute CDATA "ý" -- small y, acute accent -->
<!ENTITY thorn CDATA "þ" -- small thorn, Icelandic -->
<!ENTITY yuml CDATA "ÿ" -- small y, dieresis or umlaut mark -->
*)
{ Entity names for character codes 160..255, in code order. }
{ HTMLEncode looks a name up as HTML_Entities[code - 160], so entry 0 is the }
{ name for code 160 and entry 95 the name for code 255. Names are stored }
{ without the leading '&' and the trailing ';'. The longest name has six }
{ characters. }
const HTML_Entities: Array[0..95] of string = ( // do not localize
'nbsp', // &#160; -- no-break space
'iexcl', // &#161; -- inverted exclamation mark
'cent', // &#162; -- cent sign
'pound', // &#163; -- pound sterling sign
'curren', // &#164; -- general currency sign
'yen', // &#165; -- yen sign
'brvbar', // &#166; -- broken (vertical) bar
'sect', // &#167; -- section sign
'uml', // &#168; -- umlaut (dieresis)
'copy', // &#169; -- copyright sign
'ordf', // &#170; -- ordinal indicator, feminine
'laquo', // &#171; -- angle quotation mark, left
'not', // &#172; -- not sign
'shy', // &#173; -- soft hyphen
'reg', // &#174; -- registered sign
'macr', // &#175; -- macron
'deg', // &#176; -- degree sign
'plusmn', // &#177; -- plus-or-minus sign
'sup2', // &#178; -- superscript two
'sup3', // &#179; -- superscript three
'acute', // &#180; -- acute accent
'micro', // &#181; -- micro sign
'para', // &#182; -- pilcrow (paragraph sign)
'middot', // &#183; -- middle dot
'cedil', // &#184; -- cedilla
'sup1', // &#185; -- superscript one
'ordm', // &#186; -- ordinal indicator, masculine
'raquo', // &#187; -- angle quotation mark, right
'frac14', // &#188; -- fraction one-quarter
'frac12', // &#189; -- fraction one-half
'frac34', // &#190; -- fraction three-quarters
'iquest', // &#191; -- inverted question mark
'Agrave', // &#192; -- capital A, grave accent
'Aacute', // &#193; -- capital A, acute accent
'Acirc', // &#194; -- capital A, circumflex accent
'Atilde', // &#195; -- capital A, tilde
'Auml', // &#196; -- capital A, dieresis or umlaut mark
'Aring', // &#197; -- capital A, ring
'AElig', // &#198; -- capital AE diphthong (ligature)
'Ccedil', // &#199; -- capital C, cedilla
'Egrave', // &#200; -- capital E, grave accent
'Eacute', // &#201; -- capital E, acute accent
'Ecirc', // &#202; -- capital E, circumflex accent
'Euml', // &#203; -- capital E, dieresis or umlaut mark
'Igrave', // &#204; -- capital I, grave accent
'Iacute', // &#205; -- capital I, acute accent
'Icirc', // &#206; -- capital I, circumflex accent
'Iuml', // &#207; -- capital I, dieresis or umlaut mark
'ETH', // &#208; -- capital Eth, Icelandic
'Ntilde', // &#209; -- capital N, tilde
'Ograve', // &#210; -- capital O, grave accent
'Oacute', // &#211; -- capital O, acute accent
'Ocirc', // &#212; -- capital O, circumflex accent
'Otilde', // &#213; -- capital O, tilde
'Ouml', // &#214; -- capital O, dieresis or umlaut mark
'times', // &#215; -- multiply sign
'Oslash', // &#216; -- capital O, slash
'Ugrave', // &#217; -- capital U, grave accent
'Uacute', // &#218; -- capital U, acute accent
'Ucirc', // &#219; -- capital U, circumflex accent
'Uuml', // &#220; -- capital U, dieresis or umlaut mark
'Yacute', // &#221; -- capital Y, acute accent
'THORN', // &#222; -- capital THORN, Icelandic
'szlig', // &#223; -- small sharp s, German (sz ligature)
'agrave', // &#224; -- small a, grave accent
'aacute', // &#225; -- small a, acute accent
'acirc', // &#226; -- small a, circumflex accent
'atilde', // &#227; -- small a, tilde
'auml', // &#228; -- small a, dieresis or umlaut mark
'aring', // &#229; -- small a, ring
'aelig', // &#230; -- small ae diphthong (ligature)
'ccedil', // &#231; -- small c, cedilla
'egrave', // &#232; -- small e, grave accent
'eacute', // &#233; -- small e, acute accent
'ecirc', // &#234; -- small e, circumflex accent
'euml', // &#235; -- small e, dieresis or umlaut mark
'igrave', // &#236; -- small i, grave accent
'iacute', // &#237; -- small i, acute accent
'icirc', // &#238; -- small i, circumflex accent
'iuml', // &#239; -- small i, dieresis or umlaut mark
'eth', // &#240; -- small eth, Icelandic
'ntilde', // &#241; -- small n, tilde
'ograve', // &#242; -- small o, grave accent
'oacute', // &#243; -- small o, acute accent
'ocirc', // &#244; -- small o, circumflex accent
'otilde', // &#245; -- small o, tilde
'ouml', // &#246; -- small o, dieresis or umlaut mark
'divide', // &#247; -- divide sign
'oslash', // &#248; -- small o, slash
'ugrave', // &#249; -- small u, grave accent
'uacute', // &#250; -- small u, acute accent
'ucirc', // &#251; -- small u, circumflex accent
'uuml', // &#252; -- small u, dieresis or umlaut mark
'yacute', // &#253; -- small y, acute accent
'thorn', // &#254; -- small thorn, Icelandic
'yuml'); // &#255; -- small y, dieresis or umlaut mark
{ Escapes Src for embedding in HTML/XML text.

  Substitutions made:
    space (32)       -> %20     (deliberate, mirrored by HTMLDecode)
    quotation (34)   -> &quot;
    ampersand (38)   -> &amp;
    apostrophe (39)  -> &apos;
    less-than (60)   -> &lt;
    greater-than (62)-> &gt;
    codes 160..255   -> '&' + HTML_Entities[code - 160] + ';'
                        only when EncodeNonUSASCII is True, otherwise copied
  Every other character is copied unchanged.

  Returns the escaped string ('' for empty input). }
function HTMLEncode(Src: string; EncodeNonUSASCII: boolean): string;
var
  i: Integer;
  Code: Integer;
  Encoded: string;
  P: PChar;

  { Write S at the output position P and advance P by Length(S) characters.
    Copying character by character (rather than Move with a byte count)
    keeps this correct whatever SizeOf(Char) is. }
  procedure Emit(const S: string);
  var
    k: Integer;
  begin
    for k := 1 to Length(S) do begin
      P^ := S[k];
      Inc(P);
    end;
  end;

begin
  Result := '';
  if Length(Src) = 0 then
    Exit;

  { Worst case per input character is '&' + a six-character entity name +
    ';' = eight characters. SetLength counts characters, not bytes. }
  SetLength(Encoded, Length(Src) * 8);
  P := PChar(Encoded);

  for i := 1 to Length(Src) do begin
    Code := Ord(Src[i]);
    case Code of
      32: Emit('%20');    // space  // do not localize
      34: Emit('&quot;'); // quot   // do not localize
      38: Emit('&amp;');  // amp    // do not localize
      39: Emit('&apos;'); // apos   // do not localize
      60: Emit('&lt;');   // lt     // do not localize
      62: Emit('&gt;');   // gt     // do not localize
      160..255: // the NON-USASCII characters
        begin
          if EncodeNonUSASCII then begin
            Emit('&'); // do not localize
            Emit(HTML_Entities[Code - 160]);
            Emit(';');
          end else begin
            P^ := Src[i]; Inc(P);
          end;
        end;
    else
      begin
        P^ := Src[i]; Inc(P);
      end;
    end;
  end;

  { Trim the buffer to the number of characters actually written. }
  SetLength(Encoded, P - PChar(Encoded));
  Result := Encoded;
end;
{
XML 1.0 introduces a new named character reference '&apos;' to refer
to U+0027 APOSTROPHE. This entity wasn't included in any HTML version,
but it is not listed as compatibility issue in appendix C of XHTML 1.0.
When delivering XHTML documents as text/html current browsers don't
recognize this entity. There is trouble ticket #107 in the Voyager Issue
Tracking System with a note, that the HTML WG decided to add this entity
to HTML 4.01; HTML 4.01 however doesn't include this entity and it
appears to me that this decision was made after HTML 4.01 was published.
The current errata for HTML 4 doesn't list the omission of &apos;
either.
I suggest to discard the idea of adding &apos; to HTML 4.01 since it
would require to change the relevant DTDs and this is not possible in a
usable fashion through an errata; I suggest further adding another item
in appendix C of XHTML 1.0 that read e.g.
C.xx Named Character Reference &apos;
The named character reference &apos; (the apostrophe, U+0027) was
introduced in XML 1.0 but didn't appear in former HTML versions.
Authors should therefore use &#39; instead of &apos; to work as
expected in HTML 4 user agents.
}
{ Reverses the substitutions HTMLEncode makes for space and markup characters.

  Recognised tokens (case-sensitive) and their replacements:
    %20 -> space    &quot; -> "    &apos; -> '
    &amp; -> &      &lt; -> <      &gt; -> >
  Anything else, including an incomplete token such as '&am', is copied
  unchanged. The input is scanned once from left to right, so the text
  produced by one replacement is never decoded again ('&amp;lt;' yields
  '&lt;', not '<').

  Returns the decoded string ('' for empty input). }
function HTMLDecode(Src: string): string;
const
  TokenCount = 6;
  Tokens: array[1..TokenCount] of string = (
    '%20', '&quot;', '&apos;', '&amp;', '&lt;', '&gt;' // do not localize
  );
  Replacements: array[1..TokenCount] of Char = (
    ' ', '"', '''', '&', '<', '>' // do not localize
  );
var
  i, t, OutLen, SrcLen: Integer;
  Decoded: string;
  Matched: Boolean;

  { True when Token occurs in Src starting at position Index. }
  function MatchesAt(const Token: string; Index: Integer): Boolean;
  var
    k: Integer;
  begin
    Result := False;
    if Index + Length(Token) - 1 > SrcLen then
      Exit;
    for k := 1 to Length(Token) do
      if Src[Index + k - 1] <> Token[k] then
        Exit;
    Result := True;
  end;

begin
  Result := '';
  SrcLen := Length(Src);
  if SrcLen = 0 then
    Exit;

  { Every token is replaced by a single character, so the output can never
    be longer than the input; allocate once instead of growing per char. }
  SetLength(Decoded, SrcLen);
  OutLen := 0;

  i := 1;
  while i <= SrcLen do begin
    Matched := False;
    { all tokens start with '%' or '&'; skip the table for other characters }
    if (Src[i] = '%') or (Src[i] = '&') then
      for t := 1 to TokenCount do
        if MatchesAt(Tokens[t], i) then begin
          Inc(OutLen);
          Decoded[OutLen] := Replacements[t];
          Inc(i, Length(Tokens[t]));
          Matched := True;
          Break;
        end;
    if not Matched then begin
      Inc(OutLen);
      Decoded[OutLen] := Src[i];
      Inc(i);
    end;
    // TODO: add support for the NON-USASCII characters
  end;

  SetLength(Decoded, OutLen);
  Result := Decoded;
end;
{
<!-- (C) International Organization for Standardization 1986
Permission to copy in any form is granted for use with
conforming SGML systems and applications as defined in
ISO 8879, provided this notice is included in all copies.
-->
<!-- Character entity set. Typical invocation:
<!ENTITY % ISOcyr1 PUBLIC
"ISO 8879:1986//ENTITIES Russian Cyrillic//EN">
%ISOcyr1;
-->
<!ENTITY acy SDATA "[acy ]"--=small a, Cyrillic-->
<!ENTITY Acy SDATA "[Acy ]"--=capital A, Cyrillic-->
<!ENTITY bcy SDATA "[bcy ]"--=small be, Cyrillic-->
<!ENTITY Bcy SDATA "[Bcy ]"--=capital BE, Cyrillic-->
<!ENTITY vcy SDATA "[vcy ]"--=small ve, Cyrillic-->
<!ENTITY Vcy SDATA "[Vcy ]"--=capital VE, Cyrillic-->
<!ENTITY gcy SDATA "[gcy ]"--=small ghe, Cyrillic-->
<!ENTITY Gcy SDATA "[Gcy ]"--=capital GHE, Cyrillic-->
<!ENTITY dcy SDATA "[dcy ]"--=small de, Cyrillic-->
<!ENTITY Dcy SDATA "[Dcy ]"--=capital DE, Cyrillic-->
<!ENTITY iecy SDATA "[iecy ]"--=small ie, Cyrillic-->
<!ENTITY IEcy SDATA "[IEcy ]"--=capital IE, Cyrillic-->
<!ENTITY iocy SDATA "[iocy ]"--=small io, Russian-->
<!ENTITY IOcy SDATA "[IOcy ]"--=capital IO, Russian-->
<!ENTITY zhcy SDATA "[zhcy ]"--=small zhe, Cyrillic-->
<!ENTITY ZHcy SDATA "[ZHcy ]"--=capital ZHE, Cyrillic-->
<!ENTITY zcy SDATA "[zcy ]"--=small ze, Cyrillic-->
<!ENTITY Zcy SDATA "[Zcy ]"--=capital ZE, Cyrillic-->
<!ENTITY icy SDATA "[icy ]"--=small i, Cyrillic-->
<!ENTITY Icy SDATA "[Icy ]"--=capital I, Cyrillic-->
<!ENTITY jcy SDATA "[jcy ]"--=small short i, Cyrillic-->
<!ENTITY Jcy SDATA "[Jcy ]"--=capital short I, Cyrillic-->
<!ENTITY kcy SDATA "[kcy ]"--=small ka, Cyrillic-->
<!ENTITY Kcy SDATA "[Kcy ]"--=capital KA, Cyrillic-->
<!ENTITY lcy SDATA "[lcy ]"--=small el, Cyrillic-->
<!ENTITY Lcy SDATA "[Lcy ]"--=capital EL, Cyrillic-->
<!ENTITY mcy SDATA "[mcy ]"--=small em, Cyrillic-->
<!ENTITY Mcy SDATA "[Mcy ]"--=capital EM, Cyrillic-->
<!ENTITY ncy SDATA "[ncy ]"--=small en, Cyrillic-->
<!ENTITY Ncy SDATA "[Ncy ]"--=capital EN, Cyrillic-->
<!ENTITY ocy SDATA "[ocy ]"--=small o, Cyrillic-->
<!ENTITY Ocy SDATA "[Ocy ]"--=capital O, Cyrillic-->
<!ENTITY pcy SDATA "[pcy ]"--=small pe, Cyrillic-->
<!ENTITY Pcy SDATA "[Pcy ]"--=capital PE, Cyrillic-->
<!ENTITY rcy SDATA "[rcy ]"--=small er, Cyrillic-->
<!ENTITY Rcy SDATA "[Rcy ]"--=capital ER, Cyrillic-->
<!ENTITY scy SDATA "[scy ]"--=small es, Cyrillic-->
<!ENTITY Scy SDATA "[Scy ]"--=capital ES, Cyrillic-->
<!ENTITY tcy SDATA "[tcy ]"--=small te, Cyrillic-->
<!ENTITY Tcy SDATA "[Tcy ]"--=capital TE, Cyrillic-->
<!ENTITY ucy SDATA "[ucy ]"--=small u, Cyrillic-->
<!ENTITY Ucy SDATA "[Ucy ]"--=capital U, Cyrillic-->
<!ENTITY fcy SDATA "[fcy ]"--=small ef, Cyrillic-->
<!ENTITY Fcy SDATA "[Fcy ]"--=capital EF, Cyrillic-->
<!ENTITY khcy SDATA "[khcy ]"--=small ha, Cyrillic-->
<!ENTITY KHcy SDATA "[KHcy ]"--=capital HA, Cyrillic-->
<!ENTITY tscy SDATA "[tscy ]"--=small tse, Cyrillic-->
<!ENTITY TScy SDATA "[TScy ]"--=capital TSE, Cyrillic-->
<!ENTITY chcy SDATA "[chcy ]"--=small che, Cyrillic-->
<!ENTITY CHcy SDATA "[CHcy ]"--=capital CHE, Cyrillic-->
<!ENTITY shcy SDATA "[shcy ]"--=small sha, Cyrillic-->
<!ENTITY SHcy SDATA "[SHcy ]"--=capital SHA, Cyrillic-->
<!ENTITY shchcy SDATA "[shchcy]"--=small shcha, Cyrillic-->
<!ENTITY SHCHcy SDATA "[SHCHcy]"--=capital SHCHA, Cyrillic-->
<!ENTITY hardcy SDATA "[hardcy]"--=small hard sign, Cyrillic-->
<!ENTITY HARDcy SDATA "[HARDcy]"--=capital HARD sign, Cyrillic-->
<!ENTITY ycy SDATA "[ycy ]"--=small yeru, Cyrillic-->
<!ENTITY Ycy SDATA "[Ycy ]"--=capital YERU, Cyrillic-->
<!ENTITY softcy SDATA "[softcy]"--=small soft sign, Cyrillic-->
<!ENTITY SOFTcy SDATA "[SOFTcy]"--=capital SOFT sign, Cyrillic-->
<!ENTITY ecy SDATA "[ecy ]"--=small e, Cyrillic-->
<!ENTITY Ecy SDATA "[Ecy ]"--=capital E, Cyrillic-->
<!ENTITY yucy SDATA "[yucy ]"--=small yu, Cyrillic-->
<!ENTITY YUcy SDATA "[YUcy ]"--=capital YU, Cyrillic-->
<!ENTITY yacy SDATA "[yacy ]"--=small ya, Cyrillic-->
<!ENTITY YAcy SDATA "[YAcy ]"--=capital YA, Cyrillic-->
<!ENTITY numero SDATA "[numero]"--=numero sign-->
}
{
<!-- (C) International Organization for Standardization 1986
Permission to copy in any form is granted for use with
conforming SGML systems and applications as defined in
ISO 8879, provided this notice is included in all copies.
-->
<!-- Character entity set. Typical invocation:
<!ENTITY % ISOcyr2 PUBLIC
"ISO 8879:1986//ENTITIES Non-Russian Cyrillic//EN">
%ISOcyr2;
-->
<!ENTITY djcy SDATA "[djcy ]"--=small dje, Serbian-->
<!ENTITY DJcy SDATA "[DJcy ]"--=capital DJE, Serbian-->
<!ENTITY gjcy SDATA "[gjcy ]"--=small gje, Macedonian-->
<!ENTITY GJcy SDATA "[GJcy ]"--=capital GJE Macedonian-->
<!ENTITY jukcy SDATA "[jukcy ]"--=small je, Ukrainian-->
<!ENTITY Jukcy SDATA "[Jukcy ]"--=capital JE, Ukrainian-->
<!ENTITY dscy SDATA "[dscy ]"--=small dse, Macedonian-->
<!ENTITY DScy SDATA "[DScy ]"--=capital DSE, Macedonian-->
<!ENTITY iukcy SDATA "[iukcy ]"--=small i, Ukrainian-->
<!ENTITY Iukcy SDATA "[Iukcy ]"--=capital I, Ukrainian-->
<!ENTITY yicy SDATA "[yicy ]"--=small yi, Ukrainian-->
<!ENTITY YIcy SDATA "[YIcy ]"--=capital YI, Ukrainian-->
<!ENTITY jsercy SDATA "[jsercy]"--=small je, Serbian-->
<!ENTITY Jsercy SDATA "[Jsercy]"--=capital JE, Serbian-->
<!ENTITY ljcy SDATA "[ljcy ]"--=small lje, Serbian-->
<!ENTITY LJcy SDATA "[LJcy ]"--=capital LJE, Serbian-->
<!ENTITY njcy SDATA "[njcy ]"--=small nje, Serbian-->
<!ENTITY NJcy SDATA "[NJcy ]"--=capital NJE, Serbian-->
<!ENTITY tshcy SDATA "[tshcy ]"--=small tshe, Serbian-->
<!ENTITY TSHcy SDATA "[TSHcy ]"--=capital TSHE, Serbian-->
<!ENTITY kjcy SDATA "[kjcy ]"--=small kje Macedonian-->
<!ENTITY KJcy SDATA "[KJcy ]"--=capital KJE, Macedonian-->
<!ENTITY ubrcy SDATA "[ubrcy ]"--=small u, Byelorussian-->
<!ENTITY Ubrcy SDATA "[Ubrcy ]"--=capital U, Byelorussian-->
<!ENTITY dzcy SDATA "[dzcy ]"--=small dze, Serbian-->
<!ENTITY DZcy SDATA "[DZcy ]"--=capital dze, Serbian-->
}
end.