Skip to content

Commit ef95bba

Browse files
committed
Add support for the new Indic_Conjunct_Break property.
1 parent 823a9ce commit ef95bba

10 files changed

+1195
-8
lines changed

CHANGES.md

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
`ID_Compat_Math_{Start,Continue}` properties.
1818
- Add `Uucp.Case.Nfkc_simple_fold.fold`, support for the new
1919
`NFKC_Simple_Casefold` property.
20+
- Add `Uucp.Break.indic_conjunct_break`, support for the new
21+
`Indic_Conjunct_Break` property.
2022

2123
v15.0.0 2022-09-15 Zagreb
2224
-------------------------

src/uucp.mli

-2
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,6 @@ module White = Uucp__white
9191
{{:http://www.unicode.org/reports/tr44/#Joining_Group}Joining_Group},
9292
{{:http://www.unicode.org/reports/tr44/#Joining_Type}Joining_Type},
9393
{{:http://www.unicode.org/reports/tr44/#Vertical_Orientation}Vertical_Orientation},
94-
{{:http://www.unicode.org/reports/tr44/#Indic_Conjuct_Break}
95-
Indic_Conjuct_Break},
9694
{{:http://www.unicode.org/reports/tr44/#Indic_Syllabic_Category}
9795
Indic_Syllabic_Category},
9896
{{:http://www.unicode.org/reports/tr44/#Indic_Positional_Category}

src/uucp__break.ml

+9
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ module Low = struct
2828

2929
let sentence_max = sentence_max
3030
let sentence_of_int = sentence_of_byte
31+
32+
let indic_conjunct_break u =
33+
Uucp_tmapbyte.get Uucp_break_data.indic_conjunct_break_map (Uchar.to_int u)
34+
35+
let indic_conjunct_break_max = indic_conjunct_break_max
36+
let indic_conjunct_break_of_int = indic_conjunct_break_of_byte
3137
end
3238

3339
let line u = Array.unsafe_get Low.line_of_int (Low.line u)
@@ -36,6 +42,9 @@ let grapheme_cluster u = Array.unsafe_get Low.grapheme_cluster_of_int
3642

3743
let word u = Array.unsafe_get Low.word_of_int (Low.word u)
3844
let sentence u = Array.unsafe_get Low.sentence_of_int (Low.sentence u)
45+
let indic_conjunct_break u =
46+
Array.unsafe_get Low.indic_conjunct_break_of_int
47+
(Low.indic_conjunct_break u)
3948

4049
let east_asian_width u =
4150
Uucp_rmap.get Uucp_break_data.east_asian_width_map (Uchar.to_int u)

src/uucp__break.mli

+30-6
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ type word =
6363
(** The type for word breaks. *)
6464

6565
val pp_word : Format.formatter -> word -> unit
66-
(** [pp_grapheme_cluster ppf g] prints an unspecified representation of [g]
67-
on [ppf]. *)
66+
(** [pp_word ppf b] prints an unspecified representation of [b] on [ppf]. *)
6867

6968
val word : Uchar.t -> word
7069
(** [word u] is [u]'s
@@ -79,14 +78,27 @@ type sentence =
7978
(** The type for sentence breaks. *)
8079

8180
val pp_sentence : Format.formatter -> sentence -> unit
82-
(** [pp_grapheme_cluster ppf g] prints an unspecified representation of [g]
83-
on [ppf]. *)
81+
(** [pp_sentence ppf b] prints an unspecified representation of [b] on [ppf]. *)
8482

8583
val sentence : Uchar.t -> sentence
8684
(** [sentence u] is [u]'s
8785
{{:http://www.unicode.org/reports/tr44/#Sentence_Break}sentence break}
8886
property. *)
8987

88+
(** {1:indic_conjunct_break Indic conjunct break} *)
89+
90+
type indic_conjunct_break = [ `Consonant | `Extend | `Linker | `None ]
91+
(** The type for Indic conjunct breaks. *)
92+
93+
val pp_indic_conjunct_break : Format.formatter -> indic_conjunct_break -> unit
94+
(** [pp_indic_conjunct_break ppf b] prints an unspecified representation of [b]
95+
on [ppf]. *)
96+
97+
val indic_conjunct_break : Uchar.t -> indic_conjunct_break
98+
(** [indic_conjunct_break u] is [u]'s
99+
{{:http://www.unicode.org/reports/tr44/#Indic_Conjunct_Break}
100+
Indic conjunct break} property. *)
101+
90102
(** {1:east_asian_width East Asian width} *)
91103

92104
type east_asian_width = [ `A | `F | `H | `N | `Na | `W ]
@@ -98,8 +110,8 @@ val pp_east_asian_width : Format.formatter -> east_asian_width -> unit
98110

99111
val east_asian_width : Uchar.t -> east_asian_width
100112
(** [east_asian_width u] is [u]'s
101-
{{:http://www.unicode.org/reports/tr44/#East_Asian_Width}East Asian
102-
width} property. *)
113+
{{:http://www.unicode.org/reports/tr44/#East_Asian_Width}East
114+
Asian width} property. *)
103115

104116
(** {1:terminal_width Terminal width} *)
105117

@@ -240,4 +252,16 @@ module Low : sig
240252
val sentence_of_int : sentence array
241253
(** [sentence_of_int.(i)] is the sentence break property value
242254
corresponding to [i]. *)
255+
256+
val indic_conjunct_break : Uchar.t -> int
257+
(** [indic_conjunct_break u] is an integer that can be used with
258+
{!indic_conjunct_break_of_int}. *)
259+
260+
val indic_conjunct_break_max : int
261+
(** [indic_conjunct_break_max] is the maximal value returned by
262+
{!val-indic_conjunct_break}. *)
263+
264+
val indic_conjunct_break_of_int : indic_conjunct_break array
265+
(** [indic_conjunct_break_of_int.(i)] is the Indic conjunct break property
266+
value corresponding to [i]. *)
243267
end

src/uucp_break_base.ml

+18
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,24 @@ let pp_sentence ppf v = Format.fprintf ppf "%s" begin match v with
115115
| `SE -> "SE" | `SP -> "SP" | `ST -> "ST" | `UP -> "UP" | `XX -> "XX"
116116
end
117117

118+
(* Indic conjunct break *)
119+
120+
type indic_conjunct_break =
121+
[ `Consonant | `Extend | `Linker | `None ]
122+
123+
let indic_conjunct_break_of_byte : indic_conjunct_break array =
124+
[| `Consonant; `Extend; `Linker; `None |]
125+
126+
let indic_conjunct_break_max = Array.length indic_conjunct_break_of_byte - 1
127+
128+
let indic_conjunct_break_to_byte = function
129+
| `Consonant -> 0 | `Extend -> 1 | `Linker -> 2 | `None -> 3
130+
131+
let pp_indic_conjunct_break ppf v = Format.fprintf ppf "%s" begin match v with
132+
| `Consonant -> "Consonant" | `Extend -> "Extend" | `Linker -> "Linker"
133+
| `None -> "None"
134+
end
135+
118136
(* East Asian width *)
119137

120138
type east_asian_width = [ `A | `F | `H | `N | `Na | `W ]

0 commit comments

Comments
 (0)