Skip to content

Commit

Permalink
wezterm: add experimental_bidi config option and very basic bidi
Browse files Browse the repository at this point in the history
This commit is larger than it appears due to the fanout from threading
bidi parameters through the call chain.  The main changes are:

* When clustering cells, add an additional phase to resolve embedding
  levels and further sub-divide a cluster based on the resolved bidi
  runs; this is where we get the direction for a run and this needs
  to be passed through to the shaper.
* When doing bidi, the forced cluster boundary hack that we use to
  de-ligature when cursoring through text needs to be disabled,
  otherwise the cursor appears to push/rotate the text in that
  cluster when moving through it! We'll need to find a different
  way to handle shading the cursor that eliminates the original
  cursor/ligature/black issue.
* In the shaper, the logic for coalescing unresolved runs for font
  fallback assumed LTR and needed to be adjusted to cluster RTL.
  That meant also computing a little index of codepoint lengths.
* Added `experimental_bidi` boolean option that defaults to false.
  When enabled, it activates the bidi processing phase in clustering
  with a strong hint that the paragraph is LTR.

This implementation is incomplete and/or wrong for a number of cases:

* The config option should probably allow specifying the paragraph
  direction hint to use by default.
* https://terminal-wg.pages.freedesktop.org/bidi/recommendation/paragraphs.html
  recommends that bidi be applied to logical lines, not to physical
  lines (or really: ranges within physical lines), which is what we're
  doing at the moment
* The paragraph direction hint should be overridden by cell attributes
  and other escapes; see 85a6b17

and probably others.

However, as of this commit, if you set `experimental_bidi=true` and run

```
echo This is RTL -> عربي فارسی bidi
```

(that text was sourced from:
microsoft/terminal#538 (comment))

then wezterm will display the text in the same order as the text
renders in Chrome for that github comment.

```
; ./target/debug/wezterm --config experimental_bidi=false ls-fonts --text "عربي فارسی ->"
LeftToRight
 0 ع    \u{639}      x_adv=8  glyph=300  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 2 ر    \u{631}      x_adv=3.78125 glyph=273  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 4 ب    \u{628}      x_adv=4  glyph=244  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 6 ي    \u{64a}      x_adv=4  glyph=363  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 8      \u{20}       x_adv=8  glyph=2    wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
 9 ف    \u{641}      x_adv=11 glyph=328  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
11 ا    \u{627}      x_adv=4  glyph=240  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
13 ر    \u{631}      x_adv=3.78125 glyph=273  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
15 س    \u{633}      x_adv=10 glyph=278  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
17 ی    \u{6cc}      x_adv=4  glyph=664  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
19      \u{20}       x_adv=8  glyph=2    wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
20 -    \u{2d}       x_adv=8  glyph=276  wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
21 >    \u{3e}       x_adv=8  glyph=338  wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
```

```
; ./target/debug/wezterm --config experimental_bidi=true ls-fonts --text "عربي فارسی ->"
RightToLeft
17 ی    \u{6cc}      x_adv=9  glyph=906  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
15 س    \u{633}      x_adv=10 glyph=277  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
13 ر    \u{631}      x_adv=4.78125 glyph=272  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
11 ا    \u{627}      x_adv=4  glyph=241  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 9 ف    \u{641}      x_adv=5  glyph=329  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 8      \u{20}       x_adv=8  glyph=2    wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
 6 ي    \u{64a}      x_adv=9  glyph=904  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 4 ب    \u{628}      x_adv=4  glyph=243  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 2 ر    \u{631}      x_adv=5  glyph=273  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
 0 ع    \u{639}      x_adv=6  glyph=301  wezterm.font(".Geeza Pro Interface", {weight="Regular", stretch="Normal", italic=false})
                                      /System/Library/Fonts/GeezaPro.ttc index=2 variation=0, CoreText
LeftToRight
 0      \u{20}       x_adv=8  glyph=2    wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
 1 -    \u{2d}       x_adv=8  glyph=480  wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
 2 >    \u{3e}       x_adv=8  glyph=470  wezterm.font("Operator Mono SSm Lig", {weight="DemiLight", stretch="Normal", italic=false})
                                      /Users/wez/.fonts/OperatorMonoSSmLig-Medium.otf, FontDirs
;
```

refs: #784
  • Loading branch information
wez committed Jan 25, 2022
1 parent 601a85e commit 0324ff6
Show file tree
Hide file tree
Showing 17 changed files with 272 additions and 58 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions bidi/examples/shaping.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use wezterm_bidi::{BidiContext, Direction};
use wezterm_bidi::{BidiContext, Direction, ParagraphDirectionHint};

fn main() {
// The UBA is strongly coupled with codepoints and indices into the
Expand All @@ -12,10 +12,10 @@ fn main() {
// Leave it to the algorithm to determine the paragraph direction.
// If you have some higher level understanding or override for the
// direction, you can set `direction` accordingly.
let direction: Option<Direction> = None;
let hint = ParagraphDirectionHint::AutoLeftToRight;

// Resolve the embedding levels for our paragraph.
context.resolve_paragraph(&paragraph, direction);
context.resolve_paragraph(&paragraph, hint);

/// In order to layout the text, we need to feed information to a shaper.
/// For the purposes of example, we're sketching out a stub shaper interface
Expand Down
41 changes: 24 additions & 17 deletions bidi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ pub use bidi_class::BidiClass;
pub use direction::Direction;
pub use level::Level;

/// Hint that selects the base (paragraph) embedding level used when a
/// `BidiContext` resolves a paragraph.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParagraphDirectionHint {
/// Force a left-to-right paragraph: the base level is `Level(0)`.
LeftToRight,
/// Force a right-to-left paragraph: the base level is `Level(1)`.
RightToLeft,
/// Derive the base level from the paragraph's character types via
/// `paragraph_level`.
/// NOTE(review): the name suggests left-to-right is the fallback when no
/// strong directional character is found -- confirm against
/// `paragraph_level`.
AutoLeftToRight,
}

#[derive(Debug, Default)]
pub struct BidiContext {
orig_char_types: Vec<BidiClass>,
Expand Down Expand Up @@ -275,9 +282,9 @@ impl BidiContext {
}

/// <http://unicode.org/reports/tr9/>
pub fn resolve_paragraph(&mut self, paragraph: &[char], dir: Option<Direction>) {
pub fn resolve_paragraph(&mut self, paragraph: &[char], hint: ParagraphDirectionHint) {
self.populate_char_types(paragraph);
self.resolve(dir, paragraph);
self.resolve(hint, paragraph);
}

/// BD1: The bidirectional character types are values assigned to each
Expand All @@ -289,21 +296,21 @@ impl BidiContext {
.extend(paragraph.iter().map(|&c| bidi_class_for_char(c)));
}

pub fn set_char_types(&mut self, char_types: &[BidiClass], dir: Option<Direction>) {
pub fn set_char_types(&mut self, char_types: &[BidiClass], hint: ParagraphDirectionHint) {
self.orig_char_types.clear();
self.orig_char_types.extend(char_types);
self.resolve(dir, &[]);
self.resolve(hint, &[]);
}

fn resolve(&mut self, dir: Option<Direction>, paragraph: &[char]) {
fn resolve(&mut self, hint: ParagraphDirectionHint, paragraph: &[char]) {
trace!("\n**** resolve \n");
self.char_types.clear();
self.char_types.extend(self.orig_char_types.iter());

self.base_level = match dir {
Some(Direction::LeftToRight) => Level(0),
Some(Direction::RightToLeft) => Level(1),
None => paragraph_level(&self.char_types, false),
self.base_level = match hint {
ParagraphDirectionHint::LeftToRight => Level(0),
ParagraphDirectionHint::RightToLeft => Level(1),
ParagraphDirectionHint::AutoLeftToRight => paragraph_level(&self.char_types, false),
};

self.dump_state("before X1-X8");
Expand Down Expand Up @@ -1857,7 +1864,7 @@ mod tests {
let text = vec!['א', 'ב', 'ג', 'a', 'b', 'c'];

let mut context = BidiContext::new();
context.resolve_paragraph(&text, None);
context.resolve_paragraph(&text, ParagraphDirectionHint::AutoLeftToRight);
k9::snapshot!(
context.runs().collect::<Vec<_>>(),
"
Expand Down Expand Up @@ -1971,9 +1978,9 @@ mod tests {
context.resolve_paragraph(
&codepoints,
match direction {
0 => Some(Direction::LeftToRight),
1 => Some(Direction::RightToLeft),
2 => None,
0 => ParagraphDirectionHint::LeftToRight,
1 => ParagraphDirectionHint::RightToLeft,
2 => ParagraphDirectionHint::AutoLeftToRight,
_ => panic!("invalid direction code {}", direction),
},
);
Expand Down Expand Up @@ -2086,15 +2093,15 @@ mod tests {
let inputs: Vec<BidiClass> = fields[0].split_whitespace().map(class_by_name).collect();
let bitset: u32 = fields[1].trim().parse().unwrap();

let mut directions: Vec<Option<Direction>> = vec![];
let mut directions: Vec<ParagraphDirectionHint> = vec![];
if bitset & 1 == 1 {
directions.push(None);
directions.push(ParagraphDirectionHint::AutoLeftToRight);
}
if bitset & 2 == 2 {
directions.push(Some(Direction::LeftToRight));
directions.push(ParagraphDirectionHint::LeftToRight);
}
if bitset & 4 == 4 {
directions.push(Some(Direction::RightToLeft));
directions.push(ParagraphDirectionHint::RightToLeft);
}

let mut printed_summary = false;
Expand Down
3 changes: 3 additions & 0 deletions config/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@ pub struct Config {
#[serde(default)]
pub experimental_pixel_positioning: bool,

#[serde(default)]
pub experimental_bidi: bool,

#[serde(default = "default_stateless_process_list")]
pub skip_close_confirmation_for_processes_named: Vec<String>,

Expand Down
1 change: 1 addition & 0 deletions termwiz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ thiserror = "1.0"
unicode-segmentation = "1.8"
ucd-trie = "0.1"
vtparse = { version="0.6", path="../vtparse" }
wezterm-bidi = { path = "../bidi" }

[features]
widgets = ["cassowary", "fnv"]
Expand Down
79 changes: 77 additions & 2 deletions termwiz/src/cellcluster.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::cell::{Cell, CellAttributes};
use crate::emoji::Presentation;
use std::borrow::Cow;
use wezterm_bidi::{BidiContext, Direction, ParagraphDirectionHint};

/// A `CellCluster` is another representation of a Line.
/// A `Vec<CellCluster>` is produced by walking through the Cells in
Expand All @@ -13,6 +14,7 @@ pub struct CellCluster {
pub text: String,
pub width: usize,
pub presentation: Presentation,
pub direction: Direction,
byte_to_cell_idx: Vec<usize>,
byte_to_cell_width: Vec<u8>,
pub first_cell_idx: usize,
Expand Down Expand Up @@ -46,6 +48,7 @@ impl CellCluster {
hint: usize,
iter: impl Iterator<Item = (usize, &'a Cell)>,
cursor_idx: Option<usize>,
bidi_hint: Option<ParagraphDirectionHint>,
) -> Vec<CellCluster> {
let mut last_cluster = None;
let mut clusters = Vec::new();
Expand All @@ -64,7 +67,7 @@ impl CellCluster {
Cow::Borrowed(c.attrs())
};

let is_cursor_boundary = Some(cell_idx) == cursor_idx;
let is_cursor_boundary = bidi_hint.is_none() && Some(cell_idx) == cursor_idx;
let was_cursor = last_was_cursor;
last_was_cursor = is_cursor_boundary;

Expand Down Expand Up @@ -143,7 +146,78 @@ impl CellCluster {
clusters.push(cluster);
}

clusters
if let Some(hint) = bidi_hint {
let mut resolved_clusters = vec![];

let mut context = BidiContext::new();
for cluster in clusters {
Self::resolve_bidi(&mut context, hint, cluster, &mut resolved_clusters);
}

resolved_clusters
} else {
clusters
}
}

/// Splits `cluster` into one `CellCluster` per bidi run and appends the
/// pieces to `resolved`.
///
/// The cluster text is resolved as a single paragraph using `hint`. Each
/// run reported by `context` becomes its own cluster carrying the run's
/// direction, the run's text (in the order yielded by `run.indices()`),
/// and byte-to-cell mappings rebuilt for that text. `attrs` and
/// `presentation` are copied from the source cluster.
///
/// Panics if a run yields no codepoint indices, because there is then no
/// first cell index to record.
fn resolve_bidi(
    context: &mut BidiContext,
    hint: ParagraphDirectionHint,
    cluster: CellCluster,
    resolved: &mut Vec<Self>,
) {
    // The bidi context addresses codepoints, while the cluster's cell
    // lookups are keyed by byte offset; remember where each codepoint
    // starts in the original text.
    let byte_len = cluster.text.len();
    let mut codepoints = Vec::with_capacity(byte_len);
    let mut byte_offsets = Vec::with_capacity(byte_len);
    for (offset, ch) in cluster.text.char_indices() {
        byte_offsets.push(offset);
        codepoints.push(ch);
    }
    context.resolve_paragraph(&codepoints, hint);

    // A source cluster with an empty mapping keeps it empty in the pieces.
    let keep_widths = !cluster.byte_to_cell_width.is_empty();
    let keep_cells = !cluster.byte_to_cell_idx.is_empty();

    for run in context.runs() {
        let mut run_text = String::with_capacity(run.range.end - run.range.start);
        let mut cell_for_byte = vec![];
        let mut width_for_byte = vec![];
        let mut total_width = 0usize;
        let mut leading_cell = None;

        for cp_idx in run.indices() {
            let ch = codepoints[cp_idx];
            run_text.push(ch);

            // Look up the cell data via the codepoint's position in the
            // source cluster, not its position in the new run text.
            let source_byte = byte_offsets[cp_idx];
            let cell_width = cluster.byte_to_cell_width(source_byte);
            total_width += cell_width as usize;

            let cell_idx = cluster.byte_to_cell_idx(source_byte);
            if leading_cell.is_none() {
                leading_cell = Some(cell_idx);
            }

            // The mappings are per byte, so emit one entry for every
            // UTF-8 byte of this codepoint.
            let utf8_len = ch.len_utf8();
            if keep_widths {
                width_for_byte.extend(std::iter::repeat(cell_width).take(utf8_len));
            }
            if keep_cells {
                cell_for_byte.extend(std::iter::repeat(cell_idx).take(utf8_len));
            }
        }

        resolved.push(CellCluster {
            attrs: cluster.attrs.clone(),
            text: run_text,
            width: total_width,
            direction: run.direction,
            presentation: cluster.presentation,
            byte_to_cell_width: width_for_byte,
            byte_to_cell_idx: cell_for_byte,
            first_cell_idx: leading_cell.unwrap(),
        });
    }
}

/// Start off a new cluster with some initial data
Expand Down Expand Up @@ -182,6 +256,7 @@ impl CellCluster {
byte_to_cell_idx: idx,
byte_to_cell_width,
first_cell_idx: cell_idx,
direction: Direction::LeftToRight,
}
}

Expand Down
14 changes: 12 additions & 2 deletions termwiz/src/surface/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize};
use std::ops::Range;
use std::sync::Arc;
use unicode_segmentation::UnicodeSegmentation;
use wezterm_bidi::ParagraphDirectionHint;

bitflags! {
#[cfg_attr(feature="use_serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -726,8 +727,17 @@ impl Line {
})
}

pub fn cluster(&self, cursor_idx: Option<usize>) -> Vec<CellCluster> {
CellCluster::make_cluster(self.cells.len(), self.visible_cells(), cursor_idx)
/// Groups this line's visible cells into `CellCluster`s by delegating to
/// `CellCluster::make_cluster`.
///
/// `cursor_idx` and `bidi_hint` are forwarded unchanged. The total cell
/// count (`self.cells.len()`) is passed as the leading `hint` argument.
/// When `bidi_hint` is `Some`, `make_cluster` additionally splits each
/// cluster into bidi runs and does not treat the cursor cell as a
/// cluster boundary.
pub fn cluster(
&self,
cursor_idx: Option<usize>,
bidi_hint: Option<ParagraphDirectionHint>,
) -> Vec<CellCluster> {
CellCluster::make_cluster(
self.cells.len(),
self.visible_cells(),
cursor_idx,
bidi_hint,
)
}

pub fn cells(&self) -> &[Cell] {
Expand Down
1 change: 1 addition & 0 deletions wezterm-font/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ walkdir = "2"
wezterm-input-types = { path = "../wezterm-input-types" }
wezterm-term = { path = "../term", features=["use_serde"] }
wezterm-toast-notification = { path = "../wezterm-toast-notification" }
wezterm-bidi = { path = "../bidi" }

[target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
fontconfig = { path = "../deps/fontconfig" }
Expand Down
6 changes: 6 additions & 0 deletions wezterm-font/src/hbwrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ impl Buffer {
unsafe { hb_buffer_allocation_successful(buf) } != 0,
"hb_buffer_create failed"
);
unsafe {
hb_buffer_set_content_type(
buf,
harfbuzz::hb_buffer_content_type_t::HB_BUFFER_CONTENT_TYPE_UNICODE,
)
};
Ok(Buffer { buf })
}

Expand Down
15 changes: 13 additions & 2 deletions wezterm-font/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use std::sync::{Arc, Mutex};
use std::time::Duration;
use termwiz::cell::Presentation;
use thiserror::Error;
use wezterm_bidi::Direction;
use wezterm_term::CellAttributes;
use wezterm_toast_notification::ToastNotification;

Expand Down Expand Up @@ -114,6 +115,7 @@ impl LoadedFont {
&self,
text: &str,
presentation: Option<Presentation>,
direction: Direction,
) -> anyhow::Result<Vec<GlyphInfo>> {
loop {
let (tx, rx) = channel();
Expand All @@ -125,6 +127,7 @@ impl LoadedFont {
},
|_| {},
presentation,
direction,
) {
Ok(tuple) => tuple,
Err(err) if err.downcast_ref::<ClearShapeCache>().is_some() => {
Expand All @@ -148,9 +151,15 @@ impl LoadedFont {
completion: F,
filter_out_synthetic: FS,
presentation: Option<Presentation>,
direction: Direction,
) -> anyhow::Result<Vec<GlyphInfo>> {
let (_async_resolve, res) =
self.shape_impl(text, completion, filter_out_synthetic, presentation)?;
let (_async_resolve, res) = self.shape_impl(
text,
completion,
filter_out_synthetic,
presentation,
direction,
)?;
Ok(res)
}

Expand All @@ -160,6 +169,7 @@ impl LoadedFont {
completion: F,
filter_out_synthetic: FS,
presentation: Option<Presentation>,
direction: Direction,
) -> anyhow::Result<(bool, Vec<GlyphInfo>)> {
let mut no_glyphs = vec![];

Expand All @@ -182,6 +192,7 @@ impl LoadedFont {
self.dpi,
&mut no_glyphs,
presentation,
direction,
);

no_glyphs.retain(|&c| c != '\u{FE0F}' && c != '\u{FE0E}');
Expand Down
Loading

0 comments on commit 0324ff6

Please sign in to comment.