Skip to content

Commit

Permalink
Merge branch 'no-break-word' into 'master'
Browse files Browse the repository at this point in the history
Only wrap on space

See merge request mkjeldsen/commitmsgfmt!10
  • Loading branch information
commonquail committed Sep 21, 2018
2 parents 536226a + 3f6edad commit 16fff21
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 73 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
`commitmsgfmt` formats commit messages. It reflows and wraps text, with special
understanding of patterns often seen in commit messages.

## Unreleased

- When wrapping a line, only do so at the last space before the line length
limit; do not break any words to remain within the limit. "Words" that extend
beyond the limit are not likely to be meaningfully breakable in the first
place, and breaking is actively detrimental to URLs. This change alone yields
a 6-8 times speed-up as reported by `time(1)`.

## 1.1.0 - 2018-08-25

- #3: If the `core.commentChar` setting is set to an explicit character, use
Expand Down
15 changes: 11 additions & 4 deletions doc/commitmsgfmt.1.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,13 @@ counter-productive.
=== Paragraph

Generally, any two consecutive lines will be considered to belong to the same
_paragraph_. Paragraphs are wrapped either at the last space up to *--width*
graphemes, or at *--width* if there are no spaces in the respective line.
Paragraphs are surrounded by a single empty line, additional empty lines being
removed.
_paragraph_. Paragraphs are wrapped at the last space up to *--width*
graphemes. Paragraphs are surrounded by a single empty line, additional empty
lines being removed.

A line that extends beyond *--width* but contains no space--effectively a
single word, or for instance a long URL--will not be wrapped. Indentation
spaces are ignored in this context.

{self} currently uses the traditional greedy _minimum number of lines_ wrapping
algorithm{empty}footnoteref:[wrap-algo,{uri-algo}], like *fmt*(1) and Vim, for
Expand Down Expand Up @@ -242,6 +245,7 @@ foo bar baz [1]
- foo
1. foo bar
baz
2. https://www.url.example long word
[1] abcdefghijklmnopq
----
Expand All @@ -257,6 +261,9 @@ baz [1]
- foo
1. foo bar
baz
2. https://www.url.example
long
word
[1] abcdefghijklmnopq
----
Expand Down
163 changes: 95 additions & 68 deletions src/commitmsgfmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,76 +38,17 @@ impl CommitMsgFmt {
let mut continuation = String::with_capacity(indent.len() + li.len());
continuation.push_str(indent);
continuation.push_str(&" ".repeat(li.len()));
let mut rest = s.clone();
buf.push_str(indent);
buf.push_str(li);

let mut rest_len = rest.graphemes(true).count();
let limit = self.width - continuation.len();

while rest_len > limit {
let mut end = limit + 1;
while !rest.is_char_boundary(end) {
end -= 1;
}
let (end, start) = match rest[..end].rfind(' ') {
Some(prev_space) => (prev_space, prev_space + 1),
None => match rest.grapheme_indices(true).nth(end) {
Some((ll, _)) => (ll, ll),
None => (end, end),
},
};
buf.push_str(&rest[..end]);
buf.push('\n');
buf.push_str(&continuation);
rest = rest.split_off(start);
rest_len = rest.len();
}
buf.push_str(&rest);
self.wrap_paragraph_into(&mut buf, &s, Some(&continuation));
buf.push('\n');
}
Literal(ref l) => {
buf.push_str(l.as_str());
}
Paragraph(ref p) => {
let mut line = String::with_capacity(self.width);
for c in p.chars() {
let len = line.graphemes(true).count();
match c {
' ' => {
if len < self.width {
line.push(c);
} else {
buf.push_str(&line);
buf.push('\n');
line.clear();
}
}
_ => {
if len < self.width {
line.push(c);
} else {
let rest = match line.rfind(' ') {
Some(prev_space) => {
let rest = line.split_off(prev_space);
Some(rest)
}
None => None,
};
buf.push_str(&line);
buf.push('\n');

line.clear();

if let Some(rest) = rest {
line.push_str(&rest[1..]);
}
line.push(c);
}
}
}
}
buf.push_str(&line);
self.wrap_paragraph_into(&mut buf, &p, None);
buf.push('\n');
}
Reference(ref s) | Subject(ref s) | Trailer(ref s) => {
Expand All @@ -120,6 +61,33 @@ impl CommitMsgFmt {

buf
}

fn wrap_paragraph_into(&self, buf: &mut String, paragraph: &str, continuation: Option<&str>) {
let limit = match continuation {
Some(ref c) => self.width - c.len(),
None => self.width,
};
let mut cur_line_len = 0;
for word in paragraph.split(' ') {
let word_len = word.graphemes(true).count();

// Not a new line so we need to fiddle with whitespace.
if cur_line_len != 0 {
if cur_line_len + word_len > limit {
cur_line_len = 0;
buf.push('\n');
if let Some(cont) = continuation {
buf.push_str(&cont);
}
} else {
buf.push(' ');
}
}

buf.push_str(&word);
cur_line_len += word_len + 1;
}
}
}

#[cfg(test)]
Expand All @@ -142,11 +110,10 @@ mod tests {

#[test]
fn formats_long_single_line_message() {
let s = "f".repeat(105);
let s = "f".repeat(100);
assert_eq!(filter(10, &s), "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
ffffffffff
fffff
");
}

Expand All @@ -155,14 +122,14 @@ fffff
let s = "
ääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääë
öööööü";
öööö ü";

assert_eq!(filter(5, &s), "
ääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääääää
ë
ööööö
öööö
ü
", "break before every changing grapheme");
}
Expand Down Expand Up @@ -266,9 +233,7 @@ foo
- foo foo
baaaaar bar
- ääääääääääääää
ëëëëëëëëëëëëëë
ö
- ääääääääääääääëëëëëëëëëëëëëëö
";

assert_eq!(filter(16, &input), expected);
Expand Down Expand Up @@ -447,4 +412,66 @@ content
let fmt = CommitMsgFmt::new(72, ';');
assert_eq!(fmt.filter(&input), expected);
}

#[test]
fn bug_wrapping_treats_exotic_whitespace_differently_from_ascii_spaces() {
// Whether wrapping should treat all whitespace the same is debatable but certainly we only
// consider the regular ASCII space. This test documents the acknowledgement thereof, not a
// deliberate decision, and classifies it a bug for that reason.
//
// Some largely arbitrary whitespace characters from
// https://en.wikipedia.org/w/index.php?title=Whitespace_character&oldid=858017200
let input = "
foo
a b\tc\u{00a0}d\u{2003}e\u{2009}\u{2009}f\u{202f}g
a b\tc\u{00a0}d\u{2003}e\u{2009}f\u{202f}g
";

let expected = "
foo
a
b\tc\u{00a0}d\u{2003}e\u{2009}\u{2009}f\u{202f}g
a b\tc\u{00a0}d\u{2003}e\u{2009}f\u{202f}g
";
assert_eq!(filter(2, &input), expected);
}

#[test]
fn does_not_break_words() {
let input = "
foo
foo bar baz
qux areallylongword
and https://a.really-long-url.example
- foo bar baz
- qux https://a.really-long-url.example
- https://a.really-long-url.example
[1] https://a.really-long-url.example
";
let expected = "
foo
foo bar
baz qux
areallylongword
and
https://a.really-long-url.example
- foo bar
baz
- qux
https://a.really-long-url.example
- https://a.really-long-url.example
[1] https://a.really-long-url.example
";
assert_eq!(filter(10, &input), expected);
}
}
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ mod tests {
cmd.stdin
.as_mut()
.expect("child stdin")
.write_all(b"subject\nbody")
.write_all(b"subject\nb o d y")
.expect("write to child stdin");

let output = cmd.wait_with_output().expect("run debug binary");
Expand Down

0 comments on commit 16fff21

Please sign in to comment.