Skip to content

Commit

Permalink
Merge pull request #129 from sachaos/tab-handling
Browse files Browse the repository at this point in the history
Support ansicode and char width for tab
  • Loading branch information
sachaos authored Aug 19, 2024
2 parents b2211f4 + 8ddf42f commit a597844
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 38 deletions.
8 changes: 5 additions & 3 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,11 +339,11 @@ impl<S: Store> App<S> {
if let Some(record) = record {
action_tx.send(Action::SetClock(record.start_time))?;
let mut result = termtext::Converter::new(style)
.convert(&normalize_stdout(record.stdout.clone()));
.convert(&normalize_stdout(&record.stdout));
log::debug!("result: {:?}", result);
if record.stdout.is_empty() {
result = termtext::Converter::new(style)
.convert(&normalize_stdout(record.stderr.clone()));
.convert(&normalize_stdout(&record.stderr));
result.mark_text(
0,
result.len(),
Expand All @@ -357,7 +357,9 @@ impl<S: Store> App<S> {
let previous_record = self.store.get_record(previous_id)?;
if let Some(previous_record) = previous_record {
let previous_result = termtext::Converter::new(style)
.convert(&normalize_stdout(previous_record.stdout));
.convert(&normalize_stdout(
&previous_record.stdout,
));
let previous_string = previous_result.plain_text();
if diff_mode == DiffMode::Add {
diff_and_mark(
Expand Down
127 changes: 92 additions & 35 deletions src/bytes.rs
Original file line number Diff line number Diff line change
@@ -1,59 +1,116 @@
use unicode_width::UnicodeWidthChar;

const TAB_SIZE: usize = 4;

pub fn normalize_stdout(b: Vec<u8>) -> Vec<u8> {
pub fn normalize_stdout(s: &[u8]) -> Vec<u8> {
// Naively replace tabs ('\t') with at most `TAB_SIZE` spaces (' ') while
// maintaining the alignment / elasticity per line (see tests below).
let mut b = b;
let (mut i, mut j) = (0, 0); // j tracks alignment
while i < b.len() {
if b[i] == b'\n' {
(i, j) = (i + 1, 0);
} else if b[i] == b'\t' {
b[i] = b' ';
let r = TAB_SIZE - (j % TAB_SIZE);
for _ in 1..r {
b.insert(i, b' ');
let str = String::from_utf8_lossy(s).to_string();
let mut b = Vec::with_capacity(str.len() * TAB_SIZE);
let mut chars = str.chars();
let mut width = 0;
while let Some(c) = chars.next() {
let count = skip_ansi_escape_sequence(c, &mut chars.clone());
if count > 0 {
b.push(c);
for _ in 0..count {
b.push(chars.next().unwrap_or(' '));
}
(i, j) = (i + r, 0);
continue;
}

if c == '\t' {
let r = TAB_SIZE - (width % TAB_SIZE);
b.resize(b.len() + r, ' ');
width += r;
} else if c == '\n' {
b.push('\n');
width = 0;
} else {
(i, j) = (i + 1, j + 1);
b.push(c);
width += c.width().unwrap_or(1);
}
}
b
b.into_iter().collect::<String>().into_bytes()
}

// Based on https://github.com/mgeisler/textwrap/blob/63970361d1d653ec8715acb931c3c109750d4a57/src/core.rs
/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence. This is typically used for colored text and will be
/// ignored when computing the text width.
const CSI: (char, char) = ('\x1b', '[');
/// The final bytes of an ANSI escape sequence must be in this range.
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

/// Skip an ANSI escape sequence and report how long it was.
///
/// `ch` is the current character and `chars` yields the characters that
/// follow it. When `ch` is ESC (`'\x1b'`), `chars` is advanced past the
/// rest of the sequence:
///
/// * `ESC [` (CSI): consumed up to and including the first character in
///   [`ANSI_FINAL_BYTE`].
/// * `ESC ]` (OSC): consumed up to and including the terminator, which is
///   either BEL (`'\x07'`) or the two-character sequence `ESC \`.
/// * ESC followed by any other character: only that one character is
///   consumed.
///
/// An unterminated sequence consumes everything that is left in `chars`.
///
/// Returns the number of characters pulled from `chars`, i.e. the length of
/// the sequence *excluding* `ch` itself. The result is `0` when `ch` is not
/// ESC, and also when ESC is the very last character of the input, so the
/// returned count never exceeds what `chars` actually held.
fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> usize {
    if ch != CSI.0 {
        return 0; // Nothing to skip here.
    }

    // Only count characters that were really consumed. A lone ESC at the end
    // of the input has nothing after it, so reporting a skipped character
    // would make a caller that replays `count` characters run off the end.
    let next = match chars.next() {
        Some(next) => next,
        None => return 0,
    };
    let mut count = 1;

    if next == CSI.1 {
        // We have found the start of an ANSI escape code, typically
        // used for colored terminal text. We skip until we find a
        // "final byte" in the range 0x40–0x7E.
        for c in chars.by_ref() {
            count += 1;
            if ANSI_FINAL_BYTE.contains(&c) {
                break;
            }
        }
    } else if next == ']' {
        // We have found the start of an Operating System Command,
        // which extends until the next sequence "\x1b\\" (the String
        // Terminator sequence) or the BEL character. The BEL
        // character is non-standard, but it is still used quite
        // often, for example, by GNU ls.
        let mut last = ']';
        for c in chars.by_ref() {
            count += 1;
            if c == '\x07' || (c == '\\' && last == CSI.0) {
                break;
            }
            last = c;
        }
    }

    count
}

mod test {
use super::*;

#[test]
fn test_normalize_stdout() {
assert_eq!(normalize_stdout(b"\t"), b" ");
// Make sure we don't miss any tabs in edge cases.
assert_eq!(
normalize_stdout(b"\t\t\t\t\t".to_vec()),
b" ".to_vec()
);
assert_eq!(normalize_stdout(b"\t\t\t\t\t"), b" ");
// Make sure tab is elastic (from 1 space to TAB_SIZE spaces).
assert_eq!(normalize_stdout(b"\t12345".to_vec()), b" 12345".to_vec());
assert_eq!(normalize_stdout(b"1\t2345".to_vec()), b"1 2345".to_vec());
assert_eq!(normalize_stdout(b"12\t345".to_vec()), b"12 345".to_vec());
assert_eq!(normalize_stdout(b"123\t45".to_vec()), b"123 45".to_vec());
assert_eq!(normalize_stdout(b"1234\t5".to_vec()), b"1234 5".to_vec());
assert_eq!(normalize_stdout(b"\t12345"), b" 12345");
assert_eq!(normalize_stdout(b"1\t2345"), b"1 2345");
assert_eq!(normalize_stdout(b"12\t345"), b"12 345");
assert_eq!(normalize_stdout(b"123\t45"), b"123 45");
assert_eq!(normalize_stdout(b"1234\t5"), b"1234 5");
// Make sure we reset alignment on new lines.
assert_eq!(normalize_stdout(b"123\t\n4\t5"), b"123 \n4 5");
assert_eq!(normalize_stdout(b"12\t3\n4\t5"), b"12 3\n4 5");
assert_eq!(normalize_stdout(b"1\t23\n4\t5"), b"1 23\n4 5");
assert_eq!(normalize_stdout(b"\t123\n4\t5"), b" 123\n4 5");
assert_eq!(
normalize_stdout(b"123\t\n4\t5".to_vec()),
b"123 \n4 5".to_vec()
);
assert_eq!(
normalize_stdout(b"12\t3\n4\t5".to_vec()),
b"12 3\n4 5".to_vec()
);
assert_eq!(
normalize_stdout(b"1\t23\n4\t5".to_vec()),
b"1 23\n4 5".to_vec()
normalize_stdout("あ\t\n\tえ".as_bytes()),
"あ い\nう え".as_bytes()
);
assert_eq!(
normalize_stdout(b"\t123\n4\t5".to_vec()),
b" 123\n4 5".to_vec()
normalize_stdout(b"\x1b[34ma\t\x1b[39mb\x1b[0m"),
b"\x1b[34ma \x1b[39mb\x1b[0m"
);
}
}

0 comments on commit a597844

Please sign in to comment.