-
Notifications
You must be signed in to change notification settings - Fork 74
/
fixlengths.rs
146 lines (130 loc) · 5.21 KB
/
fixlengths.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Help text and option specification for the `fixlengths` command. `run` passes this
// string to `util::get_args` together with argv to populate `Args`, so it is read at
// runtime and is not just documentation.
// NOTE(review): presumably docopt-style, in which case changes to the wording or layout
// of the Usage/options sections can alter argument parsing -- confirm before editing.
static USAGE: &str = r#"
Transforms CSV data so that all records have the same length. The length is
the length of the longest record in the data (not counting trailing empty fields,
but at least 1). Records with smaller lengths are padded with empty fields.
This requires two complete scans of the CSV data: one for determining the
record size and one for the actual transform. Because of this, the input
given must be a file and not stdin.
Alternatively, if --length is set, then all records are forced to that length.
This requires a single pass and can be done with stdin.
Usage:
qsv fixlengths [options] [<input>]
qsv fixlengths --help
fixlengths options:
-l, --length <arg> Forcefully set the length of each record. If a
record is not the size given, then it is truncated
or expanded as appropriate.
-i, --insert <pos> If empty fields need to be inserted, insert them
at <pos>. If <pos> is zero, then it is inserted
at the end of each record. If <pos> is negative, it
is inserted from the END of each record going backwards.
If <pos> is positive, it is inserted from the BEGINNING
of each record going forward. [default: 0]
--quote <arg> The quote character to use. [default: "]
--escape <arg> The escape character to use. When not specified,
quotes are escaped by doubling them.
Common options:
-h, --help Display this message
-o, --output <file> Write output to <file> instead of stdout.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
"#;
use std::cmp;
use serde::Deserialize;
use crate::{
config::{Config, Delimiter},
util, CliResult,
};
/// Parsed command-line arguments for `fixlengths`, filled in by `util::get_args`
/// from `USAGE` and argv.
#[derive(Deserialize)]
struct Args {
/// The optional `<input>` positional argument; handed to `Config::new` for reading.
arg_input: Option<String>,
/// `--length`: when set, every record is truncated or padded to exactly this many
/// fields. `run` rejects a value of 0.
flag_length: Option<usize>,
/// `--insert`: where padding fields go. 0 appends them at the end of the record,
/// a positive value is a field position counted from the beginning, and a negative
/// value is added to the target length to obtain the position.
flag_insert: i16,
/// `--quote`: the quote character given to the reader configuration.
flag_quote: Delimiter,
/// `--escape`: the escape character. When set, `run` also turns off
/// double-quote escaping on the reader configuration.
flag_escape: Option<Delimiter>,
/// `--output`: destination handed to `Config::new` for the writer.
flag_output: Option<String>,
/// `--delimiter`: field delimiter given to the reader configuration.
flag_delimiter: Option<Delimiter>,
}
/// Runs the `fixlengths` command: rewrites the input CSV so that every record has
/// the same number of fields.
///
/// The target length is `--length` when given. Otherwise the input is scanned once
/// up front to find the longest record, ignoring trailing empty fields but counting
/// at least the first field; that extra pass is why stdin is rejected in this mode.
///
/// Records longer than the target are truncated. Shorter records are padded with
/// empty fields, either at the end or at the position selected by `--insert`
/// (see [`padding_insert_index`]).
///
/// # Errors
///
/// Returns an incorrect-usage error when `--length` is 0, or when `--length` is
/// absent and the input is stdin. Errors from argument parsing, reading, writing
/// and flushing are propagated.
pub fn run(argv: &[&str]) -> CliResult<()> {
    let args: Args = util::get_args(USAGE, argv)?;
    let mut config = Config::new(args.arg_input.as_ref())
        .delimiter(args.flag_delimiter)
        .quote(args.flag_quote.as_byte())
        .no_headers(true)
        .flexible(true);
    if let Some(escape) = args.flag_escape {
        // An explicit escape character replaces the default quote-doubling.
        config = config.escape(Some(escape.as_byte())).double_quote(false);
    }

    let length = if let Some(length) = args.flag_length {
        if length == 0 {
            return fail_incorrectusage_clierror!("Length must be greater than 0.");
        }
        length
    } else {
        // --length not set: scan the whole input to find the longest record.
        // The input is read a second time below, so it cannot be stdin.
        if config.is_stdin() {
            return fail_incorrectusage_clierror!(
                "<stdin> cannot be used in this command. Please specify a file path."
            );
        }
        let mut maxlen = 0_usize;
        let mut rdr = config.reader()?;
        let mut record = csv::ByteRecord::new();
        while rdr.read_byte_record(&mut record)? {
            // Length of the record once trailing empty fields are dropped; the
            // first field always counts, even when it is empty.
            let trimmed_len = record
                .iter()
                .enumerate()
                .filter(|(index, field)| *index == 0 || !field.is_empty())
                .map(|(index, _)| index + 1)
                .last()
                .unwrap_or(0);
            maxlen = cmp::max(maxlen, trimmed_len);
        }
        maxlen
    };

    // Resolved once: the 1-based field index before which padding is inserted,
    // or `None` when padding always goes at the end of the record.
    let insert_index = padding_insert_index(args.flag_insert, length);

    let mut rdr = config.reader()?;
    let mut wtr = Config::new(args.flag_output.as_ref()).writer()?;
    let mut record = csv::ByteRecord::new();
    let mut record_work = csv::ByteRecord::new();

    while rdr.read_byte_record(&mut record)? {
        let field_count = record.len();
        if field_count > length {
            record.truncate(length);
            wtr.write_byte_record(&record)?;
            continue;
        }

        let padding = length - field_count;
        match insert_index {
            // Rebuild the record only when there is padding to add and the insert
            // position falls on an existing field; in every other case the result
            // is the same as appending at the end.
            Some(insert_at) if padding > 0 && insert_at <= field_count => {
                record_work.clear();
                for (index, field) in record.iter().enumerate() {
                    if index + 1 == insert_at {
                        for _ in 0..padding {
                            record_work.push_field(b"");
                        }
                    }
                    record_work.push_field(field);
                }
                wtr.write_byte_record(&record_work)?;
            }
            _ => {
                for _ in 0..padding {
                    record.push_field(b"");
                }
                wtr.write_byte_record(&record)?;
            }
        }
    }
    Ok(wtr.flush()?)
}

/// Resolves the `--insert` value into the 1-based index of the field in front of
/// which padding fields are inserted.
///
/// * `insert > 0`: the value itself.
/// * `insert < 0`: `length + insert`, i.e. counted back from the target length.
/// * `insert == 0`, or a negative value that reaches position 0 or lower: `None`,
///   meaning padding is appended at the end of the record.
///
/// The arithmetic is done in `usize`, so target lengths and field counts above
/// `i16::MAX` neither truncate nor overflow.
fn padding_insert_index(insert: i16, length: usize) -> Option<usize> {
    let offset = usize::from(insert.unsigned_abs());
    match insert.cmp(&0) {
        cmp::Ordering::Greater => Some(offset),
        cmp::Ordering::Less => length.checked_sub(offset).filter(|&index| index > 0),
        cmp::Ordering::Equal => None,
    }
}