forked from capstone-rust/capstone-rs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.rs
366 lines (321 loc) · 11.9 KB
/
build.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
//! The following environment variables affect the build:
//!
//! * `UPDATE_CAPSTONE_BINDINGS`: setting indicates that the pre-generated `capstone.rs` should be
//! updated with the output bindgen
//!
//! # Bindgen enum mapping
//!
//! Bindgen can convert C enums in several ways:
//!
//! 1. **"Rustified" enum**: Bindgen creates a Rust enum, which provides the most "type safety" and
//! reduces the chance of confusing variants for a different type. For variants whose
//! discriminant values are not distinct, bindgen defines constants.
//! 2. **"Constified" enum**: Bindgen defines constants for each enum variant.
//! 3. **"Constified" enum module**: Bindgen defines constants for each enum variant in a separate
//! module.
//!
//! # Rationale for enum types
//!
//! Rustified enum: these have distinct variant discriminants
//!
//! * `cs_arch`
//! * `cs_op_type`
//! * `cs_opt_type`
//!
//! Constified enum module:
//!
//! * `cs_err`: avoid undefined behavior in case an error is instantiated with an invalid value; the
//! compiler could make false assumptions that the value is only within a certain range.
//! * `cs_group_type`/`ARCH_insn_group`: each architecture adds group types to the `cs_group_type`,
//! so we constify to avoid needing to transmute.
//! * `cs_mode`: used as a bitmask; when values are OR'd together, they are not a valid discriminant
//! value
//! * `cs_opt_value`/`ARCH_reg`: variant discriminants are not unique
//!
//! Bitfield enum: fields are OR'd together to form new values
//! * `cs_mode`
#[cfg(feature = "use_bindgen")]
extern crate bindgen;
extern crate cc;
#[cfg(feature = "use_bindgen")]
extern crate regex;
#[cfg(feature = "use_bindgen")]
use {
regex::Regex,
std::{fs::File, io::Write},
};
use std::env;
use std::fs::copy;
use std::path::PathBuf;
include!("common.rs");
const CAPSTONE_DIR: &str = "capstone";
/// Indicates how capstone library should be linked
#[allow(dead_code)]
enum LinkType {
Dynamic,
Static,
}
/// Build capstone using the cc crate
fn build_capstone_cc() {
use std::fs::DirEntry;
fn read_dir_and_filter<F: Fn(&DirEntry) -> bool>(dir: &str, filter: F) -> Vec<String> {
use std::fs::read_dir;
read_dir(dir)
.expect("Failed to read capstone source directory")
.map(|e| e.expect("Failed to read capstone source directory"))
.filter(|e| filter(e))
.map(|e| {
format!(
"{}/{}",
dir,
e.file_name().to_str().expect("Invalid filename")
)
})
.collect()
}
fn find_c_source_files(dir: &str) -> Vec<String> {
read_dir_and_filter(dir, |e| {
let file_type = e
.file_type()
.expect("Failed to read capstone source directory");
let file_name = e.file_name().into_string().expect("Invalid filename");
file_type.is_file() && (file_name.ends_with(".c") || file_name.ends_with(".C"))
})
}
fn find_arch_dirs() -> Vec<String> {
read_dir_and_filter(&format!("{}/{}", CAPSTONE_DIR, "arch"), |e| {
let file_type = e
.file_type()
.expect("Failed to read capstone source directory");
file_type.is_dir()
})
}
let mut files = find_c_source_files(CAPSTONE_DIR);
for arch_dir in find_arch_dirs().into_iter() {
files.append(&mut find_c_source_files(&arch_dir));
}
// keep build reproducible
files.sort();
let use_static_crt = {
let target_features = env::var("CARGO_CFG_TARGET_FEATURE").unwrap_or_default();
target_features.split(',').any(|f| f == "crt-static")
};
let mut builder = cc::Build::new();
builder
.files(files)
.include(format!("{}/{}", CAPSTONE_DIR, "include"))
.define("CAPSTONE_USE_SYS_DYN_MEM", None)
.define("CAPSTONE_HAS_ARM", None)
.define("CAPSTONE_HAS_ARM64", None)
.define("CAPSTONE_HAS_EVM", None)
.define("CAPSTONE_HAS_M680X", None)
.define("CAPSTONE_HAS_M68K", None)
.define("CAPSTONE_HAS_MIPS", None)
.define("CAPSTONE_HAS_POWERPC", None)
.define("CAPSTONE_HAS_RISCV", None)
.define("CAPSTONE_HAS_SPARC", None)
.define("CAPSTONE_HAS_SYSZ", None)
.define("CAPSTONE_HAS_TMS320C64X", None)
.define("CAPSTONE_HAS_WASM", None)
.define("CAPSTONE_HAS_X86", None)
.define("CAPSTONE_HAS_XCORE", None)
.define("CAPSTONE_HAS_BPF", None)
// No need to display any warnings from the C library
.flag_if_supported("-w")
.static_crt(use_static_crt);
if !cfg!(feature = "full") {
builder.define("CAPSTONE_DIET", "yes");
}
builder.compile("capstone");
}
/// Search for header in search paths
#[cfg(feature = "use_bindgen")]
fn find_capstone_header(header_search_paths: &[PathBuf], name: &str) -> Option<PathBuf> {
for search_path in header_search_paths.iter() {
let potential_file = search_path.join(name);
if potential_file.is_file() {
return Some(potential_file);
}
}
None
}
/// Gets environment variable value. Panics if variable is not set.
fn env_var(var: &str) -> String {
env::var(var).unwrap_or_else(|_| panic!("Environment variable {} is not set", var))
}
/// Parse generated bindings and impl from_insn_id() for all architectures
/// instructions enum declaration.
#[cfg(feature = "use_bindgen")]
fn impl_insid_to_insenum(bindings: &str) -> String {
use std::fmt::Write as _;
let mut impl_arch_enum = String::new();
impl_arch_enum.push_str("use core::convert::From;\n");
for cs_arch in ARCH_INCLUDES {
let arch = cs_arch.cs_name();
// find architecture instructions enum declaration
let re_enum_def = Regex::new(&format!("pub enum {}_insn (?s)\\{{.*?\\}}", arch))
.expect("Unable to compile regex");
let cap_enum_def = &re_enum_def
.captures(bindings)
.expect("Unable to capture group")[0];
// find instructions and their id
let re_ins_ids = Regex::new(&format!(
"{}_INS_(?P<ins>[A-Z0-9_]+) = (?P<id>\\d+)",
&arch.to_uppercase()
))
.expect("Unable to compile regex");
write!(
impl_arch_enum,
"impl From<u32> for {}_insn {{\n
fn from(id: u32) -> Self {{\n
match id {{\n",
&arch
)
.unwrap();
// fill match expression
for cap_ins_id in re_ins_ids.captures_iter(cap_enum_def) {
writeln!(
impl_arch_enum,
"{} => {}_insn::{}_INS_{},",
&cap_ins_id["id"],
&arch,
&arch.to_uppercase(),
&cap_ins_id["ins"]
)
.unwrap();
}
// if id didn't match, return [arch]_INS_INVALID.
// special case for m680x which has 'INVLD' variant instead of 'INVALID'
let invalid_str = match arch {
"m680x" => "INVLD",
_ => "INVALID",
};
write!(
impl_arch_enum,
"_ => {}_insn::{}_INS_{},",
&arch,
&arch.to_uppercase(),
invalid_str,
)
.unwrap();
impl_arch_enum.push_str("}\n}\n}\n");
}
impl_arch_enum
}
/// Create bindings using bindgen
#[cfg(feature = "use_bindgen")]
fn write_bindgen_bindings(
header_search_paths: &[PathBuf],
update_pregenerated_bindings: bool,
pregenerated_bindgen_header: PathBuf,
pregenerated_bindgen_impl: PathBuf,
out_bindings_path: PathBuf,
out_impl_path: PathBuf,
) {
#[allow(deprecated)]
let mut builder = bindgen::Builder::default()
.rust_target(bindgen::RustTarget::Stable_1_19)
.size_t_is_usize(true)
.use_core()
.ctypes_prefix("libc")
.header(
find_capstone_header(header_search_paths, "capstone.h")
.expect("Could not find header")
.to_str()
.unwrap(),
)
.disable_name_namespacing()
.prepend_enum_name(false)
.generate_comments(true)
.layout_tests(false) // eliminate test failures on platforms with different pointer sizes
.impl_debug(true)
.constified_enum_module("cs_err|cs_group_type|cs_opt_value")
.bitfield_enum("cs_mode|cs_ac_type")
.rustified_enum(".*")
.no_copy("cs_insn");
// Whitelist cs_.* functions and types
let pattern = String::from("cs_.*");
builder = builder
.allowlist_function(&pattern)
.allowlist_type(&pattern);
// Whitelist types with architectures
for arch in ARCH_INCLUDES {
// .*(^|_)ARCH(_|$).*
let arch_type_pattern = format!(".*(^|_){}(_|$).*", arch.cs_name);
let const_mod_pattern = format!("^{}_(reg|insn_group)$", arch.cs_name);
builder = builder
.allowlist_type(&arch_type_pattern)
.constified_enum_module(&const_mod_pattern);
}
let bindings = builder.generate().expect("Unable to generate bindings");
// Write bindings to $OUT_DIR/bindings.rs
bindings
.write_to_file(&out_bindings_path)
.expect("Unable to write bindings");
// Parse bindings and impl fn to cast u32 to <arch>_insn, write output to file
let bindings_impl_str = impl_insid_to_insenum(&bindings.to_string());
let mut bindings_impl = File::create(&out_impl_path).expect("Unable to open file");
bindings_impl
.write_all(bindings_impl_str.as_bytes())
.expect("Unable to write file");
if update_pregenerated_bindings {
copy(out_bindings_path, pregenerated_bindgen_header)
.expect("Unable to update capstone bindings");
copy(out_impl_path, pregenerated_bindgen_impl).expect("Unable to update capstone bindings");
}
}
fn main() {
#[allow(unused_assignments)]
let mut link_type: Option<LinkType> = None;
// C header search paths
let mut header_search_paths: Vec<PathBuf> = Vec::new();
build_capstone_cc();
header_search_paths.push([CAPSTONE_DIR, "include", "capstone"].iter().collect());
link_type = Some(LinkType::Static);
match link_type.expect("Must specify link type") {
LinkType::Dynamic => {
println!("cargo:rustc-link-lib=dylib=capstone");
}
LinkType::Static => {
println!("cargo:rustc-link-lib=static=capstone");
}
}
// If UPDATE_CAPSTONE_BINDINGS is set, then updated the pre-generated capstone bindings
let update_pregenerated_bindings = env::var("UPDATE_CAPSTONE_BINDINGS").is_ok();
if update_pregenerated_bindings && !cfg!(feature = "use_bindgen") {
panic!( "Setting UPDATE_CAPSTONE_BINDINGS only makes sense when enabling feature \"use_bindgen\"");
}
let pregenerated_bindgen_header: PathBuf = [
env_var("CARGO_MANIFEST_DIR"),
"pre_generated".into(),
BINDINGS_FILE.into(),
]
.iter()
.collect();
let pregenerated_bindgen_impl: PathBuf = [
env_var("CARGO_MANIFEST_DIR"),
"pre_generated".into(),
BINDINGS_IMPL_FILE.into(),
]
.iter()
.collect();
let out_bindings_path = PathBuf::from(env_var("OUT_DIR")).join(BINDINGS_FILE);
let out_impl_path = PathBuf::from(env_var("OUT_DIR")).join(BINDINGS_IMPL_FILE);
// Only run bindgen if we are *not* using the bundled capstone bindings
#[cfg(feature = "use_bindgen")]
write_bindgen_bindings(
&header_search_paths,
update_pregenerated_bindings,
pregenerated_bindgen_header,
pregenerated_bindgen_impl,
out_bindings_path,
out_impl_path,
);
// Otherwise, copy the pregenerated bindings
#[cfg(not(feature = "use_bindgen"))]
{
copy(pregenerated_bindgen_header, out_bindings_path)
.expect("Unable to update capstone bindings");
copy(pregenerated_bindgen_impl, out_impl_path).expect("Unable to update capstone bindings");
}
}