-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge #205: Implement error correction
76d0dae fuzz: add fuzztests that try to correct bech32 and codex32 errors (Andrew Poelstra) 383f788 correction: support erasures (Andrew Poelstra) 2e1b7be implement error correction (Andrew Poelstra) 6c24f98 primitives: introduce the Berlekamp-Massey algorithm for computing linear shift registers (Andrew Poelstra) fc903d6 field: require TryInto<Base> for ExtensionField (Andrew Poelstra) 4dfe325 field: add ability to multiply by integers (Andrew Poelstra) 74ec75f bech32: use correct generator exponents (Andrew Poelstra) Pull request description: This implements the core algorithms for error correction. In principle this exposes an API which is sufficient for somebody to implement error correction (of both substitutions and erasures). In practice the API is unlikely to be super usable because: * We yield error locations as indices from the *end* of the string rather than from the beginning (which we do because the error correction logic doesn't know the original string or even its length); * We similarly require the user to indicate the location of erasures as indices from the end of the string; * We yield errors as GF32 offsets to be added to the current character in the string, rather than as correct characters (again, we do this because we don't know the string). * There is a situation in which we detectably cannot correct the string, but we yield some "corrections" anyway (to detect this case, we need to notice if the error iterator ends "early" for a technical definition of "early"; this is not too hard but there's an API question about whether the iterator should be yielding a `Result` or what). * We don't have a way for the user to signal erasures other than providing a valid bech32 character and then later telling the correction logic that the location is an erasure. We should be able to parse `?`s or something.
There is also some missing functionality: * We should be able to correct "burst errors" where if the user indicates a long string of erasures all in a row, we should be able to correct up to checksum-length-many of them. (But if there are other errors, we then won't detect them, so I'm unsure what the UX should look like.) * Eventually we ought to have a "list decoder" which not only provides a unique best correction if one exists, but always provides a list of "plausible" corrections that the user would then need to check against the blockchain. This would involve a totally different error correction algorithm and I don't intend to do it in the next several years, but throwing it out there anyway. The next PR will be an "error correction API" PR. I would like some guidance from users on what this API should look like. ACKs for top commit: clarkmoody: ACK 76d0dae Tree-SHA512: 83c6e0a261475bfcf23bff0c7911714f4e366222a67881638818ee991dfe7900e8b38ece872a89ddcfa91cb15b89bd90b0d38d3ae87d2d079bda81c8ed4805e3
- Loading branch information
Showing
15 changed files
with
1,143 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
use bech32::primitives::LfsrIter; | ||
use bech32::Fe32; | ||
use honggfuzz::fuzz; | ||
|
||
/// Fuzz target: runs Berlekamp-Massey on `data` and checks the resulting LFSR replays it. | ||
/// | ||
/// Inputs containing a byte >= 32, empty inputs, and inputs longer than 1000 bytes are ignored. | ||
fn do_test(data: &[u8]) { | ||
// Reject any byte >= 32; every byte that remains is converted below with | ||
// `Fe32::try_from(..).unwrap()`. | ||
for ch in data { | ||
if *ch >= 32 { | ||
return; | ||
} | ||
} | ||
if data.is_empty() || data.len() > 1_000 { | ||
return; | ||
} | ||
|
||
// Convert the raw bytes into field elements, one per input byte. | ||
let mut iv = Vec::with_capacity(data.len()); | ||
for ch in data { | ||
iv.push(Fe32::try_from(*ch).unwrap()); | ||
} | ||
|
||
// The first `data.len()` items yielded by the iterator must equal the input, in order. | ||
for (i, d) in LfsrIter::berlekamp_massey(&iv).take(data.len()).enumerate() { | ||
assert_eq!(data[i], d.to_u8()); | ||
} | ||
} | ||
|
||
/// Fuzzer entry point: invokes the honggfuzz `fuzz!` macro in an endless loop, | ||
/// handing each generated input to `do_test`. | ||
fn main() { | ||
loop { | ||
fuzz!(|data| { | ||
do_test(data); | ||
}); | ||
} | ||
} | ||
|
||
// Test-only helpers for replaying a single fuzz input written as a hex string. | ||
#[cfg(test)] | ||
mod tests { | ||
/// Decodes `hex` into bytes and appends them to `out`. | ||
/// | ||
/// Newline bytes are skipped. Any other character that is not a hex digit panics | ||
/// with "Bad hex". A trailing unpaired digit is never pushed. | ||
fn extend_vec_from_hex(hex: &str, out: &mut Vec<u8>) { | ||
let mut b = 0; | ||
for (idx, c) in hex.as_bytes().iter().filter(|&&c| c != b'\n').enumerate() { | ||
// Shift the previous nibble up and merge the new one into the low four bits. | ||
b <<= 4; | ||
match *c { | ||
b'A'..=b'F' => b |= c - b'A' + 10, | ||
b'a'..=b'f' => b |= c - b'a' + 10, | ||
b'0'..=b'9' => b |= c - b'0', | ||
_ => panic!("Bad hex"), | ||
} | ||
// Every second digit completes a byte. | ||
if (idx & 1) == 1 { | ||
out.push(b); | ||
b = 0; | ||
} | ||
} | ||
} | ||
|
||
/// Feeds the bytes decoded from the hex literal below to `do_test`. | ||
#[test] | ||
fn duplicate_crash() { | ||
let mut a = Vec::new(); | ||
extend_vec_from_hex("00", &mut a); | ||
super::do_test(&a); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
use std::collections::HashMap; | ||
|
||
use bech32::primitives::correction::CorrectableError as _; | ||
use bech32::primitives::decode::CheckedHrpstring; | ||
use bech32::{Bech32, Fe32}; | ||
use honggfuzz::fuzz; | ||
|
||
// coinbase output of block 862290 | ||
static CORRECT: &[u8; 62] = b"bc1qwzrryqr3ja8w7hnja2spmkgfdcgvqwp5swz4af4ngsjecfz0w0pqud7k38"; | ||
|
||
/// Fuzz target: mangles the valid string `CORRECT` as directed by `data`, then checks | ||
/// that the bech32 error corrector reports exactly the offsets that were applied. | ||
/// | ||
/// `data` is consumed in 2-byte pairs. The low 7 bits of the first byte select a | ||
/// position, its high bit marks that position as an erasure, and the second byte is | ||
/// the `Fe32` offset added to the character there. | ||
/// | ||
/// Returns without testing anything when `data` is empty or of odd length, a position | ||
/// is out of range or repeated, an offset byte is not a valid `Fe32`, or no offset | ||
/// other than `Fe32::Q` was applied. | ||
fn do_test(data: &[u8]) { | ||
if data.is_empty() || data.len() % 2 == 1 { | ||
return; | ||
} | ||
|
||
let mut any_actual_errors = false; | ||
// Weighted count: +1 per erasure, +2 per non-erasure position with a non-Q offset. | ||
// NOTE(review): presumably "erasures + 2 * errors"; confirm the bound of 3 used below. | ||
let mut e2t = 0; | ||
let mut erasures = Vec::with_capacity(CORRECT.len()); | ||
// Start with a correct string | ||
let mut hrpstring = *CORRECT; | ||
// ..then mangle it | ||
let mut errors = HashMap::with_capacity(data.len() / 2); | ||
for sl in data.chunks_exact(2) { | ||
let idx = usize::from(sl[0]) & 0x7f; | ||
// Positions are offset by 3 below, skipping the leading "bc1" of CORRECT. | ||
if idx >= CORRECT.len() - 3 { | ||
return; | ||
} | ||
let offs = match Fe32::try_from(sl[1]) { | ||
Ok(fe) => fe, | ||
Err(_) => return, | ||
}; | ||
|
||
// Add the offset to the character currently at this position. | ||
hrpstring[idx + 3] = | ||
(Fe32::from_char(hrpstring[idx + 3].into()).unwrap() + offs).to_char() as u8; | ||
|
||
// Record the offset keyed by the position counted from the end of the string. | ||
// A repeated position aborts the whole test case. | ||
if errors.insert(CORRECT.len() - (idx + 3) - 1, offs).is_some() { | ||
return; | ||
} | ||
if sl[0] & 0x80 == 0x80 { | ||
// We might push "dummy" errors which are erasures that aren't actually wrong. | ||
// If we do this too many times, we'll exceed the singleton bound so correction | ||
// will fail, but as long as we're within the bound everything should "work", | ||
// in the sense that there will be no crashes and the error corrector will | ||
// just yield an error with value Q. | ||
erasures.push(CORRECT.len() - (idx + 3) - 1); | ||
e2t += 1; | ||
if offs != Fe32::Q { | ||
any_actual_errors = true; | ||
} | ||
} else if offs != Fe32::Q { | ||
any_actual_errors = true; | ||
e2t += 2; | ||
} | ||
} | ||
// We need _some_ errors. | ||
if !any_actual_errors { | ||
return; | ||
} | ||
|
||
// NOTE(review): this `unsafe` has no SAFETY comment. The bytes come from the ASCII | ||
// `CORRECT` literal and from `to_char() as u8`; confirm they are always valid UTF-8. | ||
let s = unsafe { core::str::from_utf8_unchecked(&hrpstring) }; | ||
// Parsing is expected to fail (`unwrap_err`), and the failure is expected to be one | ||
// that provides a correction context (`unwrap`). | ||
let mut correct_ctx = CheckedHrpstring::new::<Bech32>(s) | ||
.unwrap_err() | ||
.correction_context::<Bech32>() | ||
.unwrap(); | ||
|
||
correct_ctx.add_erasures(&erasures); | ||
|
||
let iter = correct_ctx.bch_errors(); | ||
// Results are only checked when the weighted count is at most 3. | ||
if e2t <= 3 { | ||
// Each yielded (position, offset) pair must match a recorded offset exactly... | ||
for (idx, fe) in iter.unwrap() { | ||
assert_eq!(errors.remove(&idx), Some(fe)); | ||
} | ||
// ...and anything the corrector did not report must have been a Q offset. | ||
for val in errors.values() { | ||
assert_eq!(*val, Fe32::Q); | ||
} | ||
} | ||
} | ||
|
||
/// Fuzzer entry point: invokes the honggfuzz `fuzz!` macro in an endless loop, | ||
/// handing each generated input to `do_test`. | ||
fn main() { | ||
loop { | ||
fuzz!(|data| { | ||
do_test(data); | ||
}); | ||
} | ||
} | ||
|
||
// Test-only helpers for replaying a single fuzz input written as a hex string. | ||
#[cfg(test)] | ||
mod tests { | ||
/// Decodes `hex` into bytes and appends them to `out`. | ||
/// | ||
/// Newline bytes are skipped. Any other character that is not a hex digit panics | ||
/// with "Bad hex". A trailing unpaired digit is never pushed. | ||
fn extend_vec_from_hex(hex: &str, out: &mut Vec<u8>) { | ||
let mut b = 0; | ||
for (idx, c) in hex.as_bytes().iter().filter(|&&c| c != b'\n').enumerate() { | ||
// Shift the previous nibble up and merge the new one into the low four bits. | ||
b <<= 4; | ||
match *c { | ||
b'A'..=b'F' => b |= c - b'A' + 10, | ||
b'a'..=b'f' => b |= c - b'a' + 10, | ||
b'0'..=b'9' => b |= c - b'0', | ||
_ => panic!("Bad hex"), | ||
} | ||
// Every second digit completes a byte. | ||
if (idx & 1) == 1 { | ||
out.push(b); | ||
b = 0; | ||
} | ||
} | ||
} | ||
|
||
/// Feeds the bytes decoded from the hex literal below to `do_test`. | ||
#[test] | ||
fn duplicate_crash() { | ||
let mut a = Vec::new(); | ||
extend_vec_from_hex("04010008", &mut a); | ||
super::do_test(&a); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
use std::collections::HashMap; | ||
|
||
use bech32::primitives::correction::CorrectableError as _; | ||
use bech32::primitives::decode::CheckedHrpstring; | ||
use bech32::{Checksum, Fe1024, Fe32}; | ||
use honggfuzz::fuzz; | ||
|
||
/// The codex32 checksum algorithm, defined in BIP-93. | ||
/// | ||
/// Used in this fuzztest because it can correct up to 4 errors, vs bech32 which | ||
/// can correct only 1. Should exhibit more interesting behavior. | ||
/// | ||
/// Declared without variants; in this file it is only used as a type parameter. | ||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
pub enum Codex32 {} | ||
|
||
// Checksum parameters for codex32. | ||
// NOTE(review): the ROOT_GENERATOR, ROOT_EXPONENTS, CHECKSUM_LENGTH, CODE_LENGTH and | ||
// TARGET_RESIDUE values cannot be checked from this file; verify them against BIP-93. | ||
impl Checksum for Codex32 { | ||
type MidstateRepr = u128; | ||
type CorrectionField = Fe1024; | ||
const ROOT_GENERATOR: Self::CorrectionField = Fe1024::new([Fe32::_9, Fe32::_9]); | ||
const ROOT_EXPONENTS: core::ops::RangeInclusive<usize> = 9..=16; | ||
|
||
const CHECKSUM_LENGTH: usize = 13; | ||
const CODE_LENGTH: usize = 93; | ||
// Copied from BIP-93 | ||
const GENERATOR_SH: [u128; 5] = [ | ||
0x19dc500ce73fde210, | ||
0x1bfae00def77fe529, | ||
0x1fbd920fffe7bee52, | ||
0x1739640bdeee3fdad, | ||
0x07729a039cfc75f5a, | ||
]; | ||
const TARGET_RESIDUE: u128 = 0x10ce0795c2fd1e62a; | ||
} | ||
|
||
static CORRECT: &[u8; 48] = b"ms10testsxxxxxxxxxxxxxxxxxxxxxxxxxx4nzvca9cmczlw"; | ||
|
||
/// Fuzz target: mangles the valid string `CORRECT` as directed by `data`, then checks | ||
/// that the codex32 error corrector reports exactly the offsets that were applied. | ||
/// | ||
/// `data` is consumed in 2-byte pairs. The low 7 bits of the first byte select a | ||
/// position, its high bit marks that position as an erasure, and the second byte is | ||
/// the `Fe32` offset added to the character there. | ||
/// | ||
/// Returns without testing anything when `data` is empty or of odd length, a position | ||
/// is out of range or repeated, an offset byte is not a valid `Fe32`, or no offset | ||
/// other than `Fe32::Q` was applied. | ||
fn do_test(data: &[u8]) { | ||
if data.is_empty() || data.len() % 2 == 1 { | ||
return; | ||
} | ||
|
||
let mut any_actual_errors = false; | ||
// Weighted count: +1 per erasure, +2 per non-erasure position with a non-Q offset. | ||
// NOTE(review): presumably "erasures + 2 * errors"; confirm the bound of 8 used below. | ||
let mut e2t = 0; | ||
let mut erasures = Vec::with_capacity(CORRECT.len()); | ||
// Start with a correct string | ||
let mut hrpstring = *CORRECT; | ||
// ..then mangle it | ||
let mut errors = HashMap::with_capacity(data.len() / 2); | ||
for sl in data.chunks_exact(2) { | ||
let idx = usize::from(sl[0]) & 0x7f; | ||
// Positions are offset by 3 below, skipping the leading "ms1" of CORRECT. | ||
if idx >= CORRECT.len() - 3 { | ||
return; | ||
} | ||
let offs = match Fe32::try_from(sl[1]) { | ||
Ok(fe) => fe, | ||
Err(_) => return, | ||
}; | ||
|
||
// Add the offset to the character currently at this position. | ||
hrpstring[idx + 3] = | ||
(Fe32::from_char(hrpstring[idx + 3].into()).unwrap() + offs).to_char() as u8; | ||
|
||
// Record the offset keyed by the position counted from the end of the string. | ||
// A repeated position aborts the whole test case. | ||
if errors.insert(CORRECT.len() - (idx + 3) - 1, offs).is_some() { | ||
return; | ||
} | ||
if sl[0] & 0x80 == 0x80 { | ||
// We might push "dummy" errors which are erasures that aren't actually wrong. | ||
// If we do this too many times, we'll exceed the singleton bound so correction | ||
// will fail, but as long as we're within the bound everything should "work", | ||
// in the sense that there will be no crashes and the error corrector will | ||
// just yield an error with value Q. | ||
erasures.push(CORRECT.len() - (idx + 3) - 1); | ||
e2t += 1; | ||
if offs != Fe32::Q { | ||
any_actual_errors = true; | ||
} | ||
} else if offs != Fe32::Q { | ||
any_actual_errors = true; | ||
e2t += 2; | ||
} | ||
} | ||
// We need _some_ errors. | ||
if !any_actual_errors { | ||
return; | ||
} | ||
|
||
// NOTE(review): this `unsafe` has no SAFETY comment. The bytes come from the ASCII | ||
// `CORRECT` literal and from `to_char() as u8`; confirm they are always valid UTF-8. | ||
let s = unsafe { core::str::from_utf8_unchecked(&hrpstring) }; | ||
// Parsing is expected to fail (`unwrap_err`), and the failure is expected to be one | ||
// that provides a correction context (`unwrap`). | ||
let mut correct_ctx = CheckedHrpstring::new::<Codex32>(s) | ||
.unwrap_err() | ||
.correction_context::<Codex32>() | ||
.unwrap(); | ||
|
||
correct_ctx.add_erasures(&erasures); | ||
|
||
let iter = correct_ctx.bch_errors(); | ||
// Results are only checked when the weighted count is at most 8. | ||
if e2t <= 8 { | ||
// Each yielded (position, offset) pair must match a recorded offset exactly... | ||
for (idx, fe) in iter.unwrap() { | ||
assert_eq!(errors.remove(&idx), Some(fe)); | ||
} | ||
// ...and anything the corrector did not report must have been a Q offset. | ||
for val in errors.values() { | ||
assert_eq!(*val, Fe32::Q); | ||
} | ||
} | ||
} | ||
|
||
/// Fuzzer entry point: invokes the honggfuzz `fuzz!` macro in an endless loop, | ||
/// handing each generated input to `do_test`. | ||
fn main() { | ||
loop { | ||
fuzz!(|data| { | ||
do_test(data); | ||
}); | ||
} | ||
} | ||
|
||
// Test-only helpers for replaying a single fuzz input written as a hex string. | ||
#[cfg(test)] | ||
mod tests { | ||
/// Decodes `hex` into bytes and appends them to `out`. | ||
/// | ||
/// Newline bytes are skipped. Any other character that is not a hex digit panics | ||
/// with "Bad hex". A trailing unpaired digit is never pushed. | ||
fn extend_vec_from_hex(hex: &str, out: &mut Vec<u8>) { | ||
let mut b = 0; | ||
for (idx, c) in hex.as_bytes().iter().filter(|&&c| c != b'\n').enumerate() { | ||
// Shift the previous nibble up and merge the new one into the low four bits. | ||
b <<= 4; | ||
match *c { | ||
b'A'..=b'F' => b |= c - b'A' + 10, | ||
b'a'..=b'f' => b |= c - b'a' + 10, | ||
b'0'..=b'9' => b |= c - b'0', | ||
_ => panic!("Bad hex"), | ||
} | ||
// Every second digit completes a byte. | ||
if (idx & 1) == 1 { | ||
out.push(b); | ||
b = 0; | ||
} | ||
} | ||
} | ||
|
||
/// Feeds the bytes decoded from the hex literal below to `do_test`. | ||
#[test] | ||
fn duplicate_crash() { | ||
let mut a = Vec::new(); | ||
extend_vec_from_hex("8c00a10091039e0185008000831f8e0f", &mut a); | ||
super::do_test(&a); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.