From 62d46708eb0cffea6ec9f1d2b8c20445ebfed1a6 Mon Sep 17 00:00:00 2001 From: dev-bitSmiley <153714963+bitSmiley@users.noreply.github.com> Date: Wed, 31 Jul 2024 03:16:22 +0800 Subject: [PATCH] feat: parse inscription like witness data (#2524) * parse inscription like witness data * more comment * remove unused code * Update zetaclient/chains/bitcoin/tx_script.go Co-authored-by: Dmitry S <11892559+swift1337@users.noreply.github.com> * Update zetaclient/chains/bitcoin/observer/inbound.go Co-authored-by: Dmitry S <11892559+swift1337@users.noreply.github.com> * Update zetaclient/chains/bitcoin/tx_script.go Co-authored-by: Dmitry S <11892559+swift1337@users.noreply.github.com> * Update zetaclient/chains/bitcoin/tx_script.go Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * pull origin * Update zetaclient/chains/bitcoin/observer/inbound.go Co-authored-by: Dmitry S <11892559+swift1337@users.noreply.github.com> * review feedbacks * update review feedbacks * update make generate * fix linter * remove over flow * Update zetaclient/chains/bitcoin/observer/inbound.go Co-authored-by: Francisco de Borja Aranda Castillejo * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo * Update zetaclient/chains/bitcoin/tokenizer.go Co-authored-by: Francisco de Borja Aranda Castillejo * update review feedback * update code commnet * update comment * more comments * Update changelog.md --------- Co-authored-by: Dmitry S <11892559+swift1337@users.noreply.github.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: Francisco de Borja Aranda Castillejo fix version --- changelog.md | 7 +- zetaclient/chains/bitcoin/observer/inbound.go | 36 ++++ 
zetaclient/chains/bitcoin/tokenizer.go | 162 ++++++++++++++++++ zetaclient/chains/bitcoin/tx_script.go | 71 ++++++++ zetaclient/chains/bitcoin/tx_script_test.go | 63 +++++++ 5 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 zetaclient/chains/bitcoin/tokenizer.go diff --git a/changelog.md b/changelog.md index 522ef7aafa..57ca7416b1 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,9 @@ # CHANGELOG -## Unreleased +## v19.1.0 + +* [2524](https://github.com/zeta-chain/node/pull/2524) - add inscription envelope parsing + ## v19.0.1 @@ -592,4 +595,4 @@ Getting the correct TSS address for Bitcoin now requires proviidng the Bitcoin c ### CI * [1218](https://github.com/zeta-chain/node/pull/1218) - cross-compile release binaries and simplify PR testings -* [1302](https://github.com/zeta-chain/node/pull/1302) - add mainnet builds to goreleaser \ No newline at end of file +* [1302](https://github.com/zeta-chain/node/pull/1302) - add mainnet builds to goreleaser diff --git a/zetaclient/chains/bitcoin/observer/inbound.go b/zetaclient/chains/bitcoin/observer/inbound.go index 15a3bfdc99..470c56d86f 100644 --- a/zetaclient/chains/bitcoin/observer/inbound.go +++ b/zetaclient/chains/bitcoin/observer/inbound.go @@ -477,3 +477,39 @@ func GetBtcEvent( } return nil, nil } + +// GetBtcEventWithWitness either returns a valid BTCInboundEvent or nil. +// This method supports data with more than 80 bytes by scanning the witness for possible presence of a tapscript. +// It will first prioritize OP_RETURN over tapscript. 
+func GetBtcEventWithWitness( + client interfaces.BTCRPCClient, + tx btcjson.TxRawResult, + tssAddress string, + blockNumber uint64, + logger zerolog.Logger, + netParams *chaincfg.Params, + depositorFee float64, +) (*BTCInboundEvent, error) { + // first check for OP_RETURN data + event, err := GetBtcEvent( + client, + tx, + tssAddress, + blockNumber, + logger, + netParams, + depositorFee, + ) + + if err != nil { + return nil, errors.Wrap(err, "unable to get btc event") + } + + if event != nil { + return event, nil + } + + // TODO: integrate parsing script + + return nil, nil +} diff --git a/zetaclient/chains/bitcoin/tokenizer.go b/zetaclient/chains/bitcoin/tokenizer.go new file mode 100644 index 0000000000..5708bfa250 --- /dev/null +++ b/zetaclient/chains/bitcoin/tokenizer.go @@ -0,0 +1,162 @@ +package bitcoin + +import ( + "encoding/binary" + "fmt" + + "github.com/btcsuite/btcd/txscript" +) + +func newScriptTokenizer(script []byte) scriptTokenizer { + return scriptTokenizer{ + script: script, + offset: 0, + } +} + +// scriptTokenizer is supposed to be replaced by txscript.ScriptTokenizer. However, +// it seems currently the btcsuite version does not have ScriptTokenizer. A simplified +// version of that is implemented here. This is fully compatible with txscript.ScriptTokenizer +// one should consider upgrading txscript and remove this implementation +type scriptTokenizer struct { + script []byte + offset int + op byte + data []byte + err error +} + +// Done returns true when either all opcodes have been exhausted or a parse +// failure was encountered and therefore the state has an associated error. +func (t *scriptTokenizer) Done() bool { + return t.err != nil || t.offset >= len(t.script) +} + +// Data returns the data associated with the most recently successfully parsed +// opcode. +func (t *scriptTokenizer) Data() []byte { + return t.data +} + +// Err returns any errors currently associated with the tokenizer. 
This will +// only be non-nil in the case a parsing error was encountered. +func (t *scriptTokenizer) Err() error { + return t.err +} + +// Opcode returns the current opcode associated with the tokenizer. +func (t *scriptTokenizer) Opcode() byte { + return t.op +} + +// Next attempts to parse the next opcode and returns whether or not it was +// successful. It will not be successful if invoked when already at the end of +// the script, a parse failure is encountered, or an associated error already +// exists due to a previous parse failure. +// +// In the case of a true return, the parsed opcode and data can be obtained with +// the associated functions and the offset into the script will either point to +// the next opcode or the end of the script if the final opcode was parsed. +// +// In the case of a false return, the parsed opcode and data will be the last +// successfully parsed values (if any) and the offset into the script will +// either point to the failing opcode or the end of the script if the function +// was invoked when already at the end of the script. +// +// Invoking this function when already at the end of the script is not +// considered an error and will simply return false. +func (t *scriptTokenizer) Next() bool { + if t.Done() { + return false + } + + op := t.script[t.offset] + + // Only the following op_code will be encountered: + // OP_PUSHDATA*, OP_DATA_*, OP_CHECKSIG, OP_IF, OP_ENDIF, OP_FALSE + switch { + // No additional data. Note that some of the opcodes, notably OP_1NEGATE, + // OP_0, and OP_[1-16] represent the data themselves. + case op == txscript.OP_FALSE || op == txscript.OP_IF || op == txscript.OP_CHECKSIG || op == txscript.OP_ENDIF: + t.offset++ + t.op = op + t.data = nil + return true + + // Data pushes of specific lengths -- OP_DATA_[1-75]. + case op >= txscript.OP_DATA_1 && op <= txscript.OP_DATA_75: + script := t.script[t.offset:] + + // The length should be: int(op) - txscript.OP_DATA_1 + 2, i.e. 
op is txscript.OP_DATA_10, that means + // the data length should be 10, which is txscript.OP_DATA_10 - txscript.OP_DATA_1 + 1. + // Here, 2 instead of 1 because `script` also includes the opcode which means it contains one more byte. + // Since txscript.OP_DATA_1 is 1, then length is just int(op) - 1 + 2 = int(op) + 1 + length := int(op) + 1 + if len(script) < length { + t.err = fmt.Errorf("opcode %d detected, but script only %d bytes remaining", op, len(script)) + return false + } + + // Move the offset forward and set the opcode and data accordingly. + t.offset += length + t.op = op + t.data = script[1:length] + return true + + case op > txscript.OP_PUSHDATA4: + t.err = fmt.Errorf("unexpected op code %d", op) + return false + + // Data pushes with parsed lengths -- OP_PUSHDATA{1,2,4}. + default: + var length int + switch op { + case txscript.OP_PUSHDATA1: + length = 1 + case txscript.OP_PUSHDATA2: + length = 2 + case txscript.OP_PUSHDATA4: + length = 4 + default: + t.err = fmt.Errorf("unexpected op code %d", op) + return false + } + + script := t.script[t.offset+1:] + if len(script) < length { + t.err = fmt.Errorf("opcode %d requires %d bytes, only %d remaining", op, length, len(script)) + return false + } + + // Next -length bytes are little endian length of data. + var dataLen int + switch length { + case 1: + dataLen = int(script[0]) + case 2: + dataLen = int(binary.LittleEndian.Uint16(script[:length])) + case 4: + dataLen = int(binary.LittleEndian.Uint32(script[:length])) + default: + t.err = fmt.Errorf("invalid opcode length %d", length) + return false + } + + // Move to the beginning of the data. + script = script[length:] + + // Disallow entries that do not fit script or were sign extended. + if dataLen > len(script) || dataLen < 0 { + t.err = fmt.Errorf("opcode %d pushes %d bytes, only %d remaining", op, dataLen, len(script)) + return false + } + + // Move the offset forward and set the opcode and data accordingly. 
+ // 1 is the opcode size, which is just 1 byte. int(op) is the opcode value, + // it should not be mixed with the size. + t.offset += 1 + length + dataLen + t.op = op + t.data = script[:dataLen] + return true + } +} diff --git a/zetaclient/chains/bitcoin/tx_script.go b/zetaclient/chains/bitcoin/tx_script.go index b5f0bed226..8c0ebf5ff4 100644 --- a/zetaclient/chains/bitcoin/tx_script.go +++ b/zetaclient/chains/bitcoin/tx_script.go @@ -192,6 +192,36 @@ func DecodeOpReturnMemo(scriptHex string, txid string) ([]byte, bool, error) { return nil, false, nil } +// DecodeScript decodes memo wrapped in an inscription like script in witness +// returns (memo, found, error) +// +// Note: the format of the script is following that of "inscription" defined in ordinal theory. +// However, to separate from inscription (as this use case is not an NFT), simplifications are made. +// The bitcoin envelope script is as follows: +// OP_DATA_32 <32 byte of public key> OP_CHECKSIG +// OP_FALSE +// OP_IF +// +// OP_PUSH 0x... +// OP_PUSH 0x... +// +// OP_ENDIF +// There are no content-type or any other attributes, it's just raw bytes. +func DecodeScript(script []byte) ([]byte, bool, error) { + t := newScriptTokenizer(script) + + if err := checkInscriptionEnvelope(&t); err != nil { + return nil, false, errors.Wrap(err, "checkInscriptionEnvelope: unable to check the envelope") + } + + memoBytes, err := decodeInscriptionPayload(&t) + if err != nil { + return nil, false, errors.Wrap(err, "decodeInscriptionPayload: unable to decode the payload") + } + + return memoBytes, true, nil +} + // EncodeAddress returns a human-readable payment address given a ripemd160 hash // and netID which encodes the bitcoin network and address type. 
It is used // in both pay-to-pubkey-hash (P2PKH) and pay-to-script-hash (P2SH) address @@ -245,3 +275,44 @@ func DecodeTSSVout(vout btcjson.Vout, receiverExpected string, chain chains.Chai return receiverVout, amount, nil } + +func decodeInscriptionPayload(t *scriptTokenizer) ([]byte, error) { + if !t.Next() || t.Opcode() != txscript.OP_FALSE { + return nil, fmt.Errorf("OP_FALSE not found") + } + + if !t.Next() || t.Opcode() != txscript.OP_IF { + return nil, fmt.Errorf("OP_IF not found") + } + + memo := make([]byte, 0) + var next byte + for t.Next() { + next = t.Opcode() + if next == txscript.OP_ENDIF { + return memo, nil + } + if next < txscript.OP_DATA_1 || next > txscript.OP_PUSHDATA4 { + return nil, fmt.Errorf("expecting data push, found %d", next) + } + memo = append(memo, t.Data()...) + } + if t.Err() != nil { + return nil, t.Err() + } + return nil, fmt.Errorf("should contain more data, but script ended") +} + +// checkInscriptionEnvelope decodes the envelope for the script monitoring. 
The format is +// OP_PUSHBYTES_32 <32 bytes> OP_CHECKSIG +func checkInscriptionEnvelope(t *scriptTokenizer) error { + if !t.Next() || t.Opcode() != txscript.OP_DATA_32 { + return fmt.Errorf("cannot obtain public key bytes op %d or err %s", t.Opcode(), t.Err()) + } + + if !t.Next() || t.Opcode() != txscript.OP_CHECKSIG { + return fmt.Errorf("cannot parse OP_CHECKSIG, op %d or err %s", t.Opcode(), t.Err()) + } + + return nil +} diff --git a/zetaclient/chains/bitcoin/tx_script_test.go b/zetaclient/chains/bitcoin/tx_script_test.go index eea97fc7b5..f1b17f2119 100644 --- a/zetaclient/chains/bitcoin/tx_script_test.go +++ b/zetaclient/chains/bitcoin/tx_script_test.go @@ -491,3 +491,66 @@ func TestDecodeTSSVoutErrors(t *testing.T) { require.Zero(t, amount) }) } + +func TestDecodeScript(t *testing.T) { + t.Run("should decode longer data ok", func(t *testing.T) { + // 600 bytes of random data generated offline + data := "2001a7bae79bd61c2368fe41a565061d6cf22b4f509fbc1652caea06d98b8fd0c7ac00634d0802c7faa771dd05f27993d22c42988758882d20080241074462884c8774e1cdf4b04e5b3b74b6568bd1769722708306c66270b6b2a7f68baced83627eeeb2d494e8a1749277b92a4c5a90b1b4f6038e5f704405515109d4d0021612ad298b8dad6e12245f8f0020e11a7a319652ba6abe261958201ce5e83131cd81302c0ecec60d4afa9f72540fc84b6b9c1f3d903ab25686df263b192a403a4aa22b799ba24369c49ff4042012589a07d4211e05f80f18a1262de5a1577ce0ec9e1fa9283cfa25d98d7d0b4217951dfcb8868570318c63f1e1424cfdb7d7a33c6b9e3ced4b2ffa0178b3a5fac8bace2991e382a402f56a2c6a9191463740910056483e4fd0f5ac729ffac66bf1b3ec4570c4e75c116f7d9fd65718ec3ed6c7647bf335b77e7d6a4e2011276dc8031b78403a1ad82c92fb339ec916c263b6dd0f003ba4381ad5410e90e88effbfa7f961b8e8a6011c525643a434f7abe2c1928a892cc57d6291831216c4e70cb80a39a79a3889211070e767c23db396af9b4c2093c3743d8cbcbfcb73d29361ecd3857e94ab3c800be1299fd36a5685ec60607a60d8c2e0f99ff0b8b9e86354d39a43041f7d552e95fe2d33b6fc0f540715da0e7e1b344c778afe73f82d00881352207b719f67dcb00b4ff645974d4fd7711363d26400e2852890cb6ea9cbfe63ac43080870049b1023be98433
1560c6350bb64da52b4b81bc8910934915f0a96701f4c50646d5386146596443bee9b2d116706e1687697fb42542196c1d764419c23a914896f9212946518ac59e1ba5d1fc37e503313133ebdf2ced5785e0eaa9738fe3f9ad73646e733931ebb7cff26e96106fe68" + script, _ := hex.DecodeString(data) + + memo, isFound, err := DecodeScript(script) + require.Nil(t, err) + require.True(t, isFound) + + // the expected memo + expected := "c7faa771dd05f27993d22c42988758882d20080241074462884c8774e1cdf4b04e5b3b74b6568bd1769722708306c66270b6b2a7f68baced83627eeeb2d494e8a1749277b92a4c5a90b1b4f6038e5f704405515109d4d0021612ad298b8dad6e12245f8f0020e11a7a319652ba6abe261958201ce5e83131cd81302c0ecec60d4afa9f72540fc84b6b9c1f3d903ab25686df263b192a403a4aa22b799ba24369c49ff4042012589a07d4211e05f80f18a1262de5a1577ce0ec9e1fa9283cfa25d98d7d0b4217951dfcb8868570318c63f1e1424cfdb7d7a33c6b9e3ced4b2ffa0178b3a5fac8bace2991e382a402f56a2c6a9191463740910056483e4fd0f5ac729ffac66bf1b3ec4570c4e75c116f7d9fd65718ec3ed6c7647bf335b77e7d6a4e2011276dc8031b78403a1ad82c92fb339ec916c263b6dd0f003ba4381ad5410e90e88effbfa7f961b8e8a6011c525643a434f7abe2c1928a892cc57d6291831216c4e70cb80a39a79a3889211070e767c23db396af9b4c2093c3743d8cbcbfcb73d29361ecd3857e94ab3c800be1299fd36a5685ec60607a60d8c2e0f99ff0b8b9e86354d39a43041f7d552e95fe2d33b6fc0f540715da0e7e1b344c778afe73f82d00881352207b719f67dcb00b4ff645974d4fd7711363d26400e2852890cb6ea9cbfe63ac43080870049b1023be984331560c6350bb64da52b4b81bc8910934915f0a96701f646d5386146596443bee9b2d116706e1687697fb42542196c1d764419c23a914896f9212946518ac59e1ba5d1fc37e503313133ebdf2ced5785e0eaa9738fe3f9ad73646e733931ebb7cff26e96106fe" + require.Equal(t, hex.EncodeToString(memo), expected) + }) + + t.Run("should decode shorter data ok", func(t *testing.T) { + // 81 bytes of random data generated offline + data := 
"20d6f59371037bf30115d9fd6016f0e3ef552cdfc0367ee20aa9df3158f74aaeb4ac00634c51bdd33073d76f6b4ae6510d69218100575eafabadd16e5faf9f42bd2fbbae402078bdcaa4c0413ce96d053e3c0bbd4d5944d6857107d640c248bdaaa7de959d9c1e6b9962b51428e5a554c28c397160881668" + script, _ := hex.DecodeString(data) + + memo, isFound, err := DecodeScript(script) + require.Nil(t, err) + require.True(t, isFound) + + // the expected memo + expected := "bdd33073d76f6b4ae6510d69218100575eafabadd16e5faf9f42bd2fbbae402078bdcaa4c0413ce96d053e3c0bbd4d5944d6857107d640c248bdaaa7de959d9c1e6b9962b51428e5a554c28c3971608816" + require.Equal(t, hex.EncodeToString(memo), expected) + }) + + t.Run("decode error due to missing data byte", func(t *testing.T) { + // missing OP_ENDIF at the end + data := "20cabd6ecc0245c40f27ca6299dcd3732287c317f3946734f04e27568fc5334218ac00634d0802000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004c500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000068" + script, _ := hex.DecodeString(data) + + memo, isFound, err := DecodeScript(script) + require.ErrorContains(t, err, "should contain more data, but script ended") + require.False(t, isFound) + require.Nil(t, memo) + }) + + t.Run("decode error due to missing data for public key", func(t *testing.T) { + // public key push is truncated: OP_DATA_32 is followed by only 31 bytes + data := "2001a7bae79bd61c2368fe41a565061d6cf22b4f509fbc1652caea06d98b8fd0" + script, _ := hex.DecodeString(data) + + memo, isFound, err := DecodeScript(script) + require.ErrorContains(t, err, "cannot obtain public key bytes") + require.False(t, isFound) + require.Nil(t, memo) + }) + + t.Run("decode error due to missing OP_CHECKSIG", func(t *testing.T) { + // the byte after the public key is 0xab instead of OP_CHECKSIG (0xac) + data := "2001a7bae79bd61c2368fe41a565061d6cf22b4f509fbc1652caea06d98b8fd0c7ab" + script, _ := hex.DecodeString(data) + + memo, isFound, err := DecodeScript(script) + require.ErrorContains(t, err, "cannot parse OP_CHECKSIG") + require.False(t, isFound) + require.Nil(t, memo) + }) +}