From 7b6f49dd4a6018848c216a9905ab36e50f8559db Mon Sep 17 00:00:00 2001
From: Andrew Poelstra <apoelstra@wpsoftware.net>
Date: Sun, 8 Sep 2024 16:47:03 +0000
Subject: [PATCH] expression: unify Taproot and non-Taproot parsing

The `expression::Tree` type now parses expression trees containing both
{} and () as children. It marks which is which, and it is the
responsibility of FromTree implementors to make sure that the correct
style is used.

This eliminates a ton of ad-hoc string parsing code from
descriptor/tr.rs, including 8 instances of Error::Unexpected. It is also
the first change that preserves (sorta) a pubkey parsing error rather
than converting it to a string.

Because of https://github.com/rust-bitcoin/rust-miniscript/issues/734
this inserts a call to `Descriptor::sanity_check` when parsing a string,
specifically for Taproot descriptors. This is weird and wrong but we
want to address it separately from this PR, whose goal is to preserve
all behavior.
---
 src/descriptor/mod.rs |  38 +++++--
 src/descriptor/tr.rs  | 234 +++++++++++++++++++-----------------------
 src/expression/mod.rs |  69 +++++--------
 3 files changed, 158 insertions(+), 183 deletions(-)
diff --git a/src/descriptor/mod.rs b/src/descriptor/mod.rs
index a1d29b441..f0791dc1d 100644
--- a/src/descriptor/mod.rs
+++ b/src/descriptor/mod.rs
@@ -21,6 +21,7 @@ use bitcoin::{
 };
 use sync::Arc;
 
+use crate::expression::FromTree as _;
 use crate::miniscript::decode::Terminal;
 use crate::miniscript::{satisfy, Legacy, Miniscript, Segwitv0};
 use crate::plan::{AssetProvider, Plan};
@@ -981,17 +982,17 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Descriptor<Pk> {
 impl<Pk: FromStrKey> FromStr for Descriptor<Pk> {
     type Err = Error;
     fn from_str(s: &str) -> Result<Descriptor<Pk>, Error> {
-        // tr tree parsing has special code
-        // Tr::from_str will check the checksum
-        // match "tr(" to handle more extensibly
-        let desc = if s.starts_with("tr(") {
-            Ok(Descriptor::Tr(Tr::from_str(s)?))
-        } else {
-            let top = expression::Tree::from_str(s)?;
-            expression::FromTree::from_tree(&top)
-        }?;
-
-        Ok(desc)
+        let top = expression::Tree::from_str(s)?;
+        let ret = Self::from_tree(&top)?;
+        if let Descriptor::Tr(ref inner) = ret {
+            // FIXME preserve weird/broken behavior from 12.x.
+            // See https://github.com/rust-bitcoin/rust-miniscript/issues/734
+            ret.sanity_check()?;
+            for (_, ms) in inner.iter_scripts() {
+                ms.ext_check(&crate::miniscript::analyzable::ExtParams::sane())?;
+            }
+        }
+        Ok(ret)
     }
 }
 
@@ -1545,6 +1546,21 @@ mod tests {
         )
     }
 
+    #[test]
+    fn tr_named_branch() {
+        use crate::{ParseError, ParseTreeError};
+
+        assert!(matches!(
+            StdDescriptor::from_str(
+                "tr(0202d44008000010100000000084F0000000dd0dd00000000000201dceddd00d00,abc{0,0})"
+            ),
+            Err(Error::Parse(ParseError::Tree(ParseTreeError::IncorrectName {
+                expected: "",
+                ..
+            }))),
+        ));
+    }
+
     #[test]
     fn roundtrip_tests() {
         let descriptor = Descriptor::<bitcoin::PublicKey>::from_str("multi");
diff --git a/src/descriptor/tr.rs b/src/descriptor/tr.rs
index 486162deb..cc0dd1945 100644
--- a/src/descriptor/tr.rs
+++ b/src/descriptor/tr.rs
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: CC0-1.0
 
-use core::str::FromStr;
 use core::{cmp, fmt, hash};
 
 #[cfg(not(test))] // https://github.com/rust-lang/rust/issues/121684
@@ -12,9 +11,10 @@ use bitcoin::taproot::{
 use bitcoin::{opcodes, Address, Network, ScriptBuf, Weight};
 use sync::Arc;
 
-use super::checksum::{self, verify_checksum};
+use super::checksum;
 use crate::descriptor::DefiniteDescriptorKey;
 use crate::expression::{self, FromTree};
+use crate::iter::TreeLike as _;
 use crate::miniscript::satisfy::{Placeholder, Satisfaction, SchnorrSigType, Witness};
 use crate::miniscript::Miniscript;
 use crate::plan::AssetProvider;
@@ -23,8 +23,8 @@ use crate::policy::Liftable;
 use crate::prelude::*;
 use crate::util::{varint_len, witness_size};
 use crate::{
-    Error, ForEachKey, FromStrKey, MiniscriptKey, Satisfier, ScriptContext, Tap, Threshold,
-    ToPublicKey, TranslateErr, Translator,
+    Error, ForEachKey, FromStrKey, MiniscriptKey, ParseError, Satisfier, ScriptContext, Tap,
+    Threshold, ToPublicKey, TranslateErr, Translator,
 };
 
 /// A Taproot Tree representation.
@@ -490,79 +490,114 @@ where
     }
 }
 
-#[rustfmt::skip]
-impl<Pk: FromStrKey> Tr<Pk> {
-    // Helper function to parse taproot script path
-    fn parse_tr_script_spend(tree: &expression::Tree,) -> Result<TapTree<Pk>, Error> {
-        match tree {
-            expression::Tree { name, args, .. } if !name.is_empty() && args.is_empty() => {
-                let script = Miniscript::<Pk, Tap>::from_str(name)?;
-                Ok(TapTree::Leaf(Arc::new(script)))
-            }
-            expression::Tree { name, args, .. } if name.is_empty() && args.len() == 2 => {
-                let left = Self::parse_tr_script_spend(&args[0])?;
-                let right = Self::parse_tr_script_spend(&args[1])?;
-                Ok(TapTree::combine(left, right))
-            }
-            _ =>  {
-                Err(Error::Unexpected(
-                "unknown format for script spending paths while parsing taproot descriptor"
-                    .to_string(),
-            ))},
-        }
+impl<Pk: FromStrKey> core::str::FromStr for Tr<Pk> {
+    type Err = Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let expr_tree = expression::Tree::from_str(s)?;
+        Self::from_tree(&expr_tree)
     }
 }
 
 impl<Pk: FromStrKey> crate::expression::FromTree for Tr<Pk> {
-    fn from_tree(top: &expression::Tree) -> Result<Self, Error> {
-        if top.name == "tr" {
-            match top.args.len() {
-                1 => {
-                    let key = &top.args[0];
-                    if !key.args.is_empty() {
-                        return Err(Error::Unexpected(format!(
-                            "#{} script associated with `key-path` while parsing taproot descriptor",
-                            key.args.len()
-                        )));
+    fn from_tree(expr_tree: &expression::Tree) -> Result<Self, Error> {
+        use crate::expression::{Parens, ParseTreeError};
+
+        expr_tree
+            .verify_toplevel("tr", 1..=2)
+            .map_err(From::from)
+            .map_err(Error::Parse)?;
+
+        let mut round_paren_depth = 0;
+
+        let mut internal_key = None;
+        let mut tree_stack = vec![];
+
+        for item in expr_tree.verbose_pre_order_iter() {
+            // Top-level "tr" node.
+            if item.index == 0 {
+                if item.is_complete {
+                    debug_assert!(
+                        internal_key.is_some(),
+                        "checked above that top-level 'tr' has children"
+                    );
+                }
+            } else if item.index == 1 {
+                // First child of tr, which must be the internal key
+                if !item.node.args.is_empty() {
+                    return Err(Error::Parse(ParseError::Tree(
+                        ParseTreeError::IncorrectNumberOfChildren {
+                            description: "internal key",
+                            n_children: item.node.args.len(),
+                            minimum: Some(0),
+                            maximum: Some(0),
+                        },
+                    )));
+                }
+                internal_key = Some(
+                    Pk::from_str(item.node.name)
+                        .map_err(ParseError::box_from_str)
+                        .map_err(Error::Parse)?,
+                );
+            } else {
+                // From here on we are into the taptree.
+                if item.n_children_yielded == 0 {
+                    match item.node.parens {
+                        Parens::Curly => {
+                            if !item.node.name.is_empty() {
+                                return Err(Error::Parse(ParseError::Tree(
+                                    ParseTreeError::IncorrectName {
+                                        actual: item.node.name.to_owned(),
+                                        expected: "",
+                                    },
+                                )));
+                            }
+                            if round_paren_depth > 0 {
+                                return Err(Error::Parse(ParseError::Tree(
+                                    ParseTreeError::IllegalCurlyBrace {
+                                        pos: item.node.children_pos,
+                                    },
+                                )));
+                            }
+                        }
+                        Parens::Round => round_paren_depth += 1,
+                        _ => {}
                     }
-                    Tr::new(expression::terminal(key, Pk::from_str)?, None)
                 }
-                2 => {
-                    let key = &top.args[0];
-                    if !key.args.is_empty() {
-                        return Err(Error::Unexpected(format!(
-                            "#{} script associated with `key-path` while parsing taproot descriptor",
-                            key.args.len()
-                        )));
+                if item.is_complete {
+                    if item.node.parens == Parens::Curly {
+                        if item.n_children_yielded == 2 {
+                            let rchild = tree_stack.pop().unwrap();
+                            let lchild = tree_stack.pop().unwrap();
+                            tree_stack.push(TapTree::combine(lchild, rchild));
+                        } else {
+                            return Err(Error::Parse(ParseError::Tree(
+                                ParseTreeError::IncorrectNumberOfChildren {
+                                    description: "Taptree node",
+                                    n_children: item.n_children_yielded,
+                                    minimum: Some(2),
+                                    maximum: Some(2),
+                                },
+                            )));
+                        }
+                    } else {
+                        if item.node.parens == Parens::Round {
+                            round_paren_depth -= 1;
+                        }
+                        if round_paren_depth == 0 {
+                            let script = Miniscript::from_tree(item.node)?;
+                            // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734
+                            if script.ty.corr.base != crate::miniscript::types::Base::B {
+                                return Err(Error::NonTopLevel(format!("{:?}", script)));
+                            };
+                            tree_stack.push(TapTree::Leaf(Arc::new(script)));
+                        }
                     }
-                    let tree = &top.args[1];
-                    let ret = Self::parse_tr_script_spend(tree)?;
-                    Tr::new(expression::terminal(key, Pk::from_str)?, Some(ret))
                 }
-                _ => Err(Error::Unexpected(format!(
-                    "{}[#{} args] while parsing taproot descriptor",
-                    top.name,
-                    top.args.len()
-                ))),
             }
-        } else {
-            Err(Error::Unexpected(format!(
-                "{}[#{} args] while parsing taproot descriptor",
-                top.name,
-                top.args.len()
-            )))
         }
-    }
-}
-
-impl<Pk: FromStrKey> core::str::FromStr for Tr<Pk> {
-    type Err = Error;
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let desc_str = verify_checksum(s)
-            .map_err(From::from)
-            .map_err(Error::ParseTree)?;
-        let top = parse_tr_tree(desc_str)?;
-        Self::from_tree(&top)
+        assert!(tree_stack.len() <= 1);
+        Tr::new(internal_key.unwrap(), tree_stack.pop())
     }
 }
 
@@ -588,69 +623,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Tr<Pk> {
     }
 }
 
-// Helper function to parse string into miniscript tree form
-fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
-    if s.len() > 3 && &s[..3] == "tr(" && s.as_bytes()[s.len() - 1] == b')' {
-        let rest = &s[3..s.len() - 1];
-        if !rest.contains(',') {
-            let key = expression::Tree::from_str(rest)?;
-            if !key.args.is_empty() {
-                return Err(Error::Unexpected("invalid taproot internal key".to_string()));
-            }
-            let internal_key = expression::Tree {
-                name: key.name,
-                parens: expression::Parens::None,
-                children_pos: 0,
-                args: vec![],
-            };
-            return Ok(expression::Tree {
-                name: "tr",
-                parens: expression::Parens::Round,
-                children_pos: 0,
-                args: vec![internal_key],
-            });
-        }
-        // use str::split_once() method to refactor this when compiler version bumps up
-        let (key, script) = split_once(rest, ',')
-            .ok_or_else(|| Error::BadDescriptor("invalid taproot descriptor".to_string()))?;
-
-        let key = expression::Tree::from_str(key)?;
-        if !key.args.is_empty() {
-            return Err(Error::Unexpected("invalid taproot internal key".to_string()));
-        }
-        let internal_key = expression::Tree {
-            name: key.name,
-            parens: expression::Parens::None,
-            children_pos: 0,
-            args: vec![],
-        };
-        let tree = expression::Tree::from_slice_delim(script, expression::Deliminator::Taproot)
-            .map_err(Error::ParseTree)?;
-        Ok(expression::Tree {
-            name: "tr",
-            parens: expression::Parens::Round,
-            children_pos: 0,
-            args: vec![internal_key, tree],
-        })
-    } else {
-        Err(Error::Unexpected("invalid taproot descriptor".to_string()))
-    }
-}
-
-fn split_once(inp: &str, delim: char) -> Option<(&str, &str)> {
-    if inp.is_empty() {
-        None
-    } else {
-        // find the first character that matches delim
-        let res = inp
-            .chars()
-            .position(|ch| ch == delim)
-            .map(|idx| (&inp[..idx], &inp[idx + 1..]))
-            .unwrap_or((inp, ""));
-        Some(res)
-    }
-}
-
 impl<Pk: MiniscriptKey> Liftable<Pk> for TapTree<Pk> {
     fn lift(&self) -> Result<Policy<Pk>, Error> {
         fn lift_helper<Pk: MiniscriptKey>(s: &TapTree<Pk>) -> Result<Policy<Pk>, Error> {
@@ -767,6 +739,8 @@ where
 
 #[cfg(test)]
 mod tests {
+    use core::str::FromStr;
+
     use super::*;
 
     fn descriptor() -> String {
diff --git a/src/expression/mod.rs b/src/expression/mod.rs
index 6f8367827..8b0a4258f 100644
--- a/src/expression/mod.rs
+++ b/src/expression/mod.rs
@@ -71,15 +71,6 @@ pub enum Parens {
     Curly,
 }
 
-/// Whether to treat `{` and `}` as deliminators when parsing an expression.
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub enum Deliminator {
-    /// Use `(` and `)` as parentheses.
-    NonTaproot,
-    /// Use `{` and `}` as parentheses.
-    Taproot,
-}
-
 /// A trait for extracting a structure from a Tree representation in token form
 pub trait FromTree: Sized {
     /// Extract a structure from Tree representation
@@ -156,16 +147,11 @@ impl<'a> Tree<'a> {
         Ok(())
     }
 
-    /// Parse an expression with round brackets
-    pub fn from_slice(sl: &'a str) -> Result<Tree<'a>, ParseTreeError> {
-        Self::from_slice_delim(sl, Deliminator::NonTaproot)
-    }
-
     /// Check that a string is a well-formed expression string, with optional
     /// checksum.
     ///
     /// Returns the string with the checksum removed and its tree depth.
-    fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<(&str, usize), ParseTreeError> {
+    fn parse_pre_check(s: &str) -> Result<(&str, usize), ParseTreeError> {
         // First, scan through string to make sure it is well-formed.
         // Do ASCII/checksum check first; after this we can use .bytes().enumerate() rather
         // than .char_indices(), which is *significantly* faster.
@@ -174,12 +160,12 @@ impl<'a> Tree<'a> {
         let mut max_depth = 0;
         let mut open_paren_stack = Vec::with_capacity(128);
         for (pos, ch) in s.bytes().enumerate() {
-            if ch == open {
+            if ch == b'(' || ch == b'{' {
                 open_paren_stack.push((ch, pos));
                 if max_depth < open_paren_stack.len() {
                     max_depth = open_paren_stack.len();
                 }
-            } else if ch == close {
+            } else if ch == b')' || ch == b'}' {
                 if let Some((open_ch, open_pos)) = open_paren_stack.pop() {
                     if (open_ch == b'(' && ch == b'}') || (open_ch == b'{' && ch == b')') {
                         return Err(ParseTreeError::MismatchedParens {
@@ -250,26 +236,26 @@ impl<'a> Tree<'a> {
         Ok((s, max_depth))
     }
 
-    pub(crate) fn from_slice_delim(s: &'a str, delim: Deliminator) -> Result<Self, ParseTreeError> {
-        let (oparen, cparen) = match delim {
-            Deliminator::NonTaproot => (b'(', b')'),
-            Deliminator::Taproot => (b'{', b'}'),
-        };
+    /// Parses a tree from a string
+    #[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes.
+    pub fn from_str(s: &'a str) -> Result<Self, Error> {
+        Self::from_str_inner(s).map_err(Error::ParseTree)
+    }
 
+    fn from_str_inner(s: &'a str) -> Result<Self, ParseTreeError> {
         // First, scan through string to make sure it is well-formed.
-        let (s, max_depth) = Self::parse_pre_check(s, oparen, cparen)?;
+        let (s, max_depth) = Self::parse_pre_check(s)?;
 
         // Now, knowing it is sane and well-formed, we can easily parse it backward,
         // which will yield a post-order right-to-left iterator of its nodes.
         let mut stack = Vec::with_capacity(max_depth);
         let mut children_parens: Option<(Vec<_>, usize, Parens)> = None;
         let mut node_name_end = s.len();
-        let mut tapleaf_depth = 0;
         for (pos, ch) in s.bytes().enumerate().rev() {
-            if ch == cparen {
+            if ch == b')' || ch == b'}' {
                 stack.push(vec![]);
                 node_name_end = pos;
-            } else if tapleaf_depth == 0 && ch == b',' {
+            } else if ch == b',' {
                 let (mut args, children_pos, parens) =
                     children_parens
                         .take()
@@ -281,7 +267,7 @@ impl<'a> Tree<'a> {
                     Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args };
                 top.push(new_tree);
                 node_name_end = pos;
-            } else if ch == oparen {
+            } else if ch == b'(' || ch == b'{' {
                 let (mut args, children_pos, parens) =
                     children_parens
                         .take()
@@ -302,10 +288,6 @@ impl<'a> Tree<'a> {
                     },
                 ));
                 node_name_end = pos;
-            } else if delim == Deliminator::Taproot && ch == b'(' {
-                tapleaf_depth += 1;
-            } else if delim == Deliminator::Taproot && ch == b')' {
-                tapleaf_depth -= 1;
             }
         }
 
@@ -318,12 +300,6 @@ impl<'a> Tree<'a> {
         Ok(Tree { name: &s[..node_name_end], children_pos, parens, args })
     }
 
-    /// Parses a tree from a string
-    #[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes.
-    pub fn from_str(s: &'a str) -> Result<Tree<'a>, Error> {
-        Self::from_slice_delim(s, Deliminator::NonTaproot).map_err(Error::ParseTree)
-    }
-
     /// Parses an expression tree as a threshold (a term with at least one child,
     /// the first of which is a positive integer k).
     ///
@@ -427,6 +403,16 @@ mod tests {
         Tree { name, parens: Parens::Round, children_pos: name.len(), args }
     }
 
+    fn brace_node<'a>(name: &'a str, mut args: Vec<Tree<'a>>) -> Tree<'a> {
+        let mut offset = name.len() + 1; // +1 for open paren
+        for arg in &mut args {
+            arg.children_pos += offset;
+            offset += arg.name.len() + 1; // +1 for comma
+        }
+
+        Tree { name, parens: Parens::Curly, children_pos: name.len(), args }
+    }
+
     #[test]
     fn test_parse_num() {
         assert!(parse_num("0").is_ok());
@@ -518,11 +504,10 @@ mod tests {
 
     #[test]
     fn parse_tree_taproot() {
-        // This test will change in a later PR which unifies TR and non-TR parsing.
-        assert!(matches!(
-            Tree::from_str("a{b(c),d}").unwrap_err(),
-            Error::ParseTree(ParseTreeError::TrailingCharacter { ch: ',', pos: 6 }),
-        ));
+        assert_eq!(
+            Tree::from_str("a{b(c),d}").unwrap(),
+            brace_node("a", vec![paren_node("b", vec![leaf("c")]), leaf("d")]),
+        );
     }
 
     #[test]