From 2ad1bb284cfd7e192fe7f554d206d100fdca9382 Mon Sep 17 00:00:00 2001
From: Shawn Tabrizi <shawntabrizi@gmail.com>
Date: Tue, 24 Sep 2024 15:57:02 -0400
Subject: [PATCH] use proof to hashes

---
 .../runtime/src/proving_trie/base16.rs        | 25 ++++++--
 .../runtime/src/proving_trie/base2.rs         | 59 +++++++++++++++----
 .../runtime/src/proving_trie/mod.rs           | 14 ++---
 3 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/substrate/primitives/runtime/src/proving_trie/base16.rs b/substrate/primitives/runtime/src/proving_trie/base16.rs
index 59a53ece19e9..2dfa788f9d0e 100644
--- a/substrate/primitives/runtime/src/proving_trie/base16.rs
+++ b/substrate/primitives/runtime/src/proving_trie/base16.rs
@@ -24,8 +24,8 @@
 //! Proofs are created with latest substrate trie format (`LayoutV1`), and are not compatible with
 //! proofs using `LayoutV0`.
 
-use super::{ProofSizeToHashes, ProvingTrie, TrieError};
-use crate::{Decode, DispatchError, Encode};
+use super::{ProofToHashes, ProvingTrie, TrieError};
+use crate::{ArithmeticError, Decode, DispatchError, Encode};
 use codec::MaxEncodedLen;
 use sp_std::vec::Vec;
 use sp_trie::{
@@ -135,16 +135,29 @@ where
 	}
 }
 
-impl<Hashing, Key, Value> ProofSizeToHashes for BasicProvingTrie<Hashing, Key, Value>
+impl<Hashing, Key, Value> ProofToHashes for BasicProvingTrie<Hashing, Key, Value>
 where
 	Hashing: sp_core::Hasher,
 	Hashing::Out: MaxEncodedLen,
 {
-	fn proof_size_to_hashes(proof_size: &u32) -> u32 {
+	// This base 16 trie does not directly expose the depth of the trie, so we estimate it from the
+	// size of the proof: assuming the proof data is all hashes, the number of hashes present
+	// tells us the depth of the trie. Every step is checked, and the result is rounded up.
+	fn proof_to_hashes(proof: &[u8]) -> Result<u32, DispatchError> {
+		let proof_size = u32::try_from(proof.len()).map_err(|_| ArithmeticError::Overflow)?;
 		let hash_len = Hashing::Out::max_encoded_len() as u32;
 		// A base 16 trie is expected to include the data for 15 hashes per layer.
-		let layer_len = 15 * hash_len;
-		(proof_size + layer_len - 1) / layer_len
+		let layer_len = hash_len.checked_mul(15).ok_or(ArithmeticError::Overflow)?;
+		let proof_size_round_up = proof_size
+			.checked_add(layer_len)
+			.ok_or(ArithmeticError::Overflow)?
+			.checked_sub(1)
+			.ok_or(ArithmeticError::Underflow)?;
+
+		let depth = proof_size_round_up
+			.checked_div(layer_len)
+			.ok_or(ArithmeticError::DivisionByZero)?;
+		Ok(depth)
 	}
 }
 
diff --git a/substrate/primitives/runtime/src/proving_trie/base2.rs b/substrate/primitives/runtime/src/proving_trie/base2.rs
index 812568941dbd..392eba546f60 100644
--- a/substrate/primitives/runtime/src/proving_trie/base2.rs
+++ b/substrate/primitives/runtime/src/proving_trie/base2.rs
@@ -20,7 +20,7 @@
 //! this library is designed to work more easily with runtime native types, which simply need to
 //! implement `Encode`/`Decode`.
 
-use super::{ProofSizeToHashes, ProvingTrie, TrieError};
+use super::{ProofToHashes, ProvingTrie, TrieError};
 use crate::{Decode, DispatchError, Encode};
 use binary_merkle_tree::{merkle_proof, merkle_root, MerkleProof};
 use codec::MaxEncodedLen;
@@ -102,21 +102,21 @@ where
 	}
 }
 
-impl<Hashing, Key, Value> ProofSizeToHashes for BasicProvingTrie<Hashing, Key, Value>
+impl<Hashing, Key, Value> ProofToHashes for BasicProvingTrie<Hashing, Key, Value>
 where
 	Hashing: sp_core::Hasher,
-	Hashing::Out: MaxEncodedLen,
+	Hashing::Out: MaxEncodedLen + Decode,
+	Key: Decode,
+	Value: Decode,
 {
-	fn proof_size_to_hashes(proof_size: &u32) -> u32 {
-		let hash_len = Hashing::Out::max_encoded_len() as u32;
-		// A base 2 trie is expected to include the data for 1 hash per layer.
-		let layer_len = 1 * hash_len;
-		// The proof includes `number_of_leaves: u32` and `leaf_index: u32`.
-		let proof_size = proof_size.saturating_sub(8);
-		// The implementation of this trie also includes the `key` and `value` encoded within the
-		// proof, but since we cannot know the "minimum" size of those items, we count it toward
-		// the number of hashes for a worst case scenario.
-		(proof_size + layer_len - 1) / layer_len
+	// The binary merkle proof carries the number of leaves the trie was built from, so the depth
+	// of the trie can be derived directly from the decoded proof.
+	fn proof_to_hashes(proof: &[u8]) -> Result<u32, DispatchError> {
+		let mut encoded = proof;
+		let merkle_proof = MerkleProof::<Hashing::Out, Vec<u8>>::decode(&mut encoded)
+			.map_err(|_| TrieError::IncompleteProof)?;
+		// A base 2 trie holding `n` leaves is `log2(n)` layers deep, rounded up.
+		Ok(log2_rounded_up(merkle_proof.number_of_leaves))
 	}
 }
 
@@ -156,6 +156,22 @@ where
 	}
 }
 
+// Pessimistic `log2` of a u32: the result is rounded up to the next whole number. For our needs
+// `log2(0)` can be zero.
+fn log2_rounded_up(x: u32) -> u32 {
+	match x {
+		// Nothing to round for the two smallest inputs.
+		0 | 1 => 0,
+		// An exact power of 2 has a single bit set; the index of that bit is the logarithm.
+		n if n.is_power_of_two() => n.trailing_zeros(),
+		// Everything else is rounded up to one past the index of the highest set bit.
+		n => {
+			let highest_set_bit = u32::BITS - 1 - n.leading_zeros();
+			highest_set_bit + 1
+		},
+	}
+}
+
 #[cfg(test)]
 mod tests {
 	use super::*;
@@ -271,4 +287,21 @@ mod tests {
 		};
 		assert_eq!(constructed_proof, decoded_proof);
 	}
+
+	#[test]
+	fn log2_rounded_up_works() {
+		// Broad check: sweep the powers of ten below one billion and compare each result
+		// against a floating point reference.
+		for exponent in 0..9u32 {
+			let value = 10u32.pow(exponent);
+			let expected = f64::from(value).log2().ceil() as u32;
+			assert_eq!(log2_rounded_up(value), expected);
+		}
+
+		// Explicit edge cases: zero, one, an exact power of two, and the value right after it.
+		let edge_cases: [(u32, u32); 4] = [(0, 0), (1, 0), (32, 5), (33, 6)];
+		for (input, expected) in edge_cases {
+			assert_eq!(log2_rounded_up(input), expected);
+		}
+	}
 }
diff --git a/substrate/primitives/runtime/src/proving_trie/mod.rs b/substrate/primitives/runtime/src/proving_trie/mod.rs
index 294a12ef218d..018415d32e70 100644
--- a/substrate/primitives/runtime/src/proving_trie/mod.rs
+++ b/substrate/primitives/runtime/src/proving_trie/mod.rs
@@ -141,9 +141,9 @@ where
 
 /// This trait is one strategy that can be used to benchmark a trie proof verification for the
 /// runtime. This strategy assumes that the majority complexity of verifying a merkle proof comes
-/// from computing hashes to recreate the merkle root. This trait converts the size of the proof, in
-/// bytes, to the number of hashes we expect to execute.
-pub trait ProofSizeToHashes {
+/// from computing hashes to recreate the merkle root. This trait converts the proof, as raw
+/// bytes, to the number of hashes we expect to execute to verify that proof.
+pub trait ProofToHashes {
 	/// This function returns the number of hashes we expect to calculate based on the
 	/// size of the proof. This is used for benchmarking, so for worst case scenario, we should
 	/// round up.
@@ -152,7 +152,7 @@ pub trait ProofSizeToHashes {
 	/// to calculate the merkle root. For tries, it should be easy to predict the depth
 	/// of the trie (which is equivalent to the hashes), by looking at the size of the proof.
 	/// A rough estimate should be: `proof_size` / (`hash_size` * `num_hashes_per_layer`).
-	fn proof_size_to_hashes(proof_size: &u32) -> u32;
+	fn proof_to_hashes(proof: &[u8]) -> Result<u32, DispatchError>;
 }
 
 #[cfg(test)]
@@ -187,7 +187,7 @@ mod tests {
 	}
 
 	#[test]
-	fn proof_size_to_hashes() {
+	fn proof_to_hashes() {
 		// We can be off by up to 2 hashes... should be trivial.
 		let tolerance = 2;
 
@@ -203,7 +203,7 @@ mod tests {
 		while i < 10_000_000 {
 			let trie = BalanceTrie2::generate_for((0..i).map(|i| (i, u128::from(i)))).unwrap();
 			let proof = trie.create_proof(&(i / 2)).unwrap();
-			let hashes = BalanceTrie2::proof_size_to_hashes(&(proof.len() as u32));
+			let hashes = BalanceTrie2::proof_to_hashes(&proof).unwrap();
 			let log2 = (i as f64).log2().ceil() as u32;
 
 			assert!(abs_dif(hashes, log2) <= tolerance);
@@ -221,7 +221,7 @@ mod tests {
 		while i < 10_000_000 {
 			let trie = BalanceTrie16::generate_for((0..i).map(|i| (i, u128::from(i)))).unwrap();
 			let proof = trie.create_proof(&(i / 2)).unwrap();
-			let hashes = BalanceTrie16::proof_size_to_hashes(&(proof.len() as u32));
+			let hashes = BalanceTrie16::proof_to_hashes(&proof).unwrap();
 			let log16 = log16(i);
 
 			assert!(abs_dif(hashes, log16) <= tolerance);