Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Erasure namespaces #235

Merged
merged 32 commits into from
Mar 29, 2021
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
51d310d
types: erasure namespaces
evan-forbes Mar 18, 2021
ddbaa9c
use methods instead of fields
evan-forbes Mar 18, 2021
ed06600
remove unnecessary compteShares call
evan-forbes Mar 18, 2021
1ff8b30
test block recovery
evan-forbes Mar 18, 2021
a4c6eaf
don't mess things up by using power of two
evan-forbes Mar 19, 2021
a2f2ba1
use better const names
evan-forbes Mar 19, 2021
dd6d65f
types: erasure namespaces
evan-forbes Mar 18, 2021
f390524
use methods instead of fields
evan-forbes Mar 18, 2021
43ad1fa
remove unnecessary compteShares call
evan-forbes Mar 18, 2021
9bec234
test block recovery
evan-forbes Mar 18, 2021
3d45d8a
don't mess things up by using power of two
evan-forbes Mar 19, 2021
964e8fc
use better const names
evan-forbes Mar 19, 2021
3482ebb
remove print statements
evan-forbes Mar 19, 2021
4de5bb6
add docs to AdjustedMessageSize
evan-forbes Mar 22, 2021
c8c4667
used the types.AdjustedMessageSize instead of the local
evan-forbes Mar 22, 2021
d9ce75b
Merge branch 'evan/erasure-namespaces' of github.com:lazyledger/lazyl…
evan-forbes Mar 27, 2021
772ae43
moving out consts to avoid import cycle
evan-forbes Mar 28, 2021
2724fe8
re applying stashed changes
evan-forbes Mar 28, 2021
3f63b02
return new instances of the wrapper per call of the constructor
evan-forbes Mar 29, 2021
173ddd7
implement tests for recovering data from a square using the new nmt w…
evan-forbes Mar 29, 2021
e4e0c74
fix sneaky append bug
evan-forbes Mar 29, 2021
001ada7
Merge branch 'evan/erasure-namespaces-II' into evan/erasure-namespaces
evan-forbes Mar 29, 2021
b1bc22d
remove moved test
evan-forbes Mar 29, 2021
d882848
remove uneeded function
evan-forbes Mar 29, 2021
b7bdfb7
remove comment
evan-forbes Mar 29, 2021
d84fc3d
clean up
evan-forbes Mar 29, 2021
f708503
fix imports
evan-forbes Mar 29, 2021
efdd4f0
linter gods
evan-forbes Mar 29, 2021
43a6bd7
more linter fixes
evan-forbes Mar 29, 2021
6baddea
use appendCopy instead of manually copying
evan-forbes Mar 29, 2021
64e5bfd
change name of generateRandomData to generateRandomMsgOnlyData
evan-forbes Mar 29, 2021
8bba9d9
use append instead of a utiltiy function
evan-forbes Mar 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions p2p/ipld/nmt_wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@ type ErasuredNamespacedMerkleTree struct {

// NewErasuredNamespacedMerkleTree issues a new ErasuredNamespacedMerkleTree
// of the given square size, backed by a fresh nmt tree that hashes with
// sha256 and applies the provided options.
func NewErasuredNamespacedMerkleTree(squareSize uint64, setters ...nmt.Option) ErasuredNamespacedMerkleTree {
	tree := nmt.New(sha256.New(), setters...)
	return ErasuredNamespacedMerkleTree{squareSize: squareSize, options: setters, tree: tree}
}

// Constructor acts as the rsmt2d.TreeConstructorFn for
// ErasuredNamespacedMerkleTree. Each call builds a brand-new wrapper (and a
// brand-new underlying nmt) so trees handed to rsmt2d never share state.
func (w ErasuredNamespacedMerkleTree) Constructor() rsmt2d.Tree {
	newTree := NewErasuredNamespacedMerkleTree(w.squareSize, w.options...)
	return &newTree
}

// Push adds the provided data to the underlying NamespaceMerkleTree, and
Expand Down
102 changes: 101 additions & 1 deletion p2p/ipld/read_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package ipld
import (
"bytes"
"context"
"crypto/rand"
"crypto/sha256"
"math"
"math/rand"
"sort"
"strings"
"testing"
Expand All @@ -18,7 +19,9 @@ import (
"github.com/lazyledger/lazyledger-core/p2p/ipld/plugin/nodes"
"github.com/lazyledger/lazyledger-core/types"
"github.com/lazyledger/nmt"
"github.com/lazyledger/rsmt2d"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestLeafPath(t *testing.T) {
Expand Down Expand Up @@ -153,6 +156,88 @@ func TestGetLeafData(t *testing.T) {
}
}

func TestBlockRecovery(t *testing.T) {
// adjustedLeafSize describes the size of a leaf that will not get split
adjustedLeafSize := types.MsgShareSize

originalSquareWidth := 2
sharecount := originalSquareWidth * originalSquareWidth
extendedSquareWidth := originalSquareWidth * originalSquareWidth
extendedShareCount := extendedSquareWidth * extendedSquareWidth

// generate test data
quarterShares := generateRandNamespacedRawData(sharecount, types.NamespaceSize, adjustedLeafSize)
allShares := generateRandNamespacedRawData(sharecount, types.NamespaceSize, adjustedLeafSize)

testCases := []struct {
name string
// blockData types.Data
shares [][]byte
expectErr bool
errString string
d int // number of shares to delete
}{
// missing more shares causes RepairExtendedDataSquare to hang see
// https://github.com/lazyledger/rsmt2d/issues/21
{"missing 1/4 shares", quarterShares, false, "", extendedShareCount / 4},
{"missing all but one shares", allShares, true, "failed to solve data square", extendedShareCount - 1},
evan-forbes marked this conversation as resolved.
Show resolved Hide resolved
}
for _, tc := range testCases {
tc := tc

t.Run(tc.name, func(t *testing.T) {
squareSize := uint64(math.Sqrt(float64(len(tc.shares))))

// create trees for creating roots
tree := NewErasuredNamespacedMerkleTree(squareSize)
recoverTree := NewErasuredNamespacedMerkleTree(squareSize)

eds, err := rsmt2d.ComputeExtendedDataSquare(tc.shares, rsmt2d.RSGF8, tree.Constructor)
if err != nil {
t.Error(err)
}

// calculate roots using the first complete square
rowRoots := eds.RowRoots()
colRoots := eds.ColumnRoots()

flat := flatten(eds)

// recover a partially complete square
reds, err := rsmt2d.RepairExtendedDataSquare(
rowRoots,
colRoots,
removeRandShares(flat, tc.d),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I'd recommend introducing a local var that gets assigned the result of removeRandShares. It slightly increases readability here I think.

rsmt2d.RSGF8,
recoverTree.Constructor,
)

if tc.expectErr {
require.Error(t, err)
require.Contains(t, err.Error(), tc.errString)
return
}

require.NoError(t, err)

// check that the squares are equal
assert.Equal(t, flatten(eds), flatten(reds))
})
}
}

// flatten collects every share of the extended data square into a single
// row-major slice.
func flatten(eds *rsmt2d.ExtendedDataSquare) [][]byte {
	width := eds.Width()
	flattened := make([][]byte, 0, width*width)
	for row := uint(0); row < width; row++ {
		flattened = append(flattened, eds.Row(row)...)
	}
	return flattened
}

// nmtcommitment generates the nmt root of some namespaced data
func createNmtTree(
ctx context.Context,
Expand Down Expand Up @@ -199,3 +284,18 @@ func generateRandNamespacedRawData(total int, nidSize int, leafSize int) [][]byt
func sortByteArrays(src [][]byte) {
sort.Slice(src, func(i, j int) bool { return bytes.Compare(src[i], src[j]) < 0 })
}

// removeRandShares nils out d randomly chosen non-empty shares in data,
// modifying the slice in place and returning it. Already-empty entries are
// skipped so each removal hits a distinct share. If d exceeds the number of
// non-empty shares, removal stops once everything has been removed instead
// of looping forever (the original loop had no termination guard).
func removeRandShares(data [][]byte, d int) [][]byte {
	count := len(data)
	// number of shares still eligible for removal
	available := 0
	for _, share := range data {
		if len(share) != 0 {
			available++
		}
	}
	// remove shares randomly
	for removed := 0; removed < d && available > 0; {
		ind := rand.Intn(count)
		if len(data[ind]) == 0 {
			continue
		}
		data[ind] = nil
		removed++
		available--
	}
	return data
}
11 changes: 5 additions & 6 deletions types/block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
// number generator here and we can run the tests a bit faster
stdbytes "bytes"
"context"
"crypto/rand"
"encoding/hex"
"math"
"math/rand"
"os"
"reflect"
"sort"
Expand Down Expand Up @@ -195,8 +195,8 @@ func makeBlockIDRandom() BlockID {
blockHash = make([]byte, tmhash.Size)
partSetHash = make([]byte, tmhash.Size)
)
rand.Read(blockHash) //nolint: errcheck // ignore errcheck for read
rand.Read(partSetHash) //nolint: errcheck // ignore errcheck for read
rand.Read(blockHash)
rand.Read(partSetHash)
return BlockID{blockHash, PartSetHeader{123, partSetHash}}
}

Expand Down Expand Up @@ -1360,7 +1360,6 @@ func TestPutBlock(t *testing.T) {
defer cancel()

block.fillDataAvailabilityHeader()
tc.blockData.ComputeShares()
for _, rowRoot := range block.DataAvailabilityHeader.RowsRoots.Bytes() {
// recreate the cids using only the computed roots
cid, err := nodes.CidFromNamespacedSha256(rowRoot)
Expand Down Expand Up @@ -1389,8 +1388,8 @@ func TestPutBlock(t *testing.T) {

func generateRandomData(msgCount int) Data {
out := make([]Message, msgCount)
for i, msg := range generateRandNamespacedRawData(msgCount, NamespaceSize, ShareSize) {
out[i] = Message{NamespaceID: msg[:NamespaceSize], Data: msg[:NamespaceSize]}
for i, msg := range generateRandNamespacedRawData(msgCount, NamespaceSize, MsgShareSize-2) {
out[i] = Message{NamespaceID: msg[:NamespaceSize], Data: msg[NamespaceSize:]}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the extra -2 here is so that the msg shares don't unexpectedly split when adding the length delimiter.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should test with really large messages (where the length delimiter will be longer than one byte) too.

This is probably orthogonal to this PR but: generateRandomData is confusing. It sounds like it will really create arbitrary Block.Data but it seems it only creates messages and these are already accounting for the length delimiter and the share reserved bytes. In reality, messages in Block.Data will often be longer than the MsgShareSize-2.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, we should definitely test with larger message sizes, random txs, and ISRs. The issue I ran into is that we need to have a power of two square size for the ipld plugin to use generate Cids as expected (ref #257), and we don't actually have a way to know how many shares a message takes up until after the shares are computed ref #77.

I've changed the name of generateRandomData to generateRandomMsgOnlyData to hopefully make things slightly clearer, but it does not address the underlying issue. 64e5bfd

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just as an update, Ismail and I figured out the issues, and it was the last copy-append bug. Everything works as expected now, and we are planning on adding robust testing for computing shares from block data later when completing #257

}
return Data{
Messages: Messages{MessagesList: out},
Expand Down
13 changes: 8 additions & 5 deletions types/shares.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,16 @@ func (m Message) MarshalDelimited() ([]byte, error) {
lenBuf := make([]byte, binary.MaxVarintLen64)
length := uint64(len(m.Data))
n := binary.PutUvarint(lenBuf, length)

return append(lenBuf[:n], m.Data...), nil
}

// appendToShares appends raw data as shares.
// Used for messages.
func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare {
func appendToShares(shares []NamespacedShare, id namespace.ID, rawData []byte) []NamespacedShare {
nid := make([]byte, len(id))
copy(nid, id)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

had to add an extra copy here to stop another overwrite-when-appending bug. This one was super sneaky, as it only pops up when calling ComputeShares more than once. I went ahead and added two more copies elsewhere, as they would become problems eventually.

Copy link
Member

@liamsi liamsi Mar 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, we should use append correctly instead of copy. I think this is easier to grasp (also here it removes the need to introduce another variable for nid):

rawShare := append(append(
	make([]byte, 0, len(nid)+len(rawData)),
	nid...),
	rawData...)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That certainly makes it more readable, but I don't think it will work for our purposes https://play.golang.org/p/NN4V2gaLh8k

We can however use a new utility function appendCopy 6baddea

Copy link
Member

@liamsi liamsi Mar 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's my bad! There is a 0 missing in my suggested code. The created slice needs to have 0 length and the exactly needed size allocated as capacity.

This is how it should look like: https://play.golang.org/p/oorWoD02jZg

I've updated the comments accordingly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh okay, I should have guessed, makes sense!

if len(rawData) <= MsgShareSize {
rawShare := []byte(append(nid, rawData...))
rawShare := append(nid, rawData...)
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
Expand All @@ -72,17 +73,19 @@ func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte)

// splitContiguous splits multiple raw data contiguously as shares.
// Used for transactions, intermediate state roots, and evidence.
func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare {
func splitContiguous(id namespace.ID, rawDatas [][]byte) []NamespacedShare {
shares := make([]NamespacedShare, 0)
// Index into the outer slice of rawDatas
outerIndex := 0
// Index into the inner slice of rawDatas
innerIndex := 0
for outerIndex < len(rawDatas) {
var rawData []byte
nid := make([]byte, len(id))
copy(nid, id)
startIndex := 0
rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, TxShareSize)
rawShare := []byte(append(append(nid, byte(startIndex)), rawData...))
rawShare := append(append(nid, byte(startIndex)), rawData...)
liamsi marked this conversation as resolved.
Show resolved Hide resolved
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
Expand Down