Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable tanglePrune #8

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module.exports = {
SharedArrayBuffer: 'readonly',
},
parserOptions: {
ecmaVersion: 2018,
ecmaVersion: 2019,
sourceType: 'module',
},
rules: {
Expand Down
33 changes: 2 additions & 31 deletions lib/add-group-tangle.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
const { isCloakedMsg } = require('ssb-ref')
const set = require('lodash.set')
const GetGroupTangle = require('./get-group-tangle')
const tanglePrune = require('./tangle-prune')

module.exports = function AddGroupTangle(server) {
const getGroupTangle = GetGroupTangle(server)
Expand All @@ -26,39 +27,9 @@ module.exports = function AddGroupTangle(server) {
if (err) return cb(null, content)

set(content, 'tangles.group', tangle)
//TODO: uncomment
//tanglePrune(content) // prune the group tangle down if needed
tanglePrune(content) // prune the group tangle down if needed

cb(null, content)
})
}
}

/* eslint-disable camelcase */
// Limits on the JSON-stringified length of `content` (see getLength below).
// The first is applied when `content.recps` holds more than one recipient,
// the second otherwise.
const MAX_SIZE_16_recps = 5320
const MAX_SIZE_1_recps = 5800

/**
 * Drops entries from `content.tangles[tangle].previous` (in place) until the
 * JSON-stringified content fits within the size limit.
 *
 * @param {object} content - message content; mutated in place
 * @param {string} [tangle='group'] - key of the tangle under `content.tangles`
 * @param {number} [maxSize] - explicit size limit; when omitted (or falsy) the
 *   limit is chosen from the number of entries in `content.recps`
 * @returns {object} the same `content` object that was passed in
 */
function tanglePrune(content, tangle = 'group', maxSize) {
  // `recps` is an array, so the recipient count is its length. Comparing the
  // array itself against a number never selects the multi-recipient limit.
  const recpCount = Array.isArray(content.recps) ? content.recps.length : 0
  const limit =
    maxSize || (recpCount > 1 ? MAX_SIZE_16_recps : MAX_SIZE_1_recps)
  if (getLength(content) <= limit) return content

  const previous = content.tangles[tangle].previous

  // we shuffle so that if multiple peers are also trying to converge,
  // we hopefully tangle differently and converge faster
  // (Fisher-Yates: unbiased, unlike sorting with a random comparator)
  for (let i = previous.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1))
    const swapped = previous[i]
    previous[i] = previous[j]
    previous[j] = swapped
  }

  // Remove one entry at a time until the content fits or nothing is left.
  while (previous.length && getLength(content) > limit) {
    previous.pop()
  }

  return content
}

// Length, in characters, of the JSON serialisation of `obj`.
function getLength(obj) {
  return JSON.stringify(obj).length
}
41 changes: 41 additions & 0 deletions lib/tangle-prune.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// SPDX-FileCopyrightText: 2022 Mix Irving
//
// SPDX-License-Identifier: LGPL-3.0-only

const bipf = require('bipf')

/* eslint-disable camelcase */
// these variables are calculated in
// test/tangle-prune.test.js
// if these variables are out of date and
// * smaller than supposed to: we'll prune a bit too much, tangles will converge a bit slower
// * bigger than supposed to: we'll prune less than we can. users might run into 'the message you want to publish is too big' more often
// but either way no catastrophe
const MAX_SIZE_16_recps = 5546
const MAX_SIZE_1_recps = 6041

module.exports = function tanglePrune(content) {
const tangle = 'group'
const maxSize =
content.recps.length > 1 ? MAX_SIZE_16_recps : MAX_SIZE_1_recps
if (getLength(content) <= maxSize) return content

content.tangles[tangle].previous = content.tangles[tangle].previous.sort(() =>
Math.random() < 0.5 ? -1 : +1
)
// we shuffle so that if multiple peers are also trying to converge,
// we hopefully tangle differently and converge faster

while (
content.tangles[tangle].previous.length &&
getLength(content) > maxSize
) {
content.tangles[tangle].previous.pop()
}

return content
}

// Size of `value` as reported by bipf's `encodingLength`.
function getLength(value) {
  const encodedSize = bipf.encodingLength(value)
  return encodedSize
}
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"*.js"
],
"dependencies": {
"bipf": "^1.9.0",
"envelope-js": "^1.3.2",
"envelope-spec": "^1.1.0",
"lodash.get": "^4.4.2",
Expand All @@ -49,7 +50,7 @@
"pretty-quick": "^3.1.3",
"secret-stack": "^6.4.1",
"ssb-caps": "^1.1.0",
"ssb-db2": "^6.2.0",
"ssb-db2": "^6.2.3",
"ssb-ebt": "^9.1.2",
"tap-arc": "^0.3.4",
"tape": "^5.5.3"
Expand Down
122 changes: 122 additions & 0 deletions test/tangle-prune.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// SPDX-FileCopyrightText: 2022 Mix Irving
//
// SPDX-License-Identifier: LGPL-3.0-only

const test = require('tape')
const bipf = require('bipf')
const Testbot = require('./helpers/testbot')
const tanglePrune = require('../lib/tangle-prune')

// Alphabet that random test text is drawn from.
const chars = 'abcABC123=+? '.split('')

// Size of `obj` as reported by bipf's `encodingLength`.
const encodedLength = (obj) => bipf.encodingLength(obj)

// Picks one character from `chars` by random index. This leaves `chars`
// untouched, instead of re-sorting the shared array with a random comparator
// for every character.
const randomChar = () => chars[Math.floor(Math.random() * chars.length)]

// Builds a string of exactly `length` random characters (empty when
// `length` is 0 or negative).
const randomText = (length) => {
  let output = ''
  while (output.length < length) output += randomChar()
  return output
}

test('tangle prune', async (t) => {
const ssb = Testbot()
const ssbId = ssb.id
Copy link
Member

@mixmix mixmix Oct 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🌶️

(emoji for potential spicy problem)

With different feedFormats, the "id" of a feedId has a different length. This means that if recps is a GroupId + 15 feedIds... they could be "sigil" style or "ssb-uri" style.

We need to clarify in a spec "when you are DM'ing a person to add them to a group, which feedId do you use?"
Is it their rootFeedId (which is bendy butt ssb-uri?)

cc @arj03

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EASY solution @Powersource - assume all ssb-uri of the longest length, which would currently be:

https://github.com/ssbc/ssb-uri2/blob/main/test/fixtures.js#L17-L26
this long:

ssb:feed/gabbygrove-v1/FY5OG311W4j_KPh8H9B2MZt4WSziy_p-ABkKERJdujQ=

or wait... what is this

ssb:feed/buttwoo-v1/FY5OG311W4j_KPh8H9B2MZt4WSziy_p-ABkKERJdujQ=/Z0rMVMDEO1Aj0uPl0_J2NlhFB2bbFLIHlty_YuqArFq=

Yeah we need a spec

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can they be URI style though? Have we allowed that anywhere actually?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ssb:feed/buttwoo-v1...

do we also know that this is the longest possible string?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also worth considering this

// these variables are calculated in
// test/tangle-prune.test.js
// if these variables are out of date and
// * smaller than supposed to: we'll prune a bit much, tangles will converge a bit slower
// * bigger than supposed to: we'll prune less than we can. users might run into 'the message you want to publish is too big' more often
// but either way no catastrophe
const MAX_SIZE_16_recps = 5546
const MAX_SIZE_1_recps = 6041


const group = await ssb.tribes2.create()

// Publishes a 'post' with `size` characters of random text, addressed to the
// group plus (recpCount - 1) copies of our own feed id.
// Resolves with the encoded length of the stored message content, or with
// `false` when publishing fails. Rejects only if reading the message back fails.
const publishSize = async (size, recpCount = 1) => {
  const text = randomText(size)
  const recps = [group.id, ...new Array(recpCount - 1).fill(ssbId)]
  const content = { type: 'post', text, recps }

  return new Promise((resolve, reject) => {
    ssb.tribes2.publish(content, (publishErr, msg) => {
      // a failed publish is an expected outcome here, reported as `false`
      if (publishErr) {
        resolve(false)
        return
      }

      ssb.db.get(msg.key, (getErr, msgVal) => {
        if (getErr) {
          reject(getErr)
          return
        }
        resolve(encodedLength(msgVal.content))
      })
    })
  })
}

async function findMaxSize(numberRecps = 1) {
// Apply bisection method to find max size of a message which can be published

let lower = 4000
let mid
let upper = 8000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔥 set upper to 17000

https://github.com/ssbc/ssb-buttwoo-spec#validation

the content length in bytes. This number must not exceed 16384.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we change this to 17000 now then later when I do #10 this test won't start failing. I sort of want it to fail so I know that I actually managed to swap the format. But maybe there are more solid ways to test which format we're using?


const results = new Map([])

//let i = 0
while (upper - lower > 1) {
mid = Math.ceil((lower + upper) / 2)

if (!results.has(lower)) {
const res =
results.get(lower) || (await publishSize(lower, numberRecps))
results.set(lower, res)
}

if (!results.has(mid)) {
const res = results.get(mid) || (await publishSize(mid, numberRecps))
results.set(mid, res)
}
if (!results.has(upper)) {
const res =
results.get(upper) || (await publishSize(upper, numberRecps))
results.set(upper, res)
}

//console.log(i++, {
// [lower]: results.get(lower),
// [mid]: results.get(mid),
// [upper]: results.get(upper),
//})

if (Boolean(results.get(lower)) !== Boolean(results.get(mid))) upper = mid
else if (Boolean(results.get(mid)) !== Boolean(results.get(upper)))
lower = mid
else throw new Error('bisection fail')
}

const result = results.get(upper) || results.get(mid) || results.get(lower)
t.pass(`max stringied content size for ${numberRecps} recps: ${result}`)
return result
}
const max16recps = await findMaxSize(16).catch(t.error) // 5546
const max1recp = await findMaxSize(1).catch(t.error) // 6041
ssb.close()

const msgId = '%RDORgMCjmL6vs51nR4bn0LWNe6wkBfbRJulSdOJsmwg=.sha256'
const content = (prevCount, numRecps) => ({
type: 'post',
text: 'hello!',
recps: new Array(numRecps).fill(ssbId),
Copy link
Member

@mixmix mixmix Oct 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, this needs to follow the format

recps: [GroupId, FeedId, ... ]  // you need to consider the length of a FeedId here 

tangles: {
group: {
root: msgId,
previous: new Array(prevCount).fill(msgId),
},
},
})

//console.time('prune')
const result16 = tanglePrune(content(4000, 16))
//console.timeEnd('prune')
t.true(
encodedLength(result16) <= max16recps,
`pruned ${4000 - result16.tangles.group.previous.length}`
)

const result1 = tanglePrune(content(4000, 1))
t.true(
encodedLength(result1) <= max1recp,
`pruned ${4000 - result1.tangles.group.previous.length}`
)
Comment on lines +107 to +119
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a feeling it's a good idea to check we're in the right ballpark and not just under a certain number. So changing this to something like this instead

const e = encodedLength(result16)
t.true(e > max16recps - 200 && e <= max16recps)


t.end()
})