From a84d0c77513a5718d2588e57f808ef70fe604c70 Mon Sep 17 00:00:00 2001 From: ajuvercr Date: Tue, 15 Oct 2024 13:38:39 +0200 Subject: [PATCH] chore: handle pr feedback --- README.md | 7 +++---- configs/bucketizer_configs.ttl | 4 ++-- src/bucketizers.ts | 1 - src/bucketizers/index.ts | 22 +++++++++++----------- src/bucketizers/timeBucketTree.ts | 9 +++++---- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index a8b8609..48ba5e5 100644 --- a/README.md +++ b/README.md @@ -118,11 +118,8 @@ When a member arrives, all buckets that hold members with a timestamp older than tree:timestampPath ; tree:buffer 5000; # members can arrive 5 seconds out of sync () tree:level ( [ # Create 5 levels, resulting uri's //// - tree:range "year"; + tree:range "year", "month"; tree:maxSize 0; # place no members at this level - ] [ - tree:range "month"; - tree:maxSize 0; # place no members at this level ] [ tree:range "day-of-month"; tree:maxSize 1000; # place at most 1000 members at this level @@ -135,6 +132,8 @@ When a member arrives, all buckets that hold members with a timestamp older than ] ). ``` +With this configuration, bucket paths take the form `${year}-${month}/${day}/${hour}/${minute}` once 2001 members have been ingested within the same hour (filling both the day and hour levels). + ### [`js:Ldesify`](https://github.com/rdf-connect/sds-processors/blob/master/configs/ldesify.ttl#L10) diff --git a/configs/bucketizer_configs.ttl b/configs/bucketizer_configs.ttl index acf54cb..0b48037 100644 --- a/configs/bucketizer_configs.ttl +++ b/configs/bucketizer_configs.ttl @@ -109,7 +109,7 @@ ]. 
[ ] a sh:NodeShape; - sh:targetClass tree:timeBucket; + sh:targetClass tree:TimeBucketLevel; sh:property [ sh:name "ranges"; sh:path tree:range; @@ -141,7 +141,7 @@ ], [ sh:name "levels"; sh:path tree:level; - sh:class tree:timeBucket; + sh:class tree:TimeBucketLevel; sh:minCount 1; ], [ sh:name "timeBufferMs"; diff --git a/src/bucketizers.ts b/src/bucketizers.ts index ae72022..983c196 100644 --- a/src/bucketizers.ts +++ b/src/bucketizers.ts @@ -320,7 +320,6 @@ export async function bucketize( newMembers, newRelations, prefix, - // sourceStream?.value || "/", ); record_buckets.forEach((x) => requestedBuckets.add(x)); diff --git a/src/bucketizers/index.ts b/src/bucketizers/index.ts index 8fdf7d7..5ebfe4d 100644 --- a/src/bucketizers/index.ts +++ b/src/bucketizers/index.ts @@ -14,9 +14,7 @@ import SubjectBucketizer from "./subjectBucketizer"; import TimebasedBucketizer from "./timebasedBucketizer"; import { $INLINE_FILE } from "@ajuvercr/ts-transformer-inline-file"; -import TimeBucketTreeBucketizer, { - TimeBucketTreeConfig, -} from "./timeBucketTree"; +import TimeBucketBucketizer, { TimeBucketTreeConfig } from "./timeBucketTree"; export { TimeBucketTreeConfig } from "./timeBucketTree"; @@ -86,7 +84,7 @@ function createBucketizer(config: BucketizerConfig, save?: string): Bucketizer { save, ); case TREE.custom("TimeBucketFragmentation"): - return new TimeBucketTreeBucketizer( + return new TimeBucketBucketizer( config.config, save, ); @@ -94,6 +92,14 @@ function createBucketizer(config: BucketizerConfig, save?: string): Bucketizer { throw "Unknown bucketizer " + config.type.value; } +function combineIds(id1: string, id2: string) { + const id1Slash = id1.endsWith("/"); + const id2Slash = id2.startsWith("/"); + if (id1Slash && id2Slash) return id1 + id2.slice(1); + if (id1 === "" || id1Slash || id2Slash) return id1 + id2; + return id1 + "/" + id2; +} + export class BucketizerOrchestrator { private readonly configs: BucketizerConfig[]; @@ -158,16 +164,10 @@ export class 
BucketizerOrchestrator { const key = value.endsWith("/") ? encodedValue : encodedValue + "/"; - // const key = value; // If the requested bucket is the root, it actually is the previous bucket // avoid double slashes and leading slashes - const next = - prefix.endsWith("/") || - prefix == "" || - key.startsWith("/") - ? prefix + key - : prefix + "/" + key; + const next = combineIds(prefix, key); const id = root ? prefix : next; if (!buckets[id]) { buckets[id] = new Bucket(df.namedNode(id), [], false); diff --git a/src/bucketizers/timeBucketTree.ts b/src/bucketizers/timeBucketTree.ts index 171fa24..844f456 100644 --- a/src/bucketizers/timeBucketTree.ts +++ b/src/bucketizers/timeBucketTree.ts @@ -222,7 +222,7 @@ function hydrate(state: State) { } } -export default class TimeBucketTreeBucketizer implements Bucketizer { +export default class TimeBucketBucketizer implements Bucketizer { private readonly config: TimeBucketTreeConfig; private readonly path: BasicLensM; private readonly state: State = {}; @@ -291,7 +291,7 @@ export default class TimeBucketTreeBucketizer implements Bucketizer { const found = goInState(state, levelValue, date, lastF); state = found.value.deep; - key = concat_key(key, levelValue); + key = concatKey(key, levelValue); const nextBucket = getBucket(key); if (!found.found) { @@ -314,6 +314,7 @@ export default class TimeBucketTreeBucketizer implements Bucketizer { this.config.pathQuads, ); } + bucket = nextBucket; if (found.value.count < level.amount) { found.value.count += 1; @@ -332,7 +333,7 @@ export default class TimeBucketTreeBucketizer implements Bucketizer { } } -function concat_key(path: string, key: string): string { +function concatKey(path: string, key: string): string { if (path.length === 0) { return key; } else { @@ -349,7 +350,7 @@ function checkImmutable( for (const key of Object.keys(state)) { const inner = state[key]; if (!inner.immutable && inner.end < end) { - const innerPath = concat_key(path, key); + const innerPath = 
concatKey(path, key); const bucket = getBucket(innerPath); inner.immutable = true; bucket.immutable = true;