Skip to content

Commit

Permalink
fix(censor): don't generate the same character twice in a row (#85)
Browse files Browse the repository at this point in the history
In randomCharFromSetCensorStrategy(). This produces more interesting
strings and avoids generating "@$$" as a side-effect.

Fixes #82
  • Loading branch information
eltoder authored Dec 29, 2024
1 parent e51e345 commit 58f2715
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 8 deletions.
24 changes: 18 additions & 6 deletions src/censor/BuiltinStrategies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,25 +139,37 @@ export function fixedCharCensorStrategy(char: string): TextCensorStrategy {

/**
* A text censoring strategy that generates replacement strings made up of
* random characters from the set of characters provided.
* random characters from the set of characters provided. The strings never
* contain two of the same character in a row.
*
* @example
* ```typescript
* const strategy = randomCharFromSetCensorStrategy('$#!');
* const censor = new TextCensor().setStrategy(strategy);
* // Before: 'fuck you!'
* // After: '!##$ you!'
* // After: '!#$# you!'
* ```
* @param charset - Set of characters from which the replacement string should
* be constructed. Must not be empty.
* be constructed. Must have at least two characters.
* @returns A [[TextCensorStrategy]] for use with the [[TextCensor]].
*/
export function randomCharFromSetCensorStrategy(charset: string): TextCensorStrategy {
// Spreading the string iterates it by code point, so each entry of `chars`
// is one whole character even when it is outside the Basic Multilingual Plane.
const chars = [...charset];
// NOTE(review): this view looks like a rendered diff without +/- markers.
// The empty-set guard on the next line appears to be the pre-change check
// that the stricter "at least 2 characters" check below replaces - confirm
// against the actual file.
if (chars.length === 0) throw new Error('The character set passed must not be empty.');
// With fewer than two characters there is no way to pick a character that
// differs from the previous one, so such sets are rejected up front.
if (chars.length < 2) throw new Error('The character set passed must have at least 2 characters.');
return (ctx: CensorContext) => {
// NOTE(review): the next two lines appear to be the pre-change body, which
// drew every character independently and could therefore repeat one. They
// declare `censored` a second time in this scope, which conflicts with the
// declaration further down - confirm they are removed in the actual file.
let censored = '';
for (let i = 0; i < ctx.matchLength; i++) censored += chars[Math.floor(Math.random() * chars.length)];
// An empty match needs no replacement text; returning early also avoids
// emitting the unconditional first character below.
if (ctx.matchLength === 0) return '';

// The first character has no predecessor, so it is drawn uniformly from the
// whole set. `lastIdx` always holds the index of the most recent character.
let lastIdx = Math.floor(Math.random() * chars.length);
let censored = chars[lastIdx];
// Starts at 1 because the first character has already been appended.
for (let i = 1; i < ctx.matchLength; i++) {
// Draw from one fewer slot than the set has, because exactly one index
// (the previous character's) is excluded.
let idx = Math.floor(Math.random() * (chars.length - 1));
// Transform the distribution for idx from [0, len-1) to
// [0, lastIdx) ∪ (lastIdx, len) to exclude lastIdx while
// ensuring a uniform distribution of generated characters.
if (idx >= lastIdx) idx++;
lastIdx = idx;
censored += chars[idx];
}
return censored;
};
}
16 changes: 14 additions & 2 deletions test/censor/BuiltinStrategies.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,13 @@ describe('fixedCharCensorStrategy()', () => {
});

describe('randomCharFromSetCensorStrategy()', () => {
it('should throw if the charset is empty', () => {
expect(() => randomCharFromSetCensorStrategy('')).toThrow(new Error('The character set passed must not be empty.'));
it('should throw if the charset has less than 2 characters', () => {
expect(() => randomCharFromSetCensorStrategy('')).toThrow(
new Error('The character set passed must have at least 2 characters.'),
);
expect(() => randomCharFromSetCensorStrategy('a')).toThrow(
new Error('The character set passed must have at least 2 characters.'),
);
});

it('should work for matchLength 0', () => {
Expand All @@ -144,4 +149,11 @@ describe('randomCharFromSetCensorStrategy()', () => {
const strategy = randomCharFromSetCensorStrategy(charset);
expect([...strategy({ ...partialCtx, matchLength: 5 })].every((c) => charset.includes(c))).toBeTruthy();
});

it('should not repeat the same character twice in a row', () => {
const strategy = randomCharFromSetCensorStrategy('ab');
for (let i = 0; i < 100; i++) {
expect(['aba', 'bab']).toContain(strategy({ ...partialCtx, matchLength: 3 }));
}
});
});

0 comments on commit 58f2715

Please sign in to comment.