Skip to content

Commit

Permalink
refactor words
Browse files Browse the repository at this point in the history
Use std::string_view where possible except for words().
Avoid allocating and shuffling data! Adding a header to combine all data together, organized by length once. Functions rewritten to retrieve by length (binary search) if possible later.
  • Loading branch information
Andersama committed Jun 12, 2024
1 parent ff95aa8 commit e44baf3
Show file tree
Hide file tree
Showing 11 changed files with 301 additions and 169 deletions.
13 changes: 13 additions & 0 deletions include/faker-cxx/Helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,19 @@ class Helper
return data[index];
}

/**
 * @brief Get a random element from the iterator range [start, end).
 *
 * @tparam It a random-access iterator type; the range is measured with `end - start` and read with `start[index]`.
 *
 * @param start iterator to the first element of the range.
 * @param end iterator one past the last element of the range.
 *
 * @throws std::invalid_argument if the range holds no elements, including when `end` precedes `start`.
 *
 * @return The element of the range found at the index produced by Number::integer.
 */
template <typename It>
static auto arrayElement(It start, It end) -> decltype(*::std::declval<It>())
{
    // Keep the difference signed: converting a negative distance to size_t would wrap around to a huge
    // "size" and let the index below land far outside the range.
    const auto distance = end - start;
    if (distance <= 0)
    {
        throw std::invalid_argument{"Range [start,end) is empty."};
    }

    const auto size = static_cast<size_t>(distance);

    // Number::integer is handed the highest valid index; presumably it treats that as an inclusive
    // upper bound -- TODO confirm against Number.h.
    const auto index = Number::integer<size_t>(size - 1);
    return start[index];
}

/**
* @brief Get a random element from a vector.
*
Expand Down
17 changes: 9 additions & 8 deletions include/faker-cxx/Word.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <optional>
#include <string>
#include <string_view>

namespace faker
{
Expand All @@ -21,7 +22,7 @@ class Word
* Word::sample(5) // "spell"
* @endcode
*/
static std::string sample(std::optional<unsigned> length = std::nullopt);
static std::string_view sample(std::optional<unsigned> length = std::nullopt);

/**
* @brief Returns a string containing a number of space separated random words.
Expand Down Expand Up @@ -50,7 +51,7 @@ class Word
* Word::adjective(3) // "bad"
* @endcode
*/
static std::string adjective(std::optional<unsigned> length = std::nullopt);
static std::string_view adjective(std::optional<unsigned> length = std::nullopt);

/**
* @brief Returns a random adverb.
Expand All @@ -65,7 +66,7 @@ class Word
* Word::adverb(6) // "almost"
* @endcode
*/
static std::string adverb(std::optional<unsigned> length = std::nullopt);
static std::string_view adverb(std::optional<unsigned> length = std::nullopt);

/**
* @brief Returns a random conjunction.
Expand All @@ -80,7 +81,7 @@ class Word
* Word::conjunction(6) // "indeed"
* @endcode
*/
static std::string conjunction(std::optional<unsigned> length = std::nullopt);
static std::string_view conjunction(std::optional<unsigned> length = std::nullopt);

/**
* @brief Returns a random interjection.
Expand All @@ -95,7 +96,7 @@ class Word
* Word::interjection(4) // "yuck"
* @endcode
*/
static std::string interjection(std::optional<unsigned> length = std::nullopt);
static std::string_view interjection(std::optional<unsigned> length = std::nullopt);

/**
* @brief Returns a random noun.
Expand All @@ -110,7 +111,7 @@ class Word
* Word::noun(8) // "distance"
* @endcode
*/
static std::string noun(std::optional<unsigned> length = std::nullopt);
static std::string_view noun(std::optional<unsigned> length = std::nullopt);

/**
* @brief Returns a random preposition.
Expand All @@ -125,7 +126,7 @@ class Word
* Word::preposition(4) // "from"
* @endcode
*/
static std::string preposition(std::optional<unsigned> length = std::nullopt);
static std::string_view preposition(std::optional<unsigned> length = std::nullopt);

/**
* @brief Returns a random verb.
Expand All @@ -140,6 +141,6 @@ class Word
* Word::verb(9) // "stabilise"
* @endcode
*/
static std::string verb(std::optional<unsigned> length = std::nullopt);
static std::string_view verb(std::optional<unsigned> length = std::nullopt);
};
}
213 changes: 73 additions & 140 deletions src/modules/word/Word.cpp
Original file line number Diff line number Diff line change
@@ -1,199 +1,132 @@
#include "faker-cxx/Word.h"

#include <array>
#include <optional>
#include <string>
#include <vector>

#include "../../common/StringHelper.h"
#include "data/Adjectives.h"
#include "data/Adverbs.h"
#include "data/Conjunctions.h"
#include "data/Interjections.h"
#include "data/Nouns.h"
#include "data/Prepositions.h"
#include "data/Verbs.h"
#include "data/All.h"
#include "faker-cxx/Helper.h"

namespace faker
{
std::string Word::sample(std::optional<unsigned int> length)
template <typename It>
auto sortedSizeArrayElement(std::optional<unsigned int> length, It start, It end) -> decltype(*std::declval<It>())
{
std::vector<std::string> allWords{adjectives};

allWords.insert(allWords.end(), adverbs.begin(), adverbs.end());
allWords.insert(allWords.end(), conjunctions.begin(), conjunctions.end());
allWords.insert(allWords.end(), interjections.begin(), interjections.end());
allWords.insert(allWords.end(), nouns.begin(), nouns.end());
allWords.insert(allWords.end(), prepositions.begin(), prepositions.end());
allWords.insert(allWords.end(), verbs.begin(), verbs.end());

if (!length)
{
return Helper::arrayElement<std::string>(allWords);
return Helper::arrayElement(start, end);
}

const auto shuffledWords = Helper::shuffle(allWords);
// Binary search for the first entry whose size is not less than the requested length.
// `length` holds a value at this point (the empty case has already returned), so it is unwrapped once
// and handed to the captureless comparator through its `value` argument.
auto lower_it =
::std::lower_bound(start, end, *length, [](const auto& lhs, const auto& value) { return lhs.size() < value; });

for (const auto& word : shuffledWords)
if (lower_it == end)
{
if (word.size() == length)
return Helper::arrayElement(start, end);
}
else
{
if (lower_it->size() != length)
return Helper::arrayElement(start, end);

auto upper_it = lower_it;
for (; upper_it != end; upper_it++)
{
return word;
if (upper_it->size() != lower_it->size())
break;
}
return Helper::arrayElement(lower_it, upper_it);
}

return Helper::arrayElement<std::string>(shuffledWords);
}

std::string Word::words(unsigned numberOfWords)
std::string_view Word::sample(std::optional<unsigned int> length)
{
std::vector<std::string> words;

for (unsigned i = 0; i < numberOfWords; i++)
{
words.push_back(sample());
}

return StringHelper::joinString(words, " ");
return sortedSizeArrayElement(length, _allWords.cbegin(), _allWords.cend());
}

std::string Word::adjective(std::optional<unsigned int> length)
std::string Word::words(unsigned numberOfWords)
{
if (!length)
if (numberOfWords == 0)
{
return Helper::arrayElement<std::string>(adjectives);
return "";
}

const auto shuffledAdjectives = Helper::shuffle(adjectives);

for (const auto& adjective : shuffledAdjectives)
std::string combined_words;
if (numberOfWords <= 256)
{
if (adjective.size() == length)
std::array<unsigned int, 256> tmp; // fitting 1024 bytes worth of integers*
const size_t last_index = _allWords.size() - 1;
size_t reserve_size = 0;

for (unsigned i = 0; i < numberOfWords; i++)
{
return adjective;
tmp[i] = Number::integer<unsigned int>(last_index);
auto vw = _allWords[tmp[i]];
reserve_size += vw.size();
}
}

return Helper::arrayElement<std::string>(shuffledAdjectives);
}

std::string Word::adverb(std::optional<unsigned int> length)
{
if (!length)
{
return Helper::arrayElement<std::string>(adverbs);
}

const auto shuffledAdverbs = Helper::shuffle(adverbs);

for (const auto& adverb : shuffledAdverbs)
{
if (adverb.size() == length)
unsigned space_words = (numberOfWords - 1);
combined_words.reserve(reserve_size + (numberOfWords - 1));
for (unsigned i = 0; i < space_words; i++)
{
return adverb;
auto vw = _allWords[tmp[i]];
combined_words.append(vw.begin(), vw.end());
combined_words.push_back(' ');
}
auto vw = _allWords[tmp[numberOfWords - 1]];
combined_words.append(vw.begin(), vw.end());
}

return Helper::arrayElement<std::string>(shuffledAdverbs);
}

std::string Word::conjunction(std::optional<unsigned int> length)
{
if (!length)
else
{
return Helper::arrayElement<std::string>(conjunctions);
}

const auto shuffledConjunctions = Helper::shuffle(conjunctions);

for (const auto& conjunction : shuffledConjunctions)
{
if (conjunction.size() == length)
unsigned space_words = (numberOfWords - 1);
for (unsigned i = 0; i < space_words; i++)
{
return conjunction;
auto s = sample();
combined_words.append(s.begin(), s.end());
combined_words.push_back(' ');
}

auto s = sample();
combined_words.append(s.begin(), s.end());
}

return Helper::arrayElement<std::string>(shuffledConjunctions);
return combined_words;
}

std::string Word::interjection(std::optional<unsigned int> length)
std::string_view Word::adjective(std::optional<unsigned int> length)
{
if (!length)
{
return Helper::arrayElement<std::string>(interjections);
}

const auto shuffledInterjections = Helper::shuffle(interjections);

for (const auto& interjection : shuffledInterjections)
{
if (interjection.size() == length)
{
return interjection;
}
}

return Helper::arrayElement<std::string>(shuffledInterjections);
return sortedSizeArrayElement(length, _adjectives_sorted.cbegin(), _adjectives_sorted.cend());
}

std::string Word::noun(std::optional<unsigned int> length)
std::string_view Word::adverb(std::optional<unsigned int> length)
{
if (!length)
{
return Helper::arrayElement<std::string>(nouns);
}

const auto shuffledNouns = Helper::shuffle(nouns);

for (const auto& noun : shuffledNouns)
{
if (noun.size() == length)
{
return noun;
}
}

return Helper::arrayElement<std::string>(shuffledNouns);
return sortedSizeArrayElement(length, _adverbs_sorted.cbegin(), _adverbs_sorted.cend());
}

std::string Word::preposition(std::optional<unsigned int> length)
std::string_view Word::conjunction(std::optional<unsigned int> length)
{
if (!length)
{
return Helper::arrayElement<std::string>(prepositions);
}

const auto shuffledPrepositions = Helper::shuffle(prepositions);

for (const auto& preposition : shuffledPrepositions)
{
if (preposition.size() == length)
{
return preposition;
}
}

return Helper::arrayElement<std::string>(shuffledPrepositions);
return sortedSizeArrayElement(length, _conjunctions_sorted.cbegin(), _conjunctions_sorted.cend());
}

std::string Word::verb(std::optional<unsigned int> length)
std::string_view Word::interjection(std::optional<unsigned int> length)
{
if (!length)
{
return Helper::arrayElement<std::string>(verbs);
}
return sortedSizeArrayElement(length, _interjections_sorted.cbegin(), _interjections_sorted.cend());
}

const auto shuffledVerbs = Helper::shuffle(verbs);
// Returns a noun chosen by sortedSizeArrayElement from _nouns_sorted; `length` is forwarded unchanged.
// NOTE(review): the helper binary-searches by word size, so _nouns_sorted is presumably ordered by
// length -- confirm in data/All.h.
std::string_view Word::noun(std::optional<unsigned int> length)
{
    const auto first = _nouns_sorted.cbegin();
    const auto last = _nouns_sorted.cend();

    return sortedSizeArrayElement(length, first, last);
}

for (const auto& verb : shuffledVerbs)
{
if (verb.size() == length)
{
return verb;
}
}
// Returns a preposition chosen by sortedSizeArrayElement from _prepositions_sorted; `length` is
// forwarded unchanged.
// NOTE(review): the helper binary-searches by word size, so _prepositions_sorted is presumably ordered
// by length -- confirm in data/All.h.
std::string_view Word::preposition(std::optional<unsigned int> length)
{
    const auto first = _prepositions_sorted.cbegin();
    const auto last = _prepositions_sorted.cend();

    return sortedSizeArrayElement(length, first, last);
}

return Helper::arrayElement<std::string>(shuffledVerbs);
// Returns a verb chosen by sortedSizeArrayElement from _verbs_sorted; `length` is forwarded unchanged.
// NOTE(review): the helper binary-searches by word size, so _verbs_sorted is presumably ordered by
// length -- confirm in data/All.h.
std::string_view Word::verb(std::optional<unsigned int> length)
{
    const auto first = _verbs_sorted.cbegin();
    const auto last = _verbs_sorted.cend();

    return sortedSizeArrayElement(length, first, last);
}
}
6 changes: 3 additions & 3 deletions src/modules/word/data/Adjectives.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#pragma once

#include <string>
#include <vector>
#include <array>
#include <string_view>

namespace faker
{
const std::vector<std::string> adjectives = {"abandoned",
const std::array<std::string_view,1328> adjectives = {"abandoned",
"able",
"absolute",
"adorable",
Expand Down
Loading

0 comments on commit e44baf3

Please sign in to comment.