diff --git a/include/faker-cxx/Helper.h b/include/faker-cxx/Helper.h index 412bf5588..3339fba64 100644 --- a/include/faker-cxx/Helper.h +++ b/include/faker-cxx/Helper.h @@ -57,6 +57,19 @@ class Helper return data[index]; } + template + static auto arrayElement(It start, It end) -> decltype(*::std::declval()) + { + size_t size = end - start; + if (size==0) + { + throw std::invalid_argument{"Range [start,end) is empty."}; + } + + const auto index = Number::integer(size - 1); + return start[index]; + } + /** * @brief Get a random element from a vector. * diff --git a/include/faker-cxx/Word.h b/include/faker-cxx/Word.h index 6fbf9bdd7..fe1e49e3d 100644 --- a/include/faker-cxx/Word.h +++ b/include/faker-cxx/Word.h @@ -2,6 +2,7 @@ #include #include +#include namespace faker { @@ -21,7 +22,7 @@ class Word * Word::sample(5) // "spell" * @endcode */ - static std::string sample(std::optional length = std::nullopt); + static std::string_view sample(std::optional length = std::nullopt); /** * @brief Returns a string containing a number of space separated random words. @@ -50,7 +51,7 @@ class Word * Word::adjective(3) // "bad" * @endcode */ - static std::string adjective(std::optional length = std::nullopt); + static std::string_view adjective(std::optional length = std::nullopt); /** * @brief Returns a random adverb. @@ -65,7 +66,7 @@ class Word * Word::adverb(5) // "almost" * @endcode */ - static std::string adverb(std::optional length = std::nullopt); + static std::string_view adverb(std::optional length = std::nullopt); /** * @brief Returns a random conjunction. @@ -80,7 +81,7 @@ class Word * Word::conjunction(6) // "indeed" * @endcode */ - static std::string conjunction(std::optional length = std::nullopt); + static std::string_view conjunction(std::optional length = std::nullopt); /** * @brief Returns a random interjection. @@ -95,7 +96,7 @@ class Word * Word::interjection(4) // "yuck" * @endcode */ - static std::string interjection(std::optional length = std::nullopt); + static std::string_view interjection(std::optional length = std::nullopt); /** * @brief Returns a random noun. @@ -110,7 +111,7 @@ class Word * Word::noun(8) // "distance" * @endcode */ - static std::string noun(std::optional length = std::nullopt); + static std::string_view noun(std::optional length = std::nullopt); /** * @brief Returns a random preposition. @@ -125,7 +126,7 @@ class Word * Word::preposition(4) // "from" * @endcode */ - static std::string preposition(std::optional length = std::nullopt); + static std::string_view preposition(std::optional length = std::nullopt); /** * @brief Returns a random verb. @@ -140,6 +141,6 @@ class Word * Word::verb(9) // "stabilise" * @endcode */ - static std::string verb(std::optional length = std::nullopt); + static std::string_view verb(std::optional length = std::nullopt); }; } diff --git a/src/common/FormatHelper.h b/src/common/FormatHelper.h index 6a94fd66a..ae343ee55 100644 --- a/src/common/FormatHelper.h +++ b/src/common/FormatHelper.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -33,5 +34,8 @@ class FormatHelper static std::string fillTokenValues(const std::string& format, std::unordered_map> tokenValueGenerators); + + static std::string fillTokenValues(const std::string& format, + std::unordered_map> tokenValueGenerators); }; } diff --git a/src/modules/person/Person.cpp b/src/modules/person/Person.cpp index ce2aebd25..78ce98a71 100644 --- a/src/modules/person/Person.cpp +++ b/src/modules/person/Person.cpp @@ -330,11 +330,11 @@ std::string Person::bio() { const auto randomBioFormat = static_cast(Helper::arrayElement(bioFormats)); - const std::unordered_map> dataGeneratorsMapping{ - {"bio_part", []() { return std::string{Helper::arrayElement(bioParts)}; }}, - {"bio_supporter", []() { return std::string{Helper::arrayElement(bioSupporters)}; }}, + const std::unordered_map> dataGeneratorsMapping{ + {"bio_part", []() { return Helper::arrayElement(bioParts); }}, + {"bio_supporter", []() { return Helper::arrayElement(bioSupporters); }}, {"noun", []() { return Word::noun(); }}, - {"emoji", []() { return std::string{Internet::emoji()}; }}}; + {"emoji", []() { return Internet::emoji(); }}}; return FormatHelper::fillTokenValues(randomBioFormat, dataGeneratorsMapping); } diff --git a/src/modules/word/Word.cpp b/src/modules/word/Word.cpp index 8a7f50194..fa1b559d1 100644 --- a/src/modules/word/Word.cpp +++ b/src/modules/word/Word.cpp @@ -1,199 +1,134 @@ #include "faker-cxx/Word.h" +#include +#include #include #include #include #include "../../common/StringHelper.h" -#include "data/Adjectives.h" -#include "data/Adverbs.h" -#include "data/Conjunctions.h" -#include "data/Interjections.h" -#include "data/Nouns.h" -#include "data/Prepositions.h" -#include "data/Verbs.h" +#include "data/All.h" #include "faker-cxx/Helper.h" namespace faker { -std::string Word::sample(std::optional length) +template +auto sortedSizeArrayElement(std::optional length, It start, It end) -> decltype(*std::declval()) { - std::vector allWords{adjectives}; - - allWords.insert(allWords.end(), adverbs.begin(), adverbs.end()); - allWords.insert(allWords.end(), conjunctions.begin(), conjunctions.end()); - allWords.insert(allWords.end(), interjections.begin(), interjections.end()); - allWords.insert(allWords.end(), nouns.begin(), nouns.end()); - allWords.insert(allWords.end(), prepositions.begin(), prepositions.end()); - allWords.insert(allWords.end(), verbs.begin(), verbs.end()); - if (!length) { - return Helper::arrayElement(allWords); + return Helper::arrayElement(start, end); } - const auto shuffledWords = Helper::shuffle(allWords); + size_t length_64 = length; + auto lower_it = ::std::lower_bound(start, end, length_64, + [](const auto& lhs, const auto& value) { return lhs.size() < value; }); - for (const auto& word : shuffledWords) + if (lower_it == end) { - if (word.size() == length) + return Helper::arrayElement(start, end); + } + else + { + if (lower_it->size() != length) + return Helper::arrayElement(start, end); + + auto upper_it = lower_it; + for (; upper_it != end; upper_it++) { - return word; + if (upper_it->size() != lower_it->size()) + break; } + return Helper::arrayElement(lower_it, upper_it); } - - return Helper::arrayElement(shuffledWords); } -std::string Word::words(unsigned numberOfWords) +std::string_view Word::sample(std::optional length) { - std::vector words; - - for (unsigned i = 0; i < numberOfWords; i++) - { - words.push_back(sample()); - } - - return StringHelper::joinString(words, " "); + return sortedSizeArrayElement(length, _allWords.cbegin(), _allWords.cend()); } -std::string Word::adjective(std::optional length) +std::string Word::words(unsigned numberOfWords) { - if (!length) + if (numberOfWords == 0) { - return Helper::arrayElement(adjectives); + return ""; } - const auto shuffledAdjectives = Helper::shuffle(adjectives); - - for (const auto& adjective : shuffledAdjectives) + std::string combined_words; + if (numberOfWords <= 256) { - if (adjective.size() == length) + std::array tmp; // fitting 1024 bytes worth of integers* + const size_t last_index = _allWords.size() - 1; + size_t reserve_size = 0; + + for (unsigned i = 0; i < numberOfWords; i++) { - return adjective; + tmp[i] = Number::integer(last_index); + auto vw = _allWords[tmp[i]]; + reserve_size += vw.size(); } - } - - return Helper::arrayElement(shuffledAdjectives); -} -std::string Word::adverb(std::optional length) -{ - if (!length) - { - return Helper::arrayElement(adverbs); - } - - const auto shuffledAdverbs = Helper::shuffle(adverbs); - - for (const auto& adverb : shuffledAdverbs) - { - if (adverb.size() == length) + unsigned space_words = (numberOfWords - 1); + combined_words.reserve(reserve_size + (numberOfWords - 1)); + for (unsigned i = 0; i < space_words; i++) { - return adverb; + auto vw = _allWords[tmp[i]]; + combined_words.append(vw.begin(), vw.end()); + combined_words.push_back(' '); } + auto vw = _allWords[tmp[numberOfWords - 1]]; + combined_words.append(vw.begin(), vw.end()); } - - return Helper::arrayElement(shuffledAdverbs); -} - -std::string Word::conjunction(std::optional length) -{ - if (!length) + else { - return Helper::arrayElement(conjunctions); - } - - const auto shuffledConjunctions = Helper::shuffle(conjunctions); - - for (const auto& conjunction : shuffledConjunctions) - { - if (conjunction.size() == length) + unsigned space_words = (numberOfWords - 1); + for (unsigned i = 0; i < space_words; i++) { - return conjunction; + auto s = sample(); + combined_words.append(s.begin(), s.end()); + combined_words.push_back(' '); } + + auto s = sample(); + combined_words.append(s.begin(), s.end()); } - return Helper::arrayElement(shuffledConjunctions); + return combined_words; } -std::string Word::interjection(std::optional length) +std::string_view Word::adjective(std::optional length) { - if (!length) - { - return Helper::arrayElement(interjections); - } - - const auto shuffledInterjections = Helper::shuffle(interjections); - - for (const auto& interjection : shuffledInterjections) - { - if (interjection.size() == length) - { - return interjection; - } - } - - return Helper::arrayElement(shuffledInterjections); + return sortedSizeArrayElement(length, _adjectives_sorted.cbegin(), _adjectives_sorted.cend()); } -std::string Word::noun(std::optional length) +std::string_view Word::adverb(std::optional length) { - if (!length) - { - return Helper::arrayElement(nouns); - } - - const auto shuffledNouns = Helper::shuffle(nouns); - - for (const auto& noun : shuffledNouns) - { - if (noun.size() == length) - { - return noun; - } - } - - return Helper::arrayElement(shuffledNouns); + return sortedSizeArrayElement(length, _adverbs_sorted.cbegin(), _adverbs_sorted.cend()); } -std::string Word::preposition(std::optional length) +std::string_view Word::conjunction(std::optional length) { - if (!length) - { - return Helper::arrayElement(prepositions); - } - - const auto shuffledPrepositions = Helper::shuffle(prepositions); - - for (const auto& preposition : shuffledPrepositions) - { - if (preposition.size() == length) - { - return preposition; - } - } - - return Helper::arrayElement(shuffledPrepositions); + return sortedSizeArrayElement(length, _conjunctions_sorted.cbegin(), _conjunctions_sorted.cend()); } -std::string Word::verb(std::optional length) +std::string_view Word::interjection(std::optional length) { - if (!length) - { - return Helper::arrayElement(verbs); - } + return sortedSizeArrayElement(length, _interjections_sorted.cbegin(), _interjections_sorted.cend()); +} - const auto shuffledVerbs = Helper::shuffle(verbs); +std::string_view Word::noun(std::optional length) +{ + return sortedSizeArrayElement(length, _nouns_sorted.cbegin(), _nouns_sorted.cend()); +} - for (const auto& verb : shuffledVerbs) - { - if (verb.size() == length) - { - return verb; - } - } +std::string_view Word::preposition(std::optional length) +{ + return sortedSizeArrayElement(length, _prepositions_sorted.cbegin(), _prepositions_sorted.cend()); +} - return Helper::arrayElement(shuffledVerbs); +std::string_view Word::verb(std::optional length) +{ + return sortedSizeArrayElement(length, _verbs_sorted.cbegin(), _verbs_sorted.cend()); } } diff --git a/src/modules/word/data/Adjectives.h b/src/modules/word/data/Adjectives.h index 91092db24..3614aa765 100644 --- a/src/modules/word/data/Adjectives.h +++ b/src/modules/word/data/Adjectives.h @@ -1,11 +1,11 @@ #pragma once -#include -#include +#include +#include namespace faker { -const std::vector adjectives = {"abandoned", +const std::array adjectives = {"abandoned", "able", "absolute", "adorable", diff --git a/src/modules/word/data/Adverbs.h b/src/modules/word/data/Adverbs.h index cf96ad814..c241aae2d 100644 --- a/src/modules/word/data/Adverbs.h +++ b/src/modules/word/data/Adverbs.h @@ -1,11 +1,11 @@ #pragma once -#include -#include +#include +#include namespace faker { -const std::vector adverbs = { +const std::array adverbs = { "abnormally", "absentmindedly", "accidentally", diff --git a/src/modules/word/data/All.h b/src/modules/word/data/All.h new file mode 100644 index 000000000..671c45b69 --- /dev/null +++ b/src/modules/word/data/All.h @@ -0,0 +1,185 @@ +#pragma once + +#include "Adjectives.h" +#include "Adverbs.h" +#include "Conjunctions.h" +#include "Interjections.h" +#include "Nouns.h" +#include "Prepositions.h" +#include "Verbs.h" + +namespace faker +{ + +// https://tristanbrindle.com/posts/a-more-useful-compile-time-quicksort +namespace cstd +{ +template +constexpr RAIt next(RAIt it, typename std::iterator_traits::difference_type n = 1) +{ + return it + n; +} + +template +constexpr auto distance(RAIt first, RAIt last) +{ + return last - first; +} + +template +constexpr void iter_swap(ForwardIt1 a, ForwardIt2 b) +{ + auto temp = std::move(*a); + *a = std::move(*b); + *b = std::move(temp); +} + +template +constexpr InputIt find_if_not(InputIt first, InputIt last, UnaryPredicate q) +{ + for (; first != last; ++first) + { + if (!q(*first)) + { + return first; + } + } + return last; +} + +template +constexpr ForwardIt partition(ForwardIt first, ForwardIt last, UnaryPredicate p) +{ + first = cstd::find_if_not(first, last, p); + if (first == last) + return first; + + for (ForwardIt i = cstd::next(first); i != last; ++i) + { + if (p(*i)) + { + cstd::iter_swap(i, first); + ++first; + } + } + return first; +} +} // namespace cstd + +template > +constexpr void quick_sort(RAIt first, RAIt last, Compare cmp = Compare{}) +{ + auto const N = cstd::distance(first, last); + if (N <= 1) + return; + auto const pivot = *cstd::next(first, N / 2); + auto const middle1 = cstd::partition(first, last, [=](auto const& elem) { return cmp(elem, pivot); }); + auto const middle2 = cstd::partition(middle1, last, [=](auto const& elem) { return !cmp(pivot, elem); }); + quick_sort(first, middle1, cmp); + quick_sort(middle2, last, cmp); +} + +const std::array + _allWords = []() +{ + std::array + table{}; + + size_t idx = 0; + for (const auto& v : adjectives) + { + table[idx] = v; + idx++; + } + + for (const auto& v : adverbs) + { + table[idx] = v; + idx++; + } + + for (const auto& v : conjunctions) + { + table[idx] = v; + idx++; + } + + for (const auto& v : interjections) + { + table[idx] = v; + idx++; + } + + for (const auto& v : nouns) + { + table[idx] = v; + idx++; + } + + for (const auto& v : prepositions) + { + table[idx] = v; + idx++; + } + + for (const auto& v : verbs) + { + table[idx] = v; + idx++; + } + + quick_sort(table.begin(), table.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return table; +}(); + +const auto _adjectives_sorted = []() +{ + auto sorted = adjectives; + quick_sort(sorted.begin(), sorted.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return sorted; +}(); + +const auto _adverbs_sorted = []() +{ + auto sorted = adverbs; + quick_sort(sorted.begin(), sorted.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return sorted; +}(); + +const auto _conjunctions_sorted = []() +{ + auto sorted = conjunctions; + quick_sort(sorted.begin(), sorted.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return sorted; +}(); + +const auto _interjections_sorted = []() +{ + auto sorted = interjections; + quick_sort(sorted.begin(), sorted.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return sorted; +}(); + +const auto _nouns_sorted = []() +{ + auto sorted = nouns; + quick_sort(sorted.begin(), sorted.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return sorted; +}(); + +const auto _prepositions_sorted = []() +{ + auto sorted = prepositions; + quick_sort(sorted.begin(), sorted.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return sorted; +}(); + +const auto _verbs_sorted = []() +{ + auto sorted = verbs; + quick_sort(sorted.begin(), sorted.end(), [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }); + return sorted; +}(); +} \ No newline at end of file diff --git a/src/modules/word/data/Conjunctions.h b/src/modules/word/data/Conjunctions.h index ca461fde5..f79879360 100644 --- a/src/modules/word/data/Conjunctions.h +++ b/src/modules/word/data/Conjunctions.h @@ -1,11 +1,11 @@ #pragma once -#include -#include +#include +#include namespace faker { -const std::vector conjunctions = { +const std::array conjunctions = { "after", "although", "and", "as", "because", "before", "but", "consequently", "even", "finally", "for", "furthermore", "hence", "how", "however", "if", "inasmuch", "incidentally", "indeed", "instead", "lest", "likewise", "meanwhile", "nor", diff --git a/src/modules/word/data/Interjections.h b/src/modules/word/data/Interjections.h index ce59349c2..574023825 100644 --- a/src/modules/word/data/Interjections.h +++ b/src/modules/word/data/Interjections.h @@ -1,11 +1,11 @@ #pragma once -#include -#include +#include +#include namespace faker { -const std::vector interjections = { +const std::array interjections = { "yuck", "oh", "phooey", "blah", "boo", "whoa", "yowza", "huzzah", "boohoo", "fooey", "geez", "pfft", "ew", "ah", "yum", "brr", "hm", "yahoo", "aha", "woot", "drat", "gah", "meh", "psst", "aw", "ugh", "yippee", "eek", "gee", "bah", "gadzooks", "duh", "ha", "mmm", "ouch", "phew", diff --git a/src/modules/word/data/Nouns.h b/src/modules/word/data/Nouns.h index a8c8b7b60..97f62595b 100644 --- a/src/modules/word/data/Nouns.h +++ b/src/modules/word/data/Nouns.h @@ -1,11 +1,11 @@ #pragma once -#include -#include +#include +#include namespace faker { -const std::vector nouns = {"ATM", +const std::array nouns = {"ATM", "CD", "SUV", "TV", diff --git a/src/modules/word/data/Prepositions.h b/src/modules/word/data/Prepositions.h index 106241d7e..bc53a9df1 100644 --- a/src/modules/word/data/Prepositions.h +++ b/src/modules/word/data/Prepositions.h @@ -1,11 +1,11 @@ #pragma once -#include -#include +#include +#include namespace faker { -const std::vector prepositions = { +const std::array prepositions = { "a", "abaft", "aboard", diff --git a/src/modules/word/data/Verbs.h b/src/modules/word/data/Verbs.h index 725f2d61b..b1492c19e 100644 --- a/src/modules/word/data/Verbs.h +++ b/src/modules/word/data/Verbs.h @@ -1,11 +1,11 @@ #pragma once -#include -#include +#include +#include namespace faker { -const std::vector verbs = {"abandon", +const std::array verbs = {"abandon", "abase", "abate", "abbreviate",