# Patch summary (descriptive preamble; the patch itself starts at the `diff --git` header below).
#
# app/services/language_service.rb
#   Adds a guard clause as the first statement of `tokenize(value)`:
#   `return [] unless value.is_a?(String)`. Any non-String argument now yields
#   an empty array before the split/map pipeline is reached.
#   NOTE(review): with the guard in place, the safe-navigation operators in
#   `value&.split&.map` look redundant, because `value` is a String at that point.
#   NOTE(review): nil is not a String, so nil input now returns [] as well. The
#   `&.` chain suggests nil previously short-circuited to nil -- the rest of the
#   method is outside this hunk, so confirm no caller relies on a nil result.
#
# spec/services/language_service_spec.rb
#   Adds an example that maps `LanguageService.tokenize(elem).uniq` over
#   [0, 1, 'sample'] and expects [[], [], ['SMPL']].
#   NOTE(review): only Integer inputs are exercised; nil and other non-String
#   types are not covered by this example.
#   NOTE(review): `match_array` compares contents regardless of order -- confirm
#   that is intended, since the mapped result is positional.
diff --git a/app/services/language_service.rb b/app/services/language_service.rb index e89c8ef132..5f7ef9ce42 100644 --- a/app/services/language_service.rb +++ b/app/services/language_service.rb @@ -39,6 +39,8 @@ def latin?(value) end def tokenize(value) + return [] unless value.is_a?(String) + value&.split&.map do |elem| diacriticless = strip_diacritics(elem) # NOTE: Text::Metaphone produces upcase strings. Upcasing to keep consistent output. diff --git a/spec/services/language_service_spec.rb b/spec/services/language_service_spec.rb index 189bd5baff..79dc1567c0 100644 --- a/spec/services/language_service_spec.rb +++ b/spec/services/language_service_spec.rb @@ -14,4 +14,14 @@ ).to eq(['فتح', 'أصلا', 'عنف', 'طفل', 'ملف', 'راح إلى محطة', 'احمد', 'محمد']) end end + + describe 'when values include an integer' do + it 'should skip integer values' do + expect( + [0, 1, 'sample'].map do |elem| + LanguageService.tokenize(elem).uniq + end + ).to match_array([[], [], ['SMPL']]) + end + end end