diff --git a/manual/extensions/full_text_searchable/FullTextSearchable.md b/manual/extensions/full_text_searchable/FullTextSearchable.md index b4b5a66c3..27af31241 100644 --- a/manual/extensions/full_text_searchable/FullTextSearchable.md +++ b/manual/extensions/full_text_searchable/FullTextSearchable.md @@ -19,7 +19,7 @@ Let's assume we have a blog and want to implement full text search over title an This migration will create a 3rd column named `full_text_vector` of type `tsvector`, a gin index, a trigger and a function to update automatically this column. -Over the `on` keyword, '{"title", 'A'}' means it allows search of the content of "title", with level of priority (weight) "A", which tells postgres than title content is more meaningful than the article content itself. +Over the `on` keyword, `'{"title", 'A'}'` means it allows search of the content of "title", with level of priority (weight) "A", which tells postgres that title content is more meaningful than the article content itself. 
Now, let's build some models: diff --git a/spec/model/full_text_searchable_spec.cr b/spec/extensions/full_text_searchable_spec.cr similarity index 93% rename from spec/model/full_text_searchable_spec.cr rename to spec/extensions/full_text_searchable_spec.cr index 355adcc6a..0e30ff2ce 100644 --- a/spec/model/full_text_searchable_spec.cr +++ b/spec/extensions/full_text_searchable_spec.cr @@ -33,16 +33,16 @@ module FullTextSearchableSpec describe "test tsv searchable" do it "Can translate client query to ts_query" do - Clear::Model::FullTextSearchable.to_tsq("rick & morty").should eq("'rick & morty'") - Clear::Model::FullTextSearchable.to_tsq("rick+morty").should eq("'rick morty'") + Clear::Model::FullTextSearchable.to_tsq("rick & morty").should eq("'rick' & '&' & 'morty'") + Clear::Model::FullTextSearchable.to_tsq("rick+morty").should eq("'rick' & 'morty'") Clear::Model::FullTextSearchable.to_tsq("\"rick morty\"").should eq("'rick morty'") Clear::Model::FullTextSearchable.to_tsq("'rick morty'").should eq("'rick morty'") - Clear::Model::FullTextSearchable.to_tsq("rick morty").should eq("'rick' | 'morty'") + Clear::Model::FullTextSearchable.to_tsq("rick morty").should eq("'rick' & 'morty'") Clear::Model::FullTextSearchable.to_tsq("rick -morty").should eq("'rick' & !'morty'") - Clear::Model::FullTextSearchable.to_tsq("rick -'rick hunter'").should eq("'rick' & !'rick hunter' ") + Clear::Model::FullTextSearchable.to_tsq("rick -'rick hunter'").should eq("'rick' & !'rick hunter'") Clear::Model::FullTextSearchable.to_tsq("l'esplanade").should eq("'l''esplanade'") - Clear::Model::FullTextSearchable.to_tsq("'l''usine'").should eq("'l''usine'") - Clear::Model::FullTextSearchable.to_tsq("'l'usine").should eq("'l''usine'") + Clear::Model::FullTextSearchable.to_tsq("'l''usine'").should eq("'l' & 'usine'") + Clear::Model::FullTextSearchable.to_tsq("'l'usine").should eq("'l' & 'usine'") end it "Can search through tsvector" do @@ -59,8 +59,8 @@ module FullTextSearchableSpec 
description: "Going in jail and escape with his innocent brother"}) Series.query.search("breaking").count.should eq 3 - Series.query.search("break & !prison").count.should eq 2 - Series.query.search("break | throne").count.should eq 4 + Series.query.search("break -prison").count.should eq 2 + Series.query.search("break throne").count.should eq 0 end end end diff --git a/spec/sql/jsonb_spec.cr b/spec/extensions/jsonb_spec.cr similarity index 100% rename from spec/sql/jsonb_spec.cr rename to spec/extensions/jsonb_spec.cr diff --git a/src/clear/extensions/full_text_searchable/migration.cr b/src/clear/extensions/full_text_searchable/migration.cr index e81b06b97..774f41013 100644 --- a/src/clear/extensions/full_text_searchable/migration.cr +++ b/src/clear/extensions/full_text_searchable/migration.cr @@ -4,7 +4,6 @@ struct Clear::Migration::FullTextSearchableOperation < Clear::Migration::Operati IMPORTANT = 'B' NORMAL = 'C' LOW = 'D' - VERY_LOW = 'E' end getter table : String @@ -27,8 +26,16 @@ struct Clear::Migration::FullTextSearchableOperation < Clear::Migration::Operati @function_name = function_name || "tsv_trigger_#{table}" end + private def ensure_priority!(field_priority : Char) + unless field_priority >= 'A' && field_priority <= 'D' + raise "Priority level for tsvector range from 'A' (higher) to 'D' (lower)" + end + end + private def print_concat_rules(use_new = true) src_fields.map do |(field_name, field_priority)| + ensure_priority!(field_priority) + "setweight(to_tsvector(#{Clear::Expression[catalog]}, coalesce(#{use_new && "new." 
|| ""}#{field_name}, ''))," + " #{Clear::Expression[field_priority]})" end.join(" || ") diff --git a/src/clear/extensions/full_text_searchable/model.cr b/src/clear/extensions/full_text_searchable/model.cr index 0f0c67988..2f535763c 100644 --- a/src/clear/extensions/full_text_searchable/model.cr +++ b/src/clear/extensions/full_text_searchable/model.cr @@ -3,10 +3,9 @@ require "./tsvector" module Clear::Model::FullTextSearchable # Set this model as searchable using tsvector macro full_text_searchable(through = "full_text_vector", catalog = "pg_catalog.english", scope_name = "search") - # TODO: Use converter and tsv structure column( {{through.id}} : Clear::TSVector, presence: false, converter: Clear::TSVector::Converter ) - scope "#{scope_name.id}" do |str| + scope "{{scope_name.id}}" do |str| where{ op({{through.id}}, to_tsquery({{catalog}}, Clear::Model::FullTextSearchable.to_tsq(str)), "@@") } end @@ -15,25 +14,70 @@ module Clear::Model::FullTextSearchable # :nodoc: # Split a chain written by a user # A problem to solve is the `'` character - def self.split_to_exp(text) - in_quote = false - quote_start = nil - ignore_next_quote = false - exp = [] of String + private def self.split_to_exp(text) + last_char : Char? = nil + quote_char : Char? = nil + modifier : Symbol? = nil + + currtoken = [] of Char + arr_tokens = [] of {Symbol?, String} + text.chars.each_with_index do |c, idx| case c - when /[A-Z0-9]/i - # if it's a alphanumerical character - ignore_next_quote = true - ignore_next_quote - when '\'', '"' - if (in_quote && quote_start == c) + when '\'' + if quote_char.nil? + if last_char.to_s =~ /[a-z0-9]/i # Avoid french word e.g. "l'avion" + currtoken << c + else + quote_char = '\'' + end + elsif quote_char == '\'' + arr_tokens << {modifier, currtoken.join} + currtoken.clear + modifier = nil + quote_char = nil + else + currtoken << c end - - in_quote = true - quote_start = c + when ' ' + if quote_char.nil? + if currtoken.any? 
+ arr_tokens << {modifier, currtoken.join} + currtoken.clear + end + modifier = nil + else + currtoken << c + end + when '"' + if (quote_char.nil?) + quote_char = '"' + elsif quote_char == '"' + arr_tokens << {modifier, currtoken.join} + currtoken.clear + modifier = nil + quote_char = nil + else + currtoken << c + end + when '-' + if currtoken.empty? && quote_char.nil? # When first char of the token == `-` + modifier = :- + else + currtoken << c + end + else + currtoken << c end + + last_char = c end + + if currtoken.any? + arr_tokens << {modifier, currtoken.join} + end + + arr_tokens end # Parse client side text and generate string ready to be ingested by PG's `to_tsquery`. @@ -42,20 +86,19 @@ module Clear::Model::FullTextSearchable # `search` method use then a wrapper text_to_search used to ensure than # request is understood and produce ALWAYS legal string for `to_tsquery` # This is a good helper then to use with the input of your end-users ! + # + # However, this helper can be improved, as it doesn't use all the features + # of tsvector (parentheses, OR operator, etc.) def self.to_tsq(text) - return text - current_str = "" - in_quote = false - text.chars.each_with_index do |c, idx| - case c - when '\'' - in_quote = !in_quote - if (!in_quote) - current_str - end - when '-' + text = text.gsub(/\+/, " ") + tokens = split_to_exp(text) + + return tokens.map do |(modifier, value)| + if modifier == :- + "!" + Clear::Expression[value] else + Clear::Expression[value] end - end + end.join(" & ") end end