Skip to content

Commit

Permalink
Final fix, ready to release tsvector support ! :)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yacine Petitprez committed Jul 2, 2018
1 parent 1dcfdaa commit e8bbfa5
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Let's assume we have a blog and want to implement full text search over title an
This migration will create a third column named `full_text_vector` of type `tsvector`,
a GIN index, a trigger and a function to automatically update this column.

Over the `on` keyword, '{"title", 'A'}' means it allows search of the content of "title", with level of priority (weight) "A", which tells postgres than title content is more meaningful than the article content itself.
Over the `on` keyword, `'{"title", 'A'}'` means it allows search of the content of "title", with level of priority (weight) "A", which tells postgres that the title content is more meaningful than the article content itself.

Now, let's build some models:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ module FullTextSearchableSpec

describe "test tsv searchable" do
it "Can translate client query to ts_query" do
Clear::Model::FullTextSearchable.to_tsq("rick & morty").should eq("'rick & morty'")
Clear::Model::FullTextSearchable.to_tsq("rick+morty").should eq("'rick morty'")
Clear::Model::FullTextSearchable.to_tsq("rick & morty").should eq("'rick' & '&' & 'morty'")
Clear::Model::FullTextSearchable.to_tsq("rick+morty").should eq("'rick' & 'morty'")
Clear::Model::FullTextSearchable.to_tsq("\"rick morty\"").should eq("'rick morty'")
Clear::Model::FullTextSearchable.to_tsq("'rick morty'").should eq("'rick morty'")
Clear::Model::FullTextSearchable.to_tsq("rick morty").should eq("'rick' | 'morty'")
Clear::Model::FullTextSearchable.to_tsq("rick morty").should eq("'rick' & 'morty'")
Clear::Model::FullTextSearchable.to_tsq("rick -morty").should eq("'rick' & !'morty'")
Clear::Model::FullTextSearchable.to_tsq("rick -'rick hunter'").should eq("'rick' & !'rick hunter' ")
Clear::Model::FullTextSearchable.to_tsq("rick -'rick hunter'").should eq("'rick' & !'rick hunter'")
Clear::Model::FullTextSearchable.to_tsq("l'esplanade").should eq("'l''esplanade'")
Clear::Model::FullTextSearchable.to_tsq("'l''usine'").should eq("'l''usine'")
Clear::Model::FullTextSearchable.to_tsq("'l'usine").should eq("'l''usine'")
Clear::Model::FullTextSearchable.to_tsq("'l''usine'").should eq("'l' & 'usine'")
Clear::Model::FullTextSearchable.to_tsq("'l'usine").should eq("'l' & 'usine'")
end

it "Can search through tsvector" do
Expand All @@ -59,8 +59,8 @@ module FullTextSearchableSpec
description: "Going in jail and escape with his innocent brother"})

Series.query.search("breaking").count.should eq 3
Series.query.search("break & !prison").count.should eq 2
Series.query.search("break | throne").count.should eq 4
Series.query.search("break -prison").count.should eq 2
Series.query.search("break throne").count.should eq 0
end
end
end
Expand Down
File renamed without changes.
9 changes: 8 additions & 1 deletion src/clear/extensions/full_text_searchable/migration.cr
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ struct Clear::Migration::FullTextSearchableOperation < Clear::Migration::Operati
IMPORTANT = 'B'
NORMAL = 'C'
LOW = 'D'
VERY_LOW = 'E'
end

getter table : String
Expand All @@ -27,8 +26,16 @@ struct Clear::Migration::FullTextSearchableOperation < Clear::Migration::Operati
@function_name = function_name || "tsv_trigger_#{table}"
end

# Guard for the weight label attached to a searchable field.
#
# Returns without doing anything when *field_priority* lies between 'A' and
# 'D' (inclusive); any other character raises with an explanatory message.
private def ensure_priority!(field_priority : Char)
  return if 'A' <= field_priority <= 'D'

  raise "Priority level for tsvector range from 'A' (higher) to 'D' (lower)"
end

private def print_concat_rules(use_new = true)
src_fields.map do |(field_name, field_priority)|
ensure_priority!(field_priority)

"setweight(to_tsvector(#{Clear::Expression[catalog]}, coalesce(#{use_new && "new." || ""}#{field_name}, ''))," +
" #{Clear::Expression[field_priority]})"
end.join(" || ")
Expand Down
99 changes: 71 additions & 28 deletions src/clear/extensions/full_text_searchable/model.cr
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ require "./tsvector"
module Clear::Model::FullTextSearchable
# Set this model as searchable using tsvector
macro full_text_searchable(through = "full_text_vector", catalog = "pg_catalog.english", scope_name = "search")
# TODO: Use converter and tsv structure
column( {{through.id}} : Clear::TSVector, presence: false, converter: Clear::TSVector::Converter )

scope "#{scope_name.id}" do |str|
scope "{{scope_name.id}}" do |str|
where{ op({{through.id}}, to_tsquery({{catalog}},
Clear::Model::FullTextSearchable.to_tsq(str)), "@@") }
end
Expand All @@ -15,25 +14,70 @@ module Clear::Model::FullTextSearchable
# :nodoc:
# Split a chain written by a user
# A problem to solve is the `'` character
def self.split_to_exp(text)
in_quote = false
quote_start = nil
ignore_next_quote = false
exp = [] of String
private def self.split_to_exp(text)
last_char : Char? = nil
quote_char : Char? = nil
modifier : Symbol? = nil

currtoken = [] of Char
arr_tokens = [] of {Symbol?, String}

text.chars.each_with_index do |c, idx|
case c
when /[A-Z0-9]/i
# if it's a alphanumerical character
ignore_next_quote = true
ignore_next_quote
when '\'', '"'
if (in_quote && quote_start == c)
when '\''
if quote_char.nil?
if last_char.to_s =~ /[a-z0-9]/i # Avoid french word e.g. "l'avion"
currtoken << c
else
quote_char = '\''
end
elsif quote_char == '\''
arr_tokens << {modifier, currtoken.join}
currtoken.clear
modifier = nil
quote_char = nil
else
currtoken << c
end

in_quote = true
quote_start = c
when ' '
if quote_char.nil?
if currtoken.any?
arr_tokens << {modifier, currtoken.join}
currtoken.clear
end
modifier = nil
else
currtoken << c
end
when '"'
if (quote_char.nil?)
quote_char = '"'
elsif quote_char == '"'
arr_tokens << {modifier, currtoken.join}
currtoken.clear
modifier = nil
quote_char = nil
else
currtoken << c
end
when '-'
if currtoken.empty? && quote_char.nil? # When first char of the token == `-`
modifier = :-
else
currtoken << c
end
else
currtoken << c
end

last_char = c
end

if currtoken.any?
arr_tokens << {modifier, currtoken.join}
end

arr_tokens
end

# Parse client side text and generate string ready to be ingested by PG's `to_tsquery`.
Expand All @@ -42,20 +86,19 @@ module Clear::Model::FullTextSearchable
# The `search` method then uses a wrapper, text_to_search, to ensure that the
# request is understood and ALWAYS produces a legal string for `to_tsquery`.
# This makes it a good helper to use with the input of your end users!
#
# However, this helper can be improved, as it doesn't use all the features
# of tsvector (parentheses, OR operator, etc.)
def self.to_tsq(text)
return text
current_str = ""
in_quote = false
text.chars.each_with_index do |c, idx|
case c
when '\''
in_quote = !in_quote
if (!in_quote)
current_str
end
when '-'
text = text.gsub(/\+/, " ")
tokens = split_to_exp(text)

return tokens.map do |(modifier, value)|
if modifier == :-
"!" + Clear::Expression[value]
else
Clear::Expression[value]
end
end
end.join(" & ")
end
end

0 comments on commit e8bbfa5

Please sign in to comment.