Skip to content
This repository has been archived by the owner on Apr 5, 2021. It is now read-only.

Commit

Permalink
Merge pull request #80 from RTICWDT/dev-credential-programs
Browse files Browse the repository at this point in the history
Add all_programs_nested option
  • Loading branch information
brownpl authored Nov 5, 2019
2 parents dfa43f8 + f9e3369 commit 017782b
Show file tree
Hide file tree
Showing 9 changed files with 360 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
test-odm:
<<: *shared
docker:
- image: circleci/ruby:2.6.3
- image: circleci/ruby:2.6.5
environment:
BUNDLE_JOBS: 3
BUNDLE_RETRY: 3
Expand Down
2 changes: 1 addition & 1 deletion .ruby-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.3
2.6.5
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
source 'https://rubygems.org'
ruby '2.6.3'
ruby '2.6.5'

# Distribute your app as a gem
# gemspec
Expand Down
6 changes: 3 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ GEM
cf-app-utils (0.6)
coderay (1.1.2)
concurrent-ruby (1.1.5)
crass (1.0.4)
crass (1.0.5)
diff-lcs (1.3)
docile (1.3.2)
dotenv (2.7.5)
Expand Down Expand Up @@ -57,7 +57,7 @@ GEM
liquid (3.0.3)
liquify (0.2.7)
liquid (>= 2.2.2)
loofah (2.2.3)
loofah (2.3.1)
crass (~> 1.0.2)
nokogiri (>= 1.5.9)
mail (2.7.1)
Expand Down Expand Up @@ -213,7 +213,7 @@ DEPENDENCIES
typhoeus

RUBY VERSION
ruby 2.6.3p62
ruby 2.6.5p114

BUNDLED WITH
1.17.2
7 changes: 4 additions & 3 deletions app/controllers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def set_content_type(options)
# see comment in method body
def get_search_args_from_params(params)
options = {}
%w(metrics sort fields zip distance page per_page debug keys_nested all_programs).each do |opt|
%w(metrics sort fields zip distance page per_page debug keys_nested all_programs all_programs_nested).each do |opt|
options[opt.to_sym] = params.delete("_#{opt}")
# TODO: remove next line to end support for un-prefixed option parameters
options[opt.to_sym] ||= params.delete(opt)
Expand All @@ -113,8 +113,9 @@ def get_search_args_from_params(params)

options[:fields] = check_fields_for_wildcards(options[:fields])

options[:keys_nested] = check_for_valid_key_format_input(options[:keys_nested])
options[:all_programs] = check_for_valid_key_format_input(options[:all_programs])
options[:keys_nested] = check_for_valid_key_format_input(options[:keys_nested])
options[:all_programs] = check_for_valid_key_format_input(options[:all_programs])
options[:all_programs_nested] = check_for_valid_key_format_input(options[:all_programs_nested])

options[:metrics] = options[:metrics].split(/\s*,\s*/) if options[:metrics]
options
Expand Down
132 changes: 85 additions & 47 deletions lib/data_magic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def self.search(terms, options = {})

time_start = Time.now.to_f
result = client.search full_query

search_time = Time.now.to_f - time_start
logger.info "ES query time (ms): #{result["took"]} ; Query fetch time (s): #{search_time} ; result: #{result.inspect[0..500]}"

Expand Down Expand Up @@ -149,11 +149,12 @@ def self.process_result_from_es( hits, result_processing_info, query_body, optio
# Collect list of nested fields that need to be filtered
# This is necessary because the standard ES fields filter creates arrays from nested data, which we don't want
nested_fields_filter = result_processing_info[:nested_fields_filter] ? result_processing_info[:nested_fields_filter] : []

if query_body.dig(:_source).class == Hash
all_programs_nested = options[:all_programs_nested]

if query_body.dig(:_source) == { exclude: ["_*"] }
# we're getting the whole document and we can find in _source
results = hits["hits"].map {|hit| hit["_source"]}

# Tested - implementation of nested vs dotted option - when line below is exposed,
# and &keys_nested=true is in query, I get Error: JSON::NestingError - nesting of 100 is too deep
# results = options[:keys_nested] ? NestedHash.new(results) : results
Expand All @@ -165,19 +166,12 @@ def self.process_result_from_es( hits, result_processing_info, query_body, optio
from_source = hit.fetch("_source", {})
dotted_from_source = NestedHash.new.withdotkeys(from_source)
found = found.merge(dotted_from_source)

# When an inner query is submitted, the nested data_type fields are under inner_hits
inner = hit.fetch("inner_hits", {})
delete_set = Set[]

delete_set = Set[]
delete_set.each { |k| found.delete k }

# each result looks like this:
# {"city"=>["Springfield"], "address"=>["742 Evergreen Terrace"], "children" => [{...}, {...}, {...}] }
found.keys.each { |key| found[key] = found[key].length > 1 ? found[key] : found[key][0] }
# now it should look like this:
# {"city"=>"Springfield", "address"=>"742 Evergreen Terrace, "children" => [{...}, {...}, {...}]}


found = transform_array_values(found)

# re-insert null fields that didn't get returned by ES
if query_body[:fields]
query_body[:fields].each do |field|
Expand All @@ -187,39 +181,11 @@ def self.process_result_from_es( hits, result_processing_info, query_body, optio
end
end

# Collect inner hits
nested_details_hash = {}
if !inner.empty?
inner.keys.each do |inn_key|
inner_details = inner[inn_key]["hits"]["hits"].map do |nested_obj|
details = nested_obj.fetch("_source", {})
n_hash = NestedHash.new

details.keys.each do |key|
n_hash[key] = details[key]
end
# Convert to dotted keys
n_hash = n_hash.withdotkeys

# If there is a fields filter for nested datatypes, apply it here
if !nested_fields_filter.empty?
keys_to_keep = nested_fields_filter.select { |f| f.start_with? inn_key }.map do |n|
n.gsub(inn_key + ".","")
end
n_hash_filtered = n_hash.select { |k| keys_to_keep.include?(k) }
end

!n_hash_filtered.nil? ? n_hash_filtered : n_hash
end
# When an inner query is submitted, the nested data_type fields are under inner_hits
inner = hit.fetch("inner_hits", {})

# Set the nested data type string as the key and the array of inner hits as the value
nested_details_hash[inn_key] = inner_details
end
end

# If nested hits, combine with other fields in found hash
if !nested_details_hash.empty?
found = found.merge(nested_details_hash)
if !all_programs_nested && !inner.empty?
found = collect_inner_hits(inner, found, nested_fields_filter)
end

# If keys_nested option passed in params, then return result keys in nested format
Expand All @@ -233,6 +199,78 @@ def self.process_result_from_es( hits, result_processing_info, query_body, optio
results
end

# Tells whether a field name falls under one of the configured nested datatypes.
#
# The nested paths are read from DataMagic.config.es_data_types["nested"].
#
# @param field_name [String] field name to check
# @return [Boolean, nil] true when field_name starts with any configured
#   nested path, false when it matches none, nil when no nested paths are
#   configured at all
def self.field_type_nested?(field_name)
  nested_paths = DataMagic.config.es_data_types["nested"]
  # No nested configuration: fall through with nil, which callers treat as falsy.
  return unless nested_paths

  nested_paths.any? { |path| field_name.start_with?(path) }
end

# Unwraps single-element arrays in a result hash, in place.
#
# Each incoming result looks like this:
#   {
#     "city"     => ["Springfield"],
#     "address"  => ["742 Evergreen Terrace"],
#     "children" => [{...}, {...}, {...}]
#   }
# and is turned into:
#   {
#     "city"     => "Springfield",
#     "address"  => "742 Evergreen Terrace",
#     "children" => [{...}, {...}, {...}]
#   }
#
# Rules applied per key:
# * values that are not Arrays are left untouched (previously they were
#   indexed with [0] as well, which raised on Integer/nil values and
#   replaced small Hashes with nil);
# * Arrays with more than one element are kept as Arrays;
# * Arrays belonging to a nested datatype (see field_type_nested?) are kept
#   as Arrays even when they hold a single element;
# * any other Array is replaced by its first element (nil when empty).
#
# @param found [Hash] result hash keyed by field name; mutated in place
# @return [Hash] the same hash that was passed in
def self.transform_array_values(found)
  # Iterate over a snapshot of the keys because values are reassigned below.
  found.keys.each do |key|
    value = found[key]
    next unless value.is_a?(Array)
    next if value.length > 1
    # Keep nested datatypes in an array, even when there is just one program
    next if field_type_nested?(key)

    found[key] = value.first
  end

  found
end

# Merges the inner hits of nested datatypes into a result hash.
#
# For every nested path in +inner+, each inner hit's "_source" is copied into
# a NestedHash and converted with +withdotkeys+. The list of converted hits is
# stored under the nested path, and all such lists are merged into +found+.
#
# @param inner [Hash] the "inner_hits" section of a hit, keyed by nested
#   path; each value is read as value["hits"]["hits"], a list of hits whose
#   "_source" (default {}) holds the nested document
# @param found [Hash] fields already collected for the hit; not mutated
# @param nested_fields_filter [Array<String>] full field names
#   ("<nested path>.<key>") to keep. When empty, inner hits are returned
#   unfiltered. When non-empty, each inner hit keeps only the keys listed for
#   its own nested path, so a path with no listed keys yields empty hashes.
# @return [Hash] +found+ itself when +inner+ is empty, otherwise a new hash
#   with one entry per nested path merged over +found+
def self.collect_inner_hits(inner, found, nested_fields_filter)
  nested_details_hash = inner.each_with_object({}) do |(inn_key, inner_result), collected|
    # Strip only the leading "<nested path>." from each filter entry. A global
    # substitution would also remove later occurrences of the path inside the
    # field name and make the filter miss keys it should keep.
    prefix = "#{inn_key}."
    keys_to_keep = nested_fields_filter
                   .select { |field| field.start_with?(prefix) }
                   .map { |field| field.delete_prefix(prefix) }

    # Set the nested data type string as the key and the array of inner hits as the value
    collected[inn_key] = inner_result["hits"]["hits"].map do |nested_obj|
      n_hash = NestedHash.new
      nested_obj.fetch("_source", {}).each { |key, value| n_hash[key] = value }

      # Convert to dotted keys
      dotted = n_hash.withdotkeys

      # If there is a fields filter for nested datatypes, apply it here
      if nested_fields_filter.empty?
        dotted
      else
        dotted.select { |key, _value| keys_to_keep.include?(key) }
      end
    end
  end

  # If nested hits, combine with other fields in found hash
  nested_details_hash.empty? ? found : found.merge(nested_details_hash)
end

def self.document_data_type(hash, root='')
hash.each do |key, value|
if value.is_a?(Hash) && value[:type].nil? # things are nested under this
Expand Down
Loading

0 comments on commit 017782b

Please sign in to comment.