# Add source counts and per-category tag counts to the image search documents.
#
# index/images.mk -- builds images.jsonl, which import_es loads with $(ELASTICDUMP):
#   * dump_jsonl: also depends on the new `sources` target.
#   * metadata:   emits approved, animated, duration (0 when is_animated is
#                 false), processed and thumbnails_generated; source_url is no
#                 longer taken from this select.
#   * galleries:  keeps only the gallery_interactions object.
#   * tags:       joins `tags` to emit one <category>_tag_count per category.
#   * sources:    new target; aggregates image_sources into source_url and
#                 source_count.
#   * hides / downvotes / upvotes / faves: emitted key names are renamed.
#
# lib/philomena/images/elasticsearch_index.ex:
#   * declares and populates source_count and the ten <category>_tag_count
#     integer fields.
#
# Review notes:
#   * The argument list of jsonb_build_object(...) in the `tags` target must
#     not end with a comma: PostgreSQL reports a syntax error, and because
#     psql runs with ON_ERROR_STOP=1 the whole target would abort.
#   * Per-category counts use `count(*) filter (where ...)`; it counts exactly
#     the rows the equivalent `count(case when ... end)` form would.
#   * NOTE(review): leading whitespace in this patch was reconstructed (tabs in
#     the Makefile, spaces in the Elixir file). If context lines do not match
#     the tree, apply with `git apply --ignore-whitespace` -- TODO confirm.
diff --git a/index/images.mk b/index/images.mk
index c1f27a494..8c843ee28 100644
--- a/index/images.mk
+++ b/index/images.mk
@@ -7,7 +7,7 @@ all: import_es
 import_es: dump_jsonl
 	$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"
 
-dump_jsonl: metadata true_uploaders uploaders deleters galleries tags hides upvotes downvotes faves tag_names
+dump_jsonl: metadata true_uploaders uploaders deleters galleries tags sources hides upvotes downvotes faves tag_names
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;'
 	sed -i images.jsonl -e 's/\\\\/\\/g'
@@ -15,6 +15,8 @@ dump_jsonl: metadata true_uploaders uploaders deleters galleries tags hides upvo
 metadata: image_search_json
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
 		insert into temp_images.image_search_json (image_id, object) select id, jsonb_build_object(
+			'approved', approved,
+			'animated', is_animated,
 			'anonymous', anonymous,
 			'aspect_ratio', nullif(image_aspect_ratio, 'NaN'::float8),
 			'comment_count', comments_count,
@@ -23,6 +25,7 @@ metadata: image_search_json
 			'description', description,
 			'downvotes', downvotes_count,
 			'duplicate_id', duplicate_id,
+			'duration', (case when is_animated then image_duration else 0::float end),
 			'faves', faves_count,
 			'file_name', image_name,
 			'fingerprint', fingerprint,
@@ -35,10 +38,11 @@ metadata: image_search_json
 			'orig_sha512_hash', image_orig_sha512_hash,
 			'original_format', image_format,
 			'pixels', cast(image_width as bigint)*cast(image_height as bigint),
+			'processed', processed,
 			'score', score,
 			'size', image_size,
 			'sha512_hash', image_sha512_hash,
-			'source_url', lower(source_url),
+			'thumbnails_generated', thumbnails_generated,
 			'updated_at', updated_at,
 			'upvotes', upvotes_count,
 			'width', image_width,
@@ -64,33 +68,49 @@ deleters: image_search_json
 galleries: image_search_json
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
 		insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_interactions', jsonb_agg(jsonb_build_object('id', gi.gallery_id, 'position', gi.position))) from gallery_interactions gi group by image_id;
-		insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_id', jsonb_agg(gi.gallery_id)) from gallery_interactions gi group by image_id;
-		insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_position', jsonb_object_agg(gi.gallery_id, gi.position)) from gallery_interactions gi group by image_id;
 	SQL
 
 tags: image_search_json
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
-		insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object('tag_ids', jsonb_agg(it.tag_id), 'tag_count', count(*)) from image_taggings it group by image_id;
+		insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object(
+			'tag_ids', jsonb_agg(it.tag_id),
+			'tag_count', count(*),
+			'error_tag_count', count(*) filter (where t.category = 'error'),
+			'rating_tag_count', count(*) filter (where t.category = 'rating'),
+			'origin_tag_count', count(*) filter (where t.category = 'origin'),
+			'character_tag_count', count(*) filter (where t.category = 'character'),
+			'oc_tag_count', count(*) filter (where t.category = 'oc'),
+			'species_tag_count', count(*) filter (where t.category = 'species'),
+			'body_type_tag_count', count(*) filter (where t.category = 'body-type'),
+			'content_fanmade_tag_count', count(*) filter (where t.category = 'content-fanmade'),
+			'content_official_tag_count', count(*) filter (where t.category = 'content-official'),
+			'spoiler_tag_count', count(*) filter (where t.category = 'spoiler')
+		) from image_taggings it inner join tags t on t.id = it.tag_id group by image_id;
+	SQL
+
+sources: image_search_json
+	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
+		insert into temp_images.image_search_json (image_id, object) select s.image_id, jsonb_build_object('source_url', jsonb_agg(lower(s.source)), 'source_count', count(*)) from image_sources s group by image_id;
 	SQL
 
 hides: image_search_json
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
-		insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_ids', jsonb_agg(ih.user_id), 'hidden_by', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id;
+		insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_user_ids', jsonb_agg(ih.user_id), 'hidden_by_users', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id;
 	SQL
 
 downvotes: image_search_json
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
-		insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoted_by_ids', jsonb_agg(iv.user_id), 'downvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id;
+		insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoter_ids', jsonb_agg(iv.user_id), 'downvoters', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id;
 	SQL
 
 upvotes: image_search_json
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
-		insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoted_by_ids', jsonb_agg(iv.user_id), 'upvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id;
+		insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoter_ids', jsonb_agg(iv.user_id), 'upvoters', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id;
 	SQL
 
 faves: image_search_json
 	psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
-		insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('faved_by_ids', jsonb_agg(if.user_id), 'faved_by', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id;
+		insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('favourited_by_user_ids', jsonb_agg(if.user_id), 'favourited_by_users', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id;
 	SQL
 
 tag_names: tags_with_aliases
diff --git a/lib/philomena/images/elasticsearch_index.ex b/lib/philomena/images/elasticsearch_index.ex
index b76912bdd..0e5fb2969 100644
--- a/lib/philomena/images/elasticsearch_index.ex
+++ b/lib/philomena/images/elasticsearch_index.ex
@@ -56,6 +56,7 @@ defmodule Philomena.Images.ElasticsearchIndex do
           size: %{type: "integer"},
           sha512_hash: %{type: "keyword"},
           source_url: %{type: "keyword"},
+          source_count: %{type: "integer"},
           tag_count: %{type: "integer"},
           tag_ids: %{type: "keyword"},
           tags: %{type: "text", analyzer: "keyword"},
@@ -87,7 +88,17 @@ defmodule Philomena.Images.ElasticsearchIndex do
               namespace: %{type: "keyword"}
             }
           },
-          approved: %{type: "boolean"}
+          approved: %{type: "boolean"},
+          error_tag_count: %{type: "integer"},
+          rating_tag_count: %{type: "integer"},
+          origin_tag_count: %{type: "integer"},
+          character_tag_count: %{type: "integer"},
+          oc_tag_count: %{type: "integer"},
+          species_tag_count: %{type: "integer"},
+          body_type_tag_count: %{type: "integer"},
+          content_fanmade_tag_count: %{type: "integer"},
+          content_official_tag_count: %{type: "integer"},
+          spoiler_tag_count: %{type: "integer"}
         }
       }
     }
@@ -120,6 +131,7 @@ defmodule Philomena.Images.ElasticsearchIndex do
       uploader: if(!!image.user and !image.anonymous, do: String.downcase(image.user.name)),
       true_uploader: if(!!image.user, do: String.downcase(image.user.name)),
       source_url: image.sources |> Enum.map(&String.downcase(&1.source)),
+      source_count: length(image.sources),
       file_name: image.image_name,
       original_format: image.image_format,
       processed: image.processed,
@@ -151,7 +163,17 @@ defmodule Philomena.Images.ElasticsearchIndex do
       upvoters: image.upvoters |> Enum.map(&String.downcase(&1.name)),
       downvoters: image.downvoters |> Enum.map(&String.downcase(&1.name)),
       deleted_by_user: if(!!image.deleter, do: image.deleter.name),
-      approved: image.approved
+      approved: image.approved,
+      error_tag_count: Enum.count(image.tags, &(&1.category == "error")),
+      rating_tag_count: Enum.count(image.tags, &(&1.category == "rating")),
+      origin_tag_count: Enum.count(image.tags, &(&1.category == "origin")),
+      character_tag_count: Enum.count(image.tags, &(&1.category == "character")),
+      oc_tag_count: Enum.count(image.tags, &(&1.category == "oc")),
+      species_tag_count: Enum.count(image.tags, &(&1.category == "species")),
+      body_type_tag_count: Enum.count(image.tags, &(&1.category == "body-type")),
+      content_fanmade_tag_count: Enum.count(image.tags, &(&1.category == "content-fanmade")),
+      content_official_tag_count: Enum.count(image.tags, &(&1.category == "content-official")),
+      spoiler_tag_count: Enum.count(image.tags, &(&1.category == "spoiler"))
     }
   end
 