-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from pulibrary/experiment_solr
Generate Fixture Data Docker Image
- Loading branch information
Showing
10 changed files
with
642 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
name: dpul-collections | ||
services: | ||
solr: | ||
api: 3 | ||
type: lando | ||
app_mount: false | ||
moreHttpPorts: | ||
- 8983 | ||
services: | ||
image: ghcr.io/pulibrary/dpul-collections:fixtures-v1 | ||
ports: | ||
- "8983:8983" | ||
command: solr-fg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
figgy_manifest_fixtures |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Solr image with a core named "core" that is created and loaded with the
# fixture documents at build time.
FROM solr:8.4
# Installing packages needs root; the build switches back to the solr user below.
USER root
# Remove the apt package lists in the same layer so they are not kept in the image.
RUN apt-get update && apt-get install ruby -y && rm -rf /var/lib/apt/lists/*
USER solr
# COPY rather than ADD: these are plain local directories, so ADD's extra
# archive/URL handling is not wanted.
COPY solr/config /opt/solr/solrconfig
# Processes do not survive from one RUN to the next, so Solr is started again
# in every RUN that has to talk to it.
RUN start-local-solr && /opt/solr/bin/solr create -c core -d /opt/solr/solrconfig
# NOTE(review): the fixtures are presumably copied after core creation so that
# regenerating them does not invalidate the cached core-creation layer.
COPY figgy_manifest_fixtures /opt/solr/figgy_manifest_fixtures
RUN start-local-solr && /opt/solr/bin/post -c core /opt/solr/figgy_manifest_fixtures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# frozen_string_literal: true | ||
|
||
source "https://rubygems.org" | ||
|
||
gem "pry", "~> 0.14.2" | ||
|
||
gem "parallel", "~> 1.24" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
GEM | ||
remote: https://rubygems.org/ | ||
specs: | ||
coderay (1.1.3) | ||
method_source (1.0.0) | ||
parallel (1.24.0) | ||
pry (0.14.2) | ||
coderay (~> 1.1) | ||
method_source (~> 1.0) | ||
|
||
PLATFORMS | ||
arm64-darwin-21 | ||
|
||
DEPENDENCIES | ||
parallel (~> 1.24) | ||
pry (~> 0.14.2) | ||
|
||
BUNDLED WITH | ||
2.4.10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Fixture Container | ||
|
||
This directory contains a Dockerfile for publishing a Solr image pre-loaded with fixtures for our DPUL-Collections tech experiments, so we don't have to mess with indexing for those tests. | ||
|
||
You can run this container like so: | ||
|
||
`docker run -p 8983:8983 ghcr.io/pulibrary/dpul-collections:fixtures-v1` | ||
|
||
## Build Instructions | ||
|
||
1. `bundle install` | ||
1. `bundle exec generate-fixtures` | ||
* This is a separate process because it takes a long time, and we don't want the docker build to have to do it for every rebuild. | ||
1. `export CR_PAT=$(lpass show dpul_collections_fixture_container_github_token --notes)` | ||
1. `echo $CR_PAT | docker login ghcr.io -u pulbot --password-stdin` | ||
1. `docker buildx create --name multiarch --driver docker-container --use` | ||
1. `docker buildx build --push --platform linux/arm64,linux/amd64 -t ghcr.io/pulibrary/dpul-collections:fixtures-v1 .` | ||
|
||
## Regenerate Cached Files | ||
|
||
For a full from-scratch rebuild, delete `cached-collection-manifest.json` and the `figgy_manifest_fixtures` directory, then follow the build instructions above. |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env ruby | ||
# frozen_string_literal: true | ||
|
||
require 'open-uri' | ||
require 'json' | ||
require 'pry' | ||
require 'fileutils' | ||
require 'parallel' | ||
|
||
# IIIF collection manifest whose members are turned into Solr fixture documents.
COLLECTION_MANIFEST = 'https://figgy.princeton.edu/collections/b80f8d41-3be5-440e-8bdb-eff6489f3088/manifest'
# COLLECTION_MANIFEST = 'https://figgy.princeton.edu/collections/6ff2c854-f102-4a5e-861d-276179a3a5f0/manifest'

# Local copy of the collection manifest; when present it is used instead of downloading.
COLLECTION_CACHE = 'cached-collection-manifest.json'
# Directory that receives one JSON document per collection member.
FIXTURE_DIR = 'figgy_manifest_fixtures'
# Number of worker processes used to fetch member manifests.
WORKER_COUNT = 30

# Returns the raw collection manifest JSON, downloading and caching it when no
# cached copy exists yet.
#
# @return [String] unparsed JSON text
def collection_manifest_json
  return File.read(COLLECTION_CACHE) if File.exist?(COLLECTION_CACHE)

  # Allow up to 15 minutes for the response before giving up.
  content = URI.open(COLLECTION_MANIFEST, read_timeout: 900).read
  File.open(COLLECTION_CACHE, 'w') { |f| f.puts content }
  content
end

# Downloads +url+ and parses the response body as JSON.
#
# @param url [String]
# @return [Hash, Array] the parsed document
def fetch_json(url)
  JSON.parse(URI.open(url).read)
end

# Builds the Solr document for one member of the collection.
#
# Fetches the member's manifest, follows its JSON-LD +seeAlso+ link and pulls
# the identifier, title and abstract out of that metadata.
#
# @param member_manifest [Hash] entry from the collection manifest's "manifests" list
# @return [Hash] document with :id, :title_tesi, :description_tesim,
#   :thumbnail_ssi and :manifest_ssi (:id is nil when the metadata has no identifier)
# @raise [StandardError] on network/parse errors or when seeAlso, title or
#   thumbnail are missing
def build_fixture(member_manifest)
  item_manifest = fetch_json(member_manifest['@id'])

  # seeAlso is either one object or a list. Wrap by hand: Array() would turn
  # a Hash into a list of key/value pairs.
  see_also = item_manifest['seeAlso']
  see_also = [see_also] unless see_also.is_a?(Array)
  json_ld = see_also.find { |x| x['format'] == 'application/ld+json' }
  metadata = fetch_json(json_ld['@id'])

  # The last path segment of the first identifier becomes the document id.
  identifier = metadata['identifier']&.first&.split('/')&.last
  title = metadata['title'].first
  title = title['@value'] if title.is_a?(Hash)

  {
    id: identifier,
    title_tesi: title,
    description_tesim: metadata['abstract'],
    thumbnail_ssi: item_manifest['thumbnail']['@id'],
    manifest_ssi: item_manifest['@id']
  }
end

puts 'Parsing collection manifest'
manifest = JSON.parse(collection_manifest_json)
FileUtils.mkdir_p(FIXTURE_DIR)

Parallel.each(manifest['manifests'], in_processes: WORKER_COUNT) do |member_manifest|
  puts "Parsing #{member_manifest['@id']}"
  begin
    fixture = build_fixture(member_manifest)
    # Without an id the file would be named ".json" and every such record
    # would overwrite the previous one, so skip it instead.
    unless fixture[:id]
      warn "Skipping #{member_manifest['@id']}: metadata has no identifier"
      next
    end
    File.open(File.join(FIXTURE_DIR, "#{fixture[:id]}.json"), 'w') do |f|
      f.puts JSON.dump(fixture)
    end
  rescue StandardError => e
    # Stay best-effort (one bad record must not abort the run), but report
    # what was dropped instead of swallowing the error silently.
    warn "Skipping #{member_manifest['@id']}: #{e.class}: #{e.message}"
  end
end
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
<?xml version="1.0" encoding="UTF-8" ?> | ||
<config> | ||
<!-- NOTE: various comments and unused configuration possibilities have been purged | ||
from this file. Please refer to http://wiki.apache.org/solr/SolrConfigXml, | ||
as well as the default solrconfig file included with Solr --> | ||
|
||
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError> | ||
|
||
<luceneMatchVersion>6.1.0</luceneMatchVersion> | ||
|
||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/> | ||
|
||
<updateHandler class="solr.DirectUpdateHandler2"> | ||
<updateLog> | ||
<str name="dir">${solr.core0.data.dir:}</str> | ||
</updateLog> | ||
<!-- Hard commit every 10 hours (maxTime is in milliseconds) or every 10,000 documents, whichever comes first --> | ||
<autoCommit> | ||
<maxDocs>10000</maxDocs> | ||
<maxTime>36000000</maxTime> | ||
<openSearcher>false</openSearcher> | ||
</autoCommit> | ||
<!-- Soft commit every 5 minutes --> | ||
<autoSoftCommit> | ||
<maxTime>300000</maxTime> | ||
</autoSoftCommit> | ||
</updateHandler> | ||
|
||
<!-- solr lib dirs --> | ||
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" /> | ||
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" /> | ||
|
||
<dataDir>${solr.data.dir:}</dataDir> | ||
|
||
<requestHandler name="search" class="solr.SearchHandler" default="true"> | ||
<!-- default values for query parameters can be specified, these | ||
will be overridden by parameters in the request | ||
--> | ||
<lst name="defaults"> | ||
<str name="defType">edismax</str> | ||
<str name="echoParams">explicit</str> | ||
<str name="q.alt">*:*</str> | ||
<str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str> | ||
<int name="qs">1</int> | ||
<int name="ps">2</int> | ||
<float name="tie">0.01</float> | ||
<!-- this qf and pf are used by default, if not otherwise specified by | ||
client. The default blacklight_config will use these for the | ||
"keywords" search. See the author_qf/author_pf, title_qf, etc | ||
below, which the default blacklight_config will specify for | ||
those searches. You may also be interested in: | ||
http://wiki.apache.org/solr/LocalParams | ||
--> | ||
<str name="qf"> | ||
id | ||
full_title_tesim | ||
short_title_tesim | ||
alternative_title_tesim | ||
active_fedora_model_ssi | ||
title_tesim | ||
author_tesim | ||
subject_tesim | ||
all_text_timv | ||
</str> | ||
<str name="pf"> | ||
all_text_timv^10 | ||
</str> | ||
|
||
<str name="author_qf"> | ||
author_tesim | ||
</str> | ||
<str name="author_pf"> | ||
</str> | ||
<str name="title_qf"> | ||
title_tesim | ||
full_title_tesim | ||
short_title_tesim | ||
alternative_title_tesim | ||
</str> | ||
<str name="title_pf"> | ||
</str> | ||
<str name="subject_qf"> | ||
subject_tesim | ||
</str> | ||
<str name="subject_pf"> | ||
</str> | ||
|
||
<str name="fl"> | ||
*, | ||
score | ||
</str> | ||
|
||
<str name="facet">true</str> | ||
<str name="facet.mincount">1</str> | ||
<str name="facet.limit">10</str> | ||
<str name="facet.field">active_fedora_model_ssi</str> | ||
<str name="facet.field">subject_ssim</str> | ||
|
||
<str name="spellcheck">true</str> | ||
<str name="spellcheck.dictionary">default</str> | ||
<str name="spellcheck.onlyMorePopular">true</str> | ||
<str name="spellcheck.extendedResults">true</str> | ||
<str name="spellcheck.collate">false</str> | ||
<str name="spellcheck.count">5</str> | ||
|
||
</lst> | ||
<arr name="last-components"> | ||
<str>spellcheck</str> | ||
</arr> | ||
</requestHandler> | ||
|
||
<requestHandler name="permissions" class="solr.SearchHandler" > | ||
<lst name="defaults"> | ||
<str name="facet">off</str> | ||
<str name="echoParams">all</str> | ||
<str name="rows">1</str> | ||
<str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 --> | ||
<str name="fl"> | ||
id, | ||
access_ssim, | ||
discover_access_group_ssim,discover_access_person_ssim, | ||
read_access_group_ssim,read_access_person_ssim, | ||
edit_access_group_ssim,edit_access_person_ssim, | ||
depositor_ti, | ||
embargo_release_date_dtsi | ||
inheritable_access_ssim, | ||
inheritable_discover_access_group_ssim,inheritable_discover_access_person_ssim, | ||
inheritable_read_access_group_ssim,inheritable_read_access_person_ssim, | ||
inheritable_edit_access_group_ssim,inheritable_edit_access_person_ssim, | ||
inheritable_embargo_release_date_dtsi | ||
</str> | ||
</lst> | ||
</requestHandler> | ||
|
||
<requestHandler name="standard" class="solr.SearchHandler"> | ||
<lst name="defaults"> | ||
<str name="echoParams">explicit</str> | ||
<str name="defType">lucene</str> | ||
</lst> | ||
</requestHandler> | ||
|
||
<!-- for requests to get a single document; use id=666 instead of q=id:666 --> | ||
<requestHandler name="document" class="solr.SearchHandler" > | ||
<lst name="defaults"> | ||
<str name="echoParams">all</str> | ||
<str name="fl">*</str> | ||
<str name="rows">1</str> | ||
<str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 --> | ||
</lst> | ||
</requestHandler> | ||
|
||
|
||
<searchComponent name="spellcheck" class="solr.SpellCheckComponent"> | ||
<str name="queryAnalyzerFieldType">textSpell</str> | ||
<!-- Multiple "Spell Checkers" can be declared and used by this component | ||
(e.g. for title_spell field) | ||
--> | ||
<lst name="spellchecker"> | ||
<str name="name">default</str> | ||
<str name="field">spell</str> | ||
<str name="spellcheckIndexDir">./spell</str> | ||
<str name="buildOnOptimize">true</str> | ||
</lst> | ||
</searchComponent> | ||
|
||
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" /> | ||
|
||
<requestDispatcher handleSelect="true" > | ||
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" /> | ||
</requestDispatcher> | ||
|
||
<requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" /> | ||
</config> |