Skip to content
This repository has been archived by the owner on May 28, 2024. It is now read-only.

Commit

Permalink
Merge pull request #1081 from sul-dlss/rolling-indexer-tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
mjgiarlo authored Jan 9, 2024
2 parents 4e239cd + 9eb9123 commit a5b0fee
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
10 changes: 7 additions & 3 deletions bin/rolling_index
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require_relative '../config/environment'
require 'daemons'

QUERY = { q: '*:*', sort: 'timestamp asc', fl: 'id', rows: Settings.rolling_indexer.batch_size }
QUERY = { q: '*:*', sort: 'timestamp asc', fl: 'id,timestamp', rows: Settings.rolling_indexer.batch_size }

Daemons.run_proc(
File.basename(__FILE__),
Expand Down Expand Up @@ -40,12 +40,16 @@ Daemons.run_proc(
end
end.compact

solr_conn.add(solr_docs, add_attributes: { commitWithin: 1000 })
solr_conn.add(solr_docs, add_attributes: { commitWithin: Settings.rolling_indexer.commit_within.to_i })

end_time = Time.now
batch_run_seconds = (end_time - start_time).round(3)
first_doc = response['response']['docs'].first
first_doc_str = "#{first_doc['id']} (#{first_doc['timestamp']})"
last_doc = response['response']['docs'].last
last_doc_str = "#{last_doc['id']} (#{last_doc['timestamp']})"
# The Daemons gem will redirect this to its log
puts "#{end_time}\tIndexed #{Settings.rolling_indexer.batch_size} documents in #{batch_run_seconds}"
puts "#{end_time}\tIndexed #{Settings.rolling_indexer.batch_size} documents in #{batch_run_seconds} (#{first_doc_str} - #{last_doc_str})"

sleep(Settings.rolling_indexer.pause_time_between_batches)
end
Expand Down
8 changes: 6 additions & 2 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
date_format_str: '%Y-%m-%d %H:%M:%S.%L'
rolling_indexer:
batch_size: 500
pause_time_between_docs: .2
pause_time_between_batches: 0
# in seconds
pause_time_between_docs: 0.2
# in seconds; commitWithin performs a soft commit by default, so this pause should be a little longer than the commitWithin interval
pause_time_between_batches: 11
# milliseconds; sent to Solr as the commitWithin attribute of each batch add.
# The key is snake_case because bin/rolling_index reads Settings.rolling_indexer.commit_within.
commit_within: 1000

ssl:
cert_file: ~
Expand Down

0 comments on commit a5b0fee

Please sign in to comment.