From 25fe100e06c9b5cdc0fd6af6d63879f8db66acd6 Mon Sep 17 00:00:00 2001 From: tooyosi Date: Mon, 18 Nov 2024 22:11:05 +0000 Subject: [PATCH 1/4] add extractor name to prediction request for new workflow --- app/services/batch/prediction/create_job.rb | 10 +++++++++- lib/bajor/client.rb | 4 ++-- spec/lib/bajor/client_spec.rb | 6 +++--- spec/services/batch/prediction/create_job_spec.rb | 5 +++-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/app/services/batch/prediction/create_job.rb b/app/services/batch/prediction/create_job.rb index 007f5a0..f43353a 100644 --- a/app/services/batch/prediction/create_job.rb +++ b/app/services/batch/prediction/create_job.rb @@ -14,7 +14,15 @@ def initialize(prediction_job, bajor_client = Bajor::Client.new) def run begin - bajor_job_url = bajor_client.create_prediction_job(prediction_job.manifest_url) + subject_set_id = prediction_job.subject_set_id + context = Context.where(active_subject_set_id: subject_set_id).or(Context.where(pool_subject_set_id: subject_set_id)).order(Arel.sql("CASE WHEN active_subject_set_id = #{subject_set_id} THEN 0 ELSE 1 END")) + + workflow_name = nil + if context.first.present? + workflow_name = context.first.extractor_name + end + + bajor_job_url = bajor_client.create_prediction_job(prediction_job.manifest_url, workflow_name) prediction_job.update(state: :submitted, service_job_url: bajor_job_url, message: '') rescue Bajor::Client::Error => e # mark the jobs as failed and record the client error message diff --git a/lib/bajor/client.rb b/lib/bajor/client.rb index 20517d6..7052ff8 100644 --- a/lib/bajor/client.rb +++ b/lib/bajor/client.rb @@ -36,10 +36,10 @@ def create_training_job(manifest_path, workflow_name='cosmic_dawn') bajor_training_job_tracking_url(bajor_response['id']) end - def create_prediction_job(manifest_url) + def create_prediction_job(manifest_url, workflow_name='cosmic_dawn') bajor_response = self.class.post( '/prediction/jobs/', - body: { manifest_url: manifest_url }.to_json, + body: { manifest_url: manifest_url, opts: { 'workflow_name': workflow_name } }.to_json, headers: JSON_HEADERS ) diff --git a/spec/lib/bajor/client_spec.rb b/spec/lib/bajor/client_spec.rb index 4056906..b942737 100644 --- a/spec/lib/bajor/client_spec.rb +++ b/spec/lib/bajor/client_spec.rb @@ -6,6 +6,8 @@ RSpec.describe Bajor::Client do let(:bajor_client) { described_class.new } let(:bajor_host) { 'https://bajor.zooniverse.org' } + let(:run_opts) { '--schema cosmic_dawn' } + let(:workflow_name) { 'cosmic_dawn' } let(:request_headers) do { 'Accept' => 'application/json', @@ -19,8 +21,6 @@ describe 'create_training_job' do let(:request_url) { "#{bajor_host}/training/jobs/" } let(:catalogue_manifest_path) { 'training_catalogues/manifest_path.csv' } - let(:run_opts) { '--schema cosmic_dawn' } - let(:workflow_name) { 'cosmic_dawn' } let(:request_body) do { manifest_path: catalogue_manifest_path, @@ -93,7 +93,7 @@ let(:request_url) { "#{bajor_host}/prediction/jobs/" } let(:manifest_url) { 'https://manifest-host.zooniverse.org/manifest.csv' } let(:request_body) do - { manifest_url: manifest_url } + { manifest_url: manifest_url, opts: { workflow_name: workflow_name} } end let(:job_id) { '3ed68115-dc36-4f66-838c-a52869031c9c' } let(:bajor_response_body) do diff --git a/spec/services/batch/prediction/create_job_spec.rb b/spec/services/batch/prediction/create_job_spec.rb index 1f05fcd..866e9ec 100644 --- a/spec/services/batch/prediction/create_job_spec.rb +++ b/spec/services/batch/prediction/create_job_spec.rb @@ -9,7 +9,7 @@ PredictionJob.new( manifest_url: manifest_url, state: :pending, - subject_set_id: 1, + subject_set_id: 55, probability_threshold: 0.5, randomisation_factor: 0.5 ) @@ -32,8 +32,9 @@ end it 'calls the bajor client service to create a prediction job' do + context = Context.find_by(active_subject_set_id: prediction_job.subject_set_id) prediction_create_job.run - expect(bajor_client_double).to have_received(:create_prediction_job).with(manifest_url).once + expect(bajor_client_double).to have_received(:create_prediction_job).with(manifest_url, context.extractor_name).once end it 'updates the state tracking info on the prediction job resource' do From 764236d969fdcc3c57991f5fb136d0a493e8d2ba Mon Sep 17 00:00:00 2001 From: tooyosi Date: Mon, 18 Nov 2024 22:23:48 +0000 Subject: [PATCH 2/4] fix test --- spec/services/batch/prediction/create_job_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/services/batch/prediction/create_job_spec.rb b/spec/services/batch/prediction/create_job_spec.rb index 866e9ec..e682a37 100644 --- a/spec/services/batch/prediction/create_job_spec.rb +++ b/spec/services/batch/prediction/create_job_spec.rb @@ -5,11 +5,12 @@ RSpec.describe Batch::Prediction::CreateJob do describe '#run' do let(:manifest_url) { 'https://manifest-host.zooniverse.org/manifest.csv' } + let(:context){Context.first} let(:prediction_job) do PredictionJob.new( manifest_url: manifest_url, state: :pending, - subject_set_id: 55, + subject_set_id: context.active_subject_set_id, probability_threshold: 0.5, randomisation_factor: 0.5 ) @@ -32,7 +33,6 @@ end it 'calls the bajor client service to create a prediction job' do - context = Context.find_by(active_subject_set_id: prediction_job.subject_set_id) prediction_create_job.run expect(bajor_client_double).to have_received(:create_prediction_job).with(manifest_url, context.extractor_name).once end From 74fd273d356c9da7de02288beac58dfb105267d0 Mon Sep 17 00:00:00 2001 From: tooyosi Date: Wed, 4 Dec 2024 11:40:15 +0000 Subject: [PATCH 3/4] add new test cases --- app/services/batch/prediction/create_job.rb | 10 +-- spec/fixtures/contexts.yml | 31 ++++++++- spec/lib/bajor/client_spec.rb | 65 +++++++++++++++++++ .../batch/prediction/create_job_spec.rb | 42 +++++++++++- 4 files changed, 141 insertions(+), 7 deletions(-) diff --git a/app/services/batch/prediction/create_job.rb b/app/services/batch/prediction/create_job.rb index f43353a..dba9fe5 100644 --- a/app/services/batch/prediction/create_job.rb +++ b/app/services/batch/prediction/create_job.rb @@ -15,12 +15,12 @@ def initialize(prediction_job, bajor_client = Bajor::Client.new) def run begin subject_set_id = prediction_job.subject_set_id - context = Context.where(active_subject_set_id: subject_set_id).or(Context.where(pool_subject_set_id: subject_set_id)).order(Arel.sql("CASE WHEN active_subject_set_id = #{subject_set_id} THEN 0 ELSE 1 END")) + context = Context + .where(active_subject_set_id: subject_set_id) + .or(Context.where(pool_subject_set_id: subject_set_id)) + .order(Arel.sql("CASE WHEN active_subject_set_id = #{subject_set_id} THEN 0 ELSE 1 END")) - workflow_name = nil - if context.first.present? - workflow_name = context.first.extractor_name - end + workflow_name = context.first&.extractor_name bajor_job_url = bajor_client.create_prediction_job(prediction_job.manifest_url, workflow_name) prediction_job.update(state: :submitted, service_job_url: bajor_job_url, message: '') diff --git a/spec/fixtures/contexts.yml b/spec/fixtures/contexts.yml index 9a02e83..5ec0fb5 100644 --- a/spec/fixtures/contexts.yml +++ b/spec/fixtures/contexts.yml @@ -1,4 +1,4 @@ -galaxy_zoo_active_learning_project: +galaxy_zoo_cosmic_active_learning_project: id: 1 workflow_id: 123 project_id: 39 @@ -7,3 +7,32 @@ galaxy_zoo_active_learning_project: module_name: 'galaxy_zoo' extractor_name: 'cosmic_dawn' +galaxy_zoo_euclid_active_learning_project: + id: 2 + workflow_id: 133 + project_id: 40 + active_subject_set_id: 55 + pool_subject_set_id: 67 + module_name: 'galaxy_zoo' + extractor_name: 'euclid' + + +third_workflow_context: + id: 3 + workflow_id: 143 + project_id: 41 + active_subject_set_id: 70 + pool_subject_set_id: 80 + module_name: 'galaxy_zoo' + extractor_name: 'third_workflow' + +fourth_workflow_context: + id: 4 + workflow_id: 153 + project_id: 42 + active_subject_set_id: 80 + pool_subject_set_id: 70 + module_name: 'galaxy_zoo' + extractor_name: 'fourth_workflow' + + diff --git a/spec/lib/bajor/client_spec.rb b/spec/lib/bajor/client_spec.rb index b942737..274048d 100644 --- a/spec/lib/bajor/client_spec.rb +++ b/spec/lib/bajor/client_spec.rb @@ -68,6 +68,40 @@ end end + context 'with specific workflow_name' do + let(:workflow_name) { 'euclid' } + let(:run_opts) { '--schema euclid' } + + let(:request_body) do + { + manifest_path: catalogue_manifest_path, + opts: { + run_opts: run_opts, + workflow_name: workflow_name + } + } + end + let(:request) do + stub_request(:post, request_url) + .with( + body: request_body, + headers: request_headers + ) + end + + before do + request.to_return(status: 201, body: bajor_response_body.to_json, headers: { content_type: 'application/json' }) + end + + it 'sends the right workflow name' do + bajor_client.create_training_job(catalogue_manifest_path, workflow_name) + expect( + a_request(:post, request_url).with(body: request_body, headers: request_headers) + ).to have_been_made.once + end + + end + context 'with a failed repsonse' do let(:error_message) do 'Active Jobs are running in the batch system - please wait till they are fininshed processing.' @@ -133,6 +167,37 @@ end end + context 'with specific workflow_name' do + let(:workflow_name) { 'euclid' } + let(:run_opts) { '--schema euclid' } + + let(:request_body) do + { manifest_url: manifest_url, opts: { workflow_name: workflow_name} } + end + + let(:request) do + stub_request(:post, request_url) + .with( + body: request_body, + headers: request_headers + ) + end + + before do + request.to_return(status: 201, body: bajor_response_body.to_json, headers: { content_type: 'application/json' }) + end + + it 'sends the right workflow name' do + + bajor_client.create_prediction_job(manifest_url, 'euclid') + + + expect( + a_request(:post, request_url).with(body: request_body, headers: request_headers) + ).to have_been_made.once + end + end + context 'with a failed repsonse' do let(:error_message) do 'Active Jobs are running in the batch system - please wait till they are fininshed processing.' diff --git a/spec/services/batch/prediction/create_job_spec.rb b/spec/services/batch/prediction/create_job_spec.rb index e682a37..9cf771b 100644 --- a/spec/services/batch/prediction/create_job_spec.rb +++ b/spec/services/batch/prediction/create_job_spec.rb @@ -4,8 +4,10 @@ RSpec.describe Batch::Prediction::CreateJob do describe '#run' do + fixtures :contexts + let(:manifest_url) { 'https://manifest-host.zooniverse.org/manifest.csv' } - let(:context){Context.first} + let(:context){ contexts(:galaxy_zoo_cosmic_active_learning_project) } let(:prediction_job) do PredictionJob.new( manifest_url: manifest_url, @@ -37,6 +39,44 @@ expect(bajor_client_double).to have_received(:create_prediction_job).with(manifest_url, context.extractor_name).once end + describe 'prediction_job with pool_subject_set_id' do + let(:context){ contexts(:galaxy_zoo_euclid_active_learning_project) } + let(:prediction_job) do + PredictionJob.new( + manifest_url: manifest_url, + state: :pending, + subject_set_id: context.pool_subject_set_id, + probability_threshold: 0.5, + randomisation_factor: 0.5 + ) + end + + it 'calls the bajor client service with workflow name from pool_subject_set_id' do + described_class.new(prediction_job, bajor_client_double).run + expect(bajor_client_double).to have_received(:create_prediction_job).with(manifest_url, context.extractor_name).once + end + end + + describe 'with same active_subject_id and pool_subject_set_id' do + let(:context1){ contexts(:third_workflow_context) } + let(:context2){ contexts(:fourth_workflow_context) } + let(:prediction_job) do + PredictionJob.new( + manifest_url: manifest_url, + state: :pending, + subject_set_id: context2.pool_subject_set_id, + probability_threshold: 0.5, + randomisation_factor: 0.5 + ) + end + + it 'calls the bajor client service with workflow name from an active_subject_set_id' do + + described_class.new(prediction_job, bajor_client_double).run + expect(bajor_client_double).to have_received(:create_prediction_job).with(manifest_url, context1.extractor_name).once + end + end + it 'updates the state tracking info on the prediction job resource' do expect { prediction_create_job.run From da3dd75a1de5df970d688c30c9271a1503c8e940 Mon Sep 17 00:00:00 2001 From: tooyosi Date: Thu, 12 Dec 2024 13:51:23 +0000 Subject: [PATCH 4/4] rely on only pool_subject_set_id for fetching prediction context --- app/services/batch/prediction/create_job.rb | 5 +---- spec/fixtures/contexts.yml | 21 ------------------ .../batch/prediction/create_job_spec.rb | 22 +------------------ 3 files changed, 2 insertions(+), 46 deletions(-) diff --git a/app/services/batch/prediction/create_job.rb b/app/services/batch/prediction/create_job.rb index dba9fe5..920e5af 100644 --- a/app/services/batch/prediction/create_job.rb +++ b/app/services/batch/prediction/create_job.rb @@ -15,10 +15,7 @@ def initialize(prediction_job, bajor_client = Bajor::Client.new) def run begin subject_set_id = prediction_job.subject_set_id - context = Context - .where(active_subject_set_id: subject_set_id) - .or(Context.where(pool_subject_set_id: subject_set_id)) - .order(Arel.sql("CASE WHEN active_subject_set_id = #{subject_set_id} THEN 0 ELSE 1 END")) + context = Context.where(pool_subject_set_id: subject_set_id) workflow_name = context.first&.extractor_name diff --git a/spec/fixtures/contexts.yml b/spec/fixtures/contexts.yml index 5ec0fb5..dec7a2e 100644 --- a/spec/fixtures/contexts.yml +++ b/spec/fixtures/contexts.yml @@ -15,24 +15,3 @@ galaxy_zoo_euclid_active_learning_project: pool_subject_set_id: 67 module_name: 'galaxy_zoo' extractor_name: 'euclid' - - -third_workflow_context: - id: 3 - workflow_id: 143 - project_id: 41 - active_subject_set_id: 70 - pool_subject_set_id: 80 - module_name: 'galaxy_zoo' - extractor_name: 'third_workflow' - -fourth_workflow_context: - id: 4 - workflow_id: 153 - project_id: 42 - active_subject_set_id: 80 - pool_subject_set_id: 70 - module_name: 'galaxy_zoo' - extractor_name: 'fourth_workflow' - - diff --git a/spec/services/batch/prediction/create_job_spec.rb b/spec/services/batch/prediction/create_job_spec.rb index 9cf771b..ee1a330 100644 --- a/spec/services/batch/prediction/create_job_spec.rb +++ b/spec/services/batch/prediction/create_job_spec.rb @@ -12,7 +12,7 @@ PredictionJob.new( manifest_url: manifest_url, state: :pending, - subject_set_id: context.active_subject_set_id, + subject_set_id: context.pool_subject_set_id, probability_threshold: 0.5, randomisation_factor: 0.5 ) @@ -57,26 +57,6 @@ end end - describe 'with same active_subject_id and pool_subject_set_id' do - let(:context1){ contexts(:third_workflow_context) } - let(:context2){ contexts(:fourth_workflow_context) } - let(:prediction_job) do - PredictionJob.new( - manifest_url: manifest_url, - state: :pending, - subject_set_id: context2.pool_subject_set_id, - probability_threshold: 0.5, - randomisation_factor: 0.5 - ) - end - - it 'calls the bajor client service with workflow name from an active_subject_set_id' do - - described_class.new(prediction_job, bajor_client_double).run - expect(bajor_client_double).to have_received(:create_prediction_job).with(manifest_url, context1.extractor_name).once - end - end - it 'updates the state tracking info on the prediction job resource' do expect { prediction_create_job.run