From 47b6fed6345feb581069992491ab1d53a0a89971 Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Wed, 6 Mar 2024 15:55:12 -0500 Subject: [PATCH] DEV-994 OFFSET and LIMIT Queries (#8) * DEV-994 Rights API: OFFSET and LIMIT Queries - Add `QueryParser` class. - Process CGI parameters and Integrate `id` queries. - Remove `wait-for` from Dockerfile and docker-compose - Use docker-compose healthchecks, remove `test` service - Rename test.yml to tests.yml - Add millisecond timer and result field for Sequel operations. --- .github/workflows/{test.yml => tests.yml} | 2 +- Dockerfile | 2 - README.md | 13 ++-- docker-compose.yml | 26 +++---- lib/rights_api.rb | 1 + lib/rights_api/app.rb | 18 +++-- lib/rights_api/error.rb | 8 +++ lib/rights_api/query.rb | 44 ++++++++---- lib/rights_api/query_parser.rb | 84 +++++++++++++++++++++++ lib/rights_api/result.rb | 6 +- lib/rights_api/result/error_result.rb | 18 +++++ spec/integration/support_table_spec.rb | 42 ++++++++++++ spec/shared_examples.rb | 6 ++ spec/spec_helper.rb | 4 ++ spec/unit/query_parser_spec.rb | 48 +++++++++++++ spec/unit/query_spec.rb | 7 +- spec/unit/result/error_result_spec.rb | 21 ++++++ spec/unit/result_spec.rb | 6 +- 18 files changed, 309 insertions(+), 47 deletions(-) rename .github/workflows/{test.yml => tests.yml} (93%) create mode 100644 lib/rights_api/error.rb create mode 100644 lib/rights_api/query_parser.rb create mode 100644 lib/rights_api/result/error_result.rb create mode 100644 spec/unit/query_parser_spec.rb create mode 100644 spec/unit/result/error_result_spec.rb diff --git a/.github/workflows/test.yml b/.github/workflows/tests.yml similarity index 93% rename from .github/workflows/test.yml rename to .github/workflows/tests.yml index 36b07c7..9a3b601 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: run: docker compose run web bundle exec standardrb - name: Run tests - run: docker compose run test + run: docker compose run web bundle exec rspec - name: Report to Coveralls uses: coverallsapp/github-action@1.1.3 diff --git a/Dockerfile b/Dockerfile index 33cee38..f08d0a2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,5 +12,3 @@ WORKDIR /usr/src/app ENV BUNDLE_PATH /gems # RUN gem install bundler - -RUN wget -O /usr/local/bin/wait-for https://github.com/eficode/wait-for/releases/download/v2.2.3/wait-for; chmod +x /usr/local/bin/wait-for diff --git a/README.md b/README.md index c4d9e90..c0277f8 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ ```bash git clone https://github.com/hathitrust/rights_api cd rights_api -docker-compose build -docker-compose run --rm web bundle install -docker-compose up -d +docker compose build +docker compose run --rm web bundle install +docker compose up -d ``` ## Naming Conventions @@ -59,7 +59,7 @@ See `lib/rights_api/app.rb` for all of the Sinatra routes. ## Results -All API results, even 404s (which should only occur with a bad Sinatra route) should return +All API results, even `404`s (which should only occur with a bad Sinatra route) should return the same general JSON structure. Here's the empty variant: ```JSON @@ -93,6 +93,11 @@ Here's a truncated result from `http://localhost:4567/v1/access_profiles`: } ``` + +`400` error results from bogus parameters (e.g., `&limit=blah`) will have an additional +`error` Field in the return structure. This is human-readable and may include a backtrace, +we're not sure yet. + ## Testing The test suite is divided into unit and integration tests which can be run separately to give some orthogonality in checking for coverage gaps. diff --git a/docker-compose.yml b/docker-compose.yml index 5692d1d..ec24f63 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,16 @@ --- version: '3' -services: +x-condition-healthy: &healthy + condition: service_healthy + +x-healthcheck-defaults: &healthcheck-defaults + interval: 5s + timeout: 10s + start_period: 10s + retries: 5 +services: web: build: . ports: @@ -15,18 +23,7 @@ services: RIGHTS_API_DATABASE_CONNECTION_STRING: "mysql2://ht_rights:ht_rights@mariadb/ht" RIGHTS_API_LOGGER_LEVEL: 1 # Logger::INFO depends_on: - - mariadb - - test: - build: . - volumes: - - .:/usr/src/app - - gem_cache:/gems - command: bash -c "/usr/local/bin/wait-for mariadb:3306 && bundle exec rspec" - environment: - RIGHTS_API_DATABASE_CONNECTION_STRING: "mysql2://ht_rights:ht_rights@mariadb/ht" - depends_on: - - mariadb + mariadb: *healthy mariadb: image: ghcr.io/hathitrust/db-image @@ -38,6 +35,9 @@ services: MYSQL_DATABASE: ht MYSQL_USER: ht_rights MYSQL_PASSWORD: ht_rights + healthcheck: + <<: *healthcheck-defaults + test: [ "CMD", "healthcheck.sh", "--su-mysql", "--connect", "--innodb_initialized" ] volumes: gem_cache: diff --git a/lib/rights_api.rb b/lib/rights_api.rb index d205233..cb5c811 100644 --- a/lib/rights_api.rb +++ b/lib/rights_api.rb @@ -7,6 +7,7 @@ module RightsAPI require "rights_api/database" require "rights_api/query" require "rights_api/result" +require "rights_api/result/error_result" require "rights_api/schema" require "rights_api/services" require "rights_api/settings" diff --git a/lib/rights_api/app.rb b/lib/rights_api/app.rb index f934add..547e20e 100644 --- a/lib/rights_api/app.rb +++ b/lib/rights_api/app.rb @@ -10,6 +10,7 @@ require_relative "query" require_relative "result" +require_relative "result/error_result" require_relative "schema" module RightsAPI @@ -26,22 +27,29 @@ class App < Sinatra::Base # The full-featured search Schema.names.each do |name| get "/v1/#{name}/?" do - do_query(table_name: name) + params = CGI.parse(request.query_string) + model = Schema.model_for(name: name) + do_query(model: model, params: params) end end # The "by id" queries Schema.names.each do |name| get "/v1/#{name}/:id" do |id| - do_query(table_name: name, id: id) + model = Schema.model_for(name: name) + params = {model.default_key.to_s => [id]} + do_query(model: model, params: params) end end private - def do_query(table_name:, id: nil) - query = Query.new(table_name: table_name) - json query.run(id: id).to_h + def do_query(model:, params: {}) + query = Query.new(model: model, params: params) + json query.run.to_h + rescue QueryParserError, Sequel::Error => e + status 400 + json ErrorResult.new(exception: e).to_h end end end diff --git a/lib/rights_api/error.rb b/lib/rights_api/error.rb new file mode 100644 index 0000000..80d4067 --- /dev/null +++ b/lib/rights_api/error.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module RightsAPI + class QueryParserError < StandardError + # This is raised whenever the query parser determines the query is ill-formed. + # Should result in a 400 Bad Request response + end +end diff --git a/lib/rights_api/query.rb b/lib/rights_api/query.rb index bdb24a4..7ab8c52 100644 --- a/lib/rights_api/query.rb +++ b/lib/rights_api/query.rb @@ -1,30 +1,44 @@ # frozen_string_literal: true +require "benchmark" +require "cgi" + +require_relative "error" +require_relative "query_parser" require_relative "result" require_relative "services" module RightsAPI class Query - DEFAULT_LIMIT = 1000 - attr_reader :table_name + attr_reader :model, :params, :parser, :total, :dataset - # @param name [String, Symbol] The name of the table to be queried. - def initialize(table_name:) - @table_name = table_name + # @param model [Class] Sequel::Model subclass for the table being queried + # @param params [Hash] CGI parameters submitted to the Sinatra frontend + def initialize(model:, params: {}) + @model = model + @params = params + @parser = QueryParser.new(model: model) + @total = 0 + @dataset = nil end - # @param id [String] The primary value to retrieve, or nil for all rows. # @return [Result] - def run(id: nil) - model = Schema.model_for name: table_name - dataset = model.base_dataset - if id - where = {model.query_for_field(field: model.default_key) => id} - dataset = dataset.where(where) + def run + # This may raise QueryParserError + parser.parse(params: params) + time_delta = Benchmark.realtime do + @dataset = model.base_dataset + parser.where.each do |where| + @dataset = dataset.where(where) + end + # Save this here because offset and limit may alter the count. + @total = dataset.count + @dataset = dataset.order(*parser.order) + .offset(parser.offset) + .limit(parser.limit) + .all end - dataset = dataset.order(model.default_order) - .limit(DEFAULT_LIMIT).all - result = Result.new(total: dataset.count) + result = Result.new(offset: parser.offset, total: total, milliseconds: 1000 * time_delta) dataset.each do |row| result.add! row: row.to_h end diff --git a/lib/rights_api/query_parser.rb b/lib/rights_api/query_parser.rb new file mode 100644 index 0000000..8a3cde4 --- /dev/null +++ b/lib/rights_api/query_parser.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +require "sequel" + +require_relative "error" + +# Processes the Hash of URL parameters passed to the API into an +# Array of WHERE constraints, as well as LIMIT, and OFFSET values. +# ORDER BY (other than the schema default) will be handled here +# in a future iteration. +module RightsAPI + class QueryParser + DEFAULT_LIMIT = 1000 + DEFAULT_OFFSET = 0 + attr_reader :params, :model, :where, :order, :offset, :limit + + # @param model [Class] Sequel::Model subclass for the table being queried + def initialize(model:) + @model = model + @where = [] + @order = [] + @limit = DEFAULT_LIMIT + @offset = DEFAULT_OFFSET + end + + def parse(params: {}) + @params = params + params.each do |key, values| + key = key.to_sym + case key + when :offset + parse_offset(values: values) + when :limit + parse_limit(values: values) + else + parse_parameter(key: key, values: values) + end + end + @order = [model.default_order] if @order.empty? + self + end + + private + + # Parses a general search parameter and appends the resulting Sequel + # expression to the @where array. + # Currently only handles primary key searches. + # Example: a URL ending with ?id=1 results in: + # parse_parameter(key: :id, values: [1]) + # @param key [Symbol] The search parameter + # @param values [Array] One or more parameter values + def parse_parameter(key:, values:) + values.each do |value| + @where << {model.query_for_field(field: key.to_sym) => value} + end + end + + # Extract a single integer that can be passed to dataset.offset. + def parse_offset(values:) + @offset = parse_int_value(values: values, type: "OFFSET") + end + + # Extract a single integer that can be passed to dataset.limit. + # @param values [Array] One or more limit values + def parse_limit(values:) + @limit = parse_int_value(values: values, type: "LIMIT") + end + + # Extract integer offset= or limit= value from potentially multiple values. + # If multiple values, use the last. + # @param values [Array] One or more offset or limit values + # @param type [String] "OFFSET" or "LIMIT", used only for reporting errors. + # @return [Integer] + def parse_int_value(values:, type:) + value = values.last.to_i + # Make sure the offset can make a round-trip conversion between Int and String + # https://stackoverflow.com/a/1235891 + if value.to_s != values.last + raise QueryParserError, "#{type} expression '#{values.last}' is not an integer (#{value.to_s.class} vs #{values.last.class})" + end + value + end + end +end diff --git a/lib/rights_api/result.rb b/lib/rights_api/result.rb index 560bd6b..e2d1303 100644 --- a/lib/rights_api/result.rb +++ b/lib/rights_api/result.rb @@ -5,13 +5,14 @@ # and then data is populated according to the requested offset and limit. module RightsAPI class Result - attr_reader :offset, :total, :start, :end, :data + attr_reader :offset, :total, :start, :end, :milliseconds, :data # @param offset [Integer] The offset=x URL parameter. # @param total [Integer] The total number of results, regardless of paging. - def initialize(offset: 0, total: 0) + def initialize(offset: 0, total: 0, milliseconds: 0.0) @offset = offset @total = total + @milliseconds = milliseconds @start = 0 @end = 0 @data = [] @@ -37,6 +38,7 @@ def to_h "total" => @total, "start" => @start, "end" => @end, + "milliseconds" => @milliseconds, "data" => @data } finalize h diff --git a/lib/rights_api/result/error_result.rb b/lib/rights_api/result/error_result.rb new file mode 100644 index 0000000..3ffec8a --- /dev/null +++ b/lib/rights_api/result/error_result.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module RightsAPI + class ErrorResult < Result + def initialize(exception:) + super(offset: 0, total: 0) + @exception = exception + end + + private + + def finalize(hash) + # FIXME: should we include the backtrace or just the message? + hash[:error] = @exception.to_s + " #{@exception.backtrace}" + hash + end + end +end diff --git a/spec/integration/support_table_spec.rb b/spec/integration/support_table_spec.rb index d8beffe..f982b92 100644 --- a/spec/integration/support_table_spec.rb +++ b/spec/integration/support_table_spec.rb @@ -33,6 +33,7 @@ RSpec.describe "RightsAPI" do include Rack::Test::Methods + # Full search SUPPORT_TABLES.each do |table| describe "/#{table}" do before(:each) { get(rights_api_endpoint + table) } @@ -40,6 +41,47 @@ end end + # With OFFSET + SUPPORT_TABLES.each do |table| + describe "/#{table}/offset=" do + context "with a valid offset value" do + before(:each) { get(rights_api_endpoint + table + "?offset=1") } + it_behaves_like "nonempty #{table} response" + + it "returns data set starting at index 2" do + response = parse_json(last_response.body) + expect(response[:start]).to eq(2) + end + end + + context "with a bogus offset value" do + before(:each) { get(rights_api_endpoint + table + "?offset=x") } + it_behaves_like "400 response" + end + end + end + + # With LIMIT + SUPPORT_TABLES.each do |table| + describe "/#{table}?limit=" do + context "with a valid limit value" do + before(:each) { get(rights_api_endpoint + table + "?limit=2") } + it_behaves_like "nonempty #{table} response" + + it "returns LIMIT rows" do + response = parse_json(last_response.body) + expect(response[:data].count).to eq(2) + end + end + + context "with a bogus limit value" do + before(:each) { get(rights_api_endpoint + table + "?limit=x") } + it_behaves_like "400 response" + end + end + end + + # By-id search SUPPORT_TABLES.each do |table| describe "/#{table}/:id" do context "with a valid identifier" do diff --git a/spec/shared_examples.rb b/spec/shared_examples.rb index 5d1c7ad..d084be9 100644 --- a/spec/shared_examples.rb +++ b/spec/shared_examples.rb @@ -1,5 +1,11 @@ # frozen_string_literal: true +RSpec.shared_examples "400 response" do + it "returns an HTTP 400 response" do + expect(last_response.status).to eq 400 + end +end + RSpec.shared_examples "404 response" do it "returns an HTTP 404 response" do expect(last_response.status).to eq 404 diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index aa44510..b235fbf 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -2,6 +2,7 @@ require "simplecov" require "simplecov-lcov" +require "logger" SimpleCov.add_filter "spec" @@ -15,6 +16,9 @@ ]) SimpleCov.start +# The web service logging is more verbose than what we really want to deal with here... +ENV["RIGHTS_API_LOGGER_LEVEL"] = Logger::WARN.to_s + require_relative "../lib/rights_api" def app diff --git a/spec/unit/query_parser_spec.rb b/spec/unit/query_parser_spec.rb new file mode 100644 index 0000000..3c03cd2 --- /dev/null +++ b/spec/unit/query_parser_spec.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require "sequel" + +module RightsAPI + RSpec.describe(QueryParser) do + let(:query_parser) { described_class.new(model: Attribute) } + # let(:id_query) { described_class.new(params: "id=some+id", table_name: "rights") } + + describe ".new" do + it "creates a #{described_class}" do + expect(query_parser).to be_a(described_class) + end + + it "has the expected attribute readers" do + %i[model where order offset limit].each do |reader| + expect(query_parser.send(reader)).not_to be_nil + end + end + end + + describe "#parse" do + it "parses empty query into zero WHERE clauses" do + expect(query_parser.parse.where.count).to eq(0) + end + + it "parses id query into one WHERE clause" do + expect(query_parser.parse(params: {id: ["1"]}).where.count).to eq(1) + end + + it "parses OFFSET query" do + expect(query_parser.parse(params: {offset: ["1"]}).offset).to eq(1) + end + + it "parses LIMIT query" do + expect(query_parser.parse(params: {limit: ["5"]}).limit).to eq(5) + end + + it "raises on bogus LIMIT queries" do + expect { query_parser.parse(params: {limit: ["a"]}) }.to raise_error(QueryParserError) + end + + it "raises on bogus OFFSET queries" do + expect { query_parser.parse(params: {offset: ["a"]}) }.to raise_error(QueryParserError) + end + end + end +end diff --git a/spec/unit/query_spec.rb b/spec/unit/query_spec.rb index 06c65ac..b5b2994 100644 --- a/spec/unit/query_spec.rb +++ b/spec/unit/query_spec.rb @@ -4,7 +4,8 @@ module RightsAPI RSpec.describe(Query) do - let(:query) { described_class.new(table_name: "rights") } + let(:query) { described_class.new(model: Attribute) } + let(:query_with_params) { described_class.new(model: Attribute, params: {id: [1]}) } describe ".new" do it "creates a Query" do @@ -15,13 +16,13 @@ module RightsAPI describe "#run" do context "with an id" do it "returns a Result" do - expect(query.run(id: "some id")).to be_a_kind_of(Result) + expect(query_with_params.run).to be_a_kind_of(Result) end end context "without an id" do it "returns a Result" do - expect(query.run(id: nil)).to be_a_kind_of(Result) + expect(query.run).to be_a_kind_of(Result) end end end diff --git a/spec/unit/result/error_result_spec.rb b/spec/unit/result/error_result_spec.rb new file mode 100644 index 0000000..113ba3c --- /dev/null +++ b/spec/unit/result/error_result_spec.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module RightsAPI + RSpec.describe(ErrorResult) do + let(:result) { described_class.new(exception: StandardError.new) } + let(:test_row) { {key1: "value1", key2: "value2"} } + + describe ".new" do + it "creates ErrorResult instance" do + expect(result).to be_a(RightsAPI::ErrorResult) + end + end + + describe "#to_h" do + it "returns a Hash with an error key" do + expect(result.to_h).to be_a(Hash) + expect(result.to_h.key?(:error)).to eq(true) + end + end + end +end diff --git a/spec/unit/result_spec.rb b/spec/unit/result_spec.rb index 35409ce..b24fb18 100644 --- a/spec/unit/result_spec.rb +++ b/spec/unit/result_spec.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module RightsAPI - REQUIRED_HASH_KEYS = %w[total start end data] + REQUIRED_HASH_KEYS = %w[total start end milliseconds data] RSpec.describe(Result) do let(:result) { described_class.new } let(:test_row) { {key1: "value1", key2: "value2"} } @@ -17,14 +17,16 @@ module RightsAPI expect(result.start).to eq(0) expect(result.end).to eq(0) expect(result.data).to eq([]) + expect(result.milliseconds).to eq(0.0) end end context "with parameters" do it "uses provided offset and total" do - res = described_class.new(offset: 10, total: 100) + res = described_class.new(offset: 10, total: 100, milliseconds: 100.0) expect(res.offset).to eq(10) expect(res.total).to eq(100) + expect(res.total).to eq(100.0) end end end