diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a9225fe --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +Dockerfile +dbinit/ diff --git a/.gitignore b/.gitignore index e25dff4..4d3ef7d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,12 +7,24 @@ # Ignore bundler config. /.bundle -/.vagrant +vendor/bundle + +# created by JetBrains' editors/IDEs *.iml +.idea/ -.vagrant_rails_env +*.sublime-* + +# Directories created by vagrant directly + +/.vagrant + +# directories created by vagrant provisioner -.ruby-version +solr-dir/ +tlrn-config/ +.vagrant_rails_env +.db-password # Ignore the default SQLite database. /db/*.sqlite3 @@ -23,31 +35,23 @@ /tmp/* !/log/.keep !/tmp/.keep +tmp/ +passenger.*.log +passenger*.lock +passenger*.pid # Ignore Byebug command history file. .byebug_history -# Local customizations -# +# Files downloaded by Ansible provisioners ansible/roles/* -# might be -trln-config/ -# Where Solr is installed -solr-dir/ -# Where public Solr configuration is downloaded -argon-solr-config - - - +# Other fiels created by/used by app config/local_env.yml -comfig/mappings config/mappings/ .password postgres-setup.sql -*.sublime-* -.idea/ -tmp/ -passenger.*.log -passenger*.lock -passenger*.pid + +# files created/used by containers (docker-compose.yml, init.sh) +.env +solr-docker/config/ diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..d43ee3e --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,153 @@ +# Relaxed.Ruby.Style + +## Version 2.5 + +Style/Alias: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylealias + +Style/AsciiComments: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styleasciicomments + +Style/BeginBlock: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylebeginblock + +Style/BlockDelimiters: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styleblockdelimiters + +Style/CommentAnnotation: + Enabled: false + StyleGuide: 
https://relaxed.ruby.style/#stylecommentannotation + +Style/Documentation: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styledocumentation + +Layout/DotPosition: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#layoutdotposition + +Style/DoubleNegation: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styledoublenegation + +Style/EndBlock: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styleendblock + +Style/FormatString: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styleformatstring + +Style/IfUnlessModifier: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styleifunlessmodifier + +Style/Lambda: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylelambda + +Style/ModuleFunction: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylemodulefunction + +Style/MultilineBlockChain: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylemultilineblockchain + +Style/NegatedIf: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylenegatedif + +Style/NegatedWhile: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylenegatedwhile + +Style/NumericPredicate: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylenumericpredicate + +Style/ParallelAssignment: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styleparallelassignment + +Style/PercentLiteralDelimiters: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylepercentliteraldelimiters + +Style/PerlBackrefs: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styleperlbackrefs + +Style/Semicolon: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylesemicolon + +Style/SignalException: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylesignalexception + +Style/SingleLineBlockParams: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylesinglelineblockparams + +Style/SingleLineMethods: + Enabled: false + StyleGuide: 
https://relaxed.ruby.style/#stylesinglelinemethods + +Layout/SpaceBeforeBlockBraces: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#layoutspacebeforeblockbraces + +Layout/SpaceInsideParens: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#layoutspaceinsideparens + +Style/SpecialGlobalVars: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylespecialglobalvars + +Style/StringLiterals: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylestringliterals + +Style/TrailingCommaInArguments: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styletrailingcommainarguments + +Style/TrailingCommaInArrayLiteral: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styletrailingcommainarrayliteral + +Style/TrailingCommaInHashLiteral: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#styletrailingcommainhashliteral + +Style/SymbolArray: + Enabled: false + StyleGuide: http://relaxed.ruby.style/#stylesymbolarray + +Style/WhileUntilModifier: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylewhileuntilmodifier + +Style/WordArray: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#stylewordarray + +Lint/AmbiguousRegexpLiteral: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#lintambiguousregexpliteral + +Lint/AssignmentInCondition: + Enabled: false + StyleGuide: https://relaxed.ruby.style/#lintassignmentincondition + +Layout/LineLength: + Enabled: false + +Metrics: + Enabled: false diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d26fc72 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +ARG RUBY_VERSION=2.7-alpine +FROM ruby:${RUBY_VERSION} AS base + +# note here that if you don't use an alpine flavor this +# package update will not work! 
+ +RUN apk update && apk upgrade && apk add --no-cache build-base sqlite-dev libpq-dev libxml2-dev libxslt-dev yajl git nodejs bash sqlite + + +COPY ./ /app/ + +WORKDIR /app + +FROM base AS builder + +RUN bundle config set path /gems && bundle install -j $(nproc) + +FROM base + +WORKDIR /app + +COPY --from=builder /gems /gems + +RUN bundle config set path /gems + +COPY entrypoint /usr/local/bin/entrypoint +ENTRYPOINT ["/usr/local/bin/entrypoint"] +EXPOSE 3000 +CMD ["server"] + diff --git a/Gemfile b/Gemfile index 5bc38cd..b1d1f82 100644 --- a/Gemfile +++ b/Gemfile @@ -91,8 +91,6 @@ gem 'active_record_upsert', platform: :mri gem 'argot', github: 'trln/argot-ruby', tag: 'v1.0.7' -gem 'solrtasks', github: 'trln/solrtasks' - # Use Redis adapter to run Action Cable in production # gem 'redis', '~> 3.0' # Use ActiveModel has_secure_password @@ -131,3 +129,5 @@ end # Windows does not include zoneinfo files, so bundle the tzinfo-data gem # gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] + + gem 'tzinfo-data' diff --git a/Gemfile.lock b/Gemfile.lock index 71280d0..80a1ef9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,14 +11,6 @@ GIT traject (~> 2.0) yajl-ruby (~> 1.2, >= 1.2.1) -GIT - remote: https://github.com/trln/solrtasks.git - revision: c463013d783bad2f9e61a9444914e8587329c9e2 - specs: - solrtasks (0.2.7) - cocaine (~> 0.5.8) - nokogiri (>= 1.10) - GEM remote: https://rubygems.org/ specs: @@ -94,9 +86,6 @@ GEM sassc-rails (>= 2.0.0) builder (3.2.4) byebug (11.1.3) - climate_control (0.2.0) - cocaine (0.5.8) - climate_control (>= 0.0.3, < 1.0) coderay (1.1.3) concurrent-ruby (1.1.9) connection_pool (2.2.5) @@ -270,6 +259,8 @@ GEM turbolinks-source (5.2.0) tzinfo (2.0.4) concurrent-ruby (~> 1.0) + tzinfo-data (1.2022.1) + tzinfo (>= 1.0.0) uglifier (4.2.0) execjs (>= 0.3.0, < 3) unf (0.1.4) @@ -316,12 +307,12 @@ DEPENDENCIES sassc (~> 2.0.0) sidekiq (~> 5.0) simple_token_authentication (~> 1.0) - solrtasks! 
spring spring-watcher-listen (~> 2.0.0) sqlite3 timecop (~> 0.9.1) turbolinks (~> 5) + tzinfo-data uglifier (>= 1.3.0) web-console yajl-ruby (>= 1.3.1) diff --git a/README.md b/README.md index a752314..41f96ee 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ records for the Triangle Research Libraries Network (TRLN). ![Continuous Integration Status](https://github.com/trln/trln-ingest/workflows/CI/badge.svg) -Setup is currently handled via Vagrant, but here's at least a partial list -of what you'll need installed on the target system, other than Ruby, of course. +Setup is currently handled via Vagrant or Docker Compose, but here's at least a +partial list of what you'll need installed on the target system, other than +Ruby, of course. * Postgres 9.5+ (for JSONB and "upsert" support) -- [Postgres yum repositories](https://yum.postgresql.org/repopackages.php) is a good place @@ -18,9 +19,76 @@ of what you'll need installed on the target system, other than Ruby, of course. ## Getting Started +### Containers and Docker/Podman Compose + +The Vagrant setup should do most of what's needed, but a somewhat +lighter-weight approach is available using `docker compose` (or +`podman-compose`), as defined in the `docker-compose.yml` and various +`Dockerfile`s in the project's directory. + +Before running containers for all the services via compose, you will need +to run the `init.sh` script in the same directory as this file; this will +pull down the solr configuration from the working repository and, if needed, create a docker/podman secret for the PostgreSQL database password. + +Read the comments in `init.sh` for more information, especially if you want +to use a Solr configuration other than from the `main` branch. + +From that point, `docker compose up` will start all the necessary services, +including the primary Rails application (in development mode, so editing files +in the Rails application directories will take effect immediately). 
+ +For more information, look at the `docker-compose.yml`, the `Dockerfile` in +this directory, and the one in the `solr-docker` subdirectory. A little bit +of `depends_on` and `wait-for` fine-tuning is necessary to ensure that the Solr +plugin files (in `solr-docker/plugins`) and the Solr configuration are +correctly installed before the services start. When the smoke clears +you should have a complete set of services. + +A few tidbits: note that `docker-compose up` will build any images that it +doesn't already know about, but once an image is found, you'll have to rebuild +it to get any changes you've made (e.g. `docker-compose build app`). If you've +done `docker-compose up` and stop things with `ctrl-c` in the terminal, this +_does not remove any of the images_ so when you run `up` again, you'll get the +same images as were used in your previous run. Frequent `docker-compose down`s +will help eliminate problems resulting from things sticking around too long. + +The default exposed ports are: + +| Port | Service | +|------| --------| +| 3000 | Rails | +| 2181 | Zookeeper | +| 8983 | solr | +| 6379 | Redis | +| 5432 | Postgres | + +These are all available on `localhost`. + +## An Extra Note About Running `trln_argon` (or derivatives thereof) in Another Container + +You might want to do some end-to-end testing of index schema changes for `trln_argon`, and in that case it's nice to point one of those at the Solr instance +exposed by this application. + +Unless you tell it otherwise, `docker-compose` and friends will probably put +all containers it starts into the same network (`bridge` by default -- see https://docs.docker.com/network/ for more details). When running `trln_argon` inside a container, it can't see the services exposed to the _host_ at `localhost` but you can use `host.containers.internal` instead to access the services exposed +by containers, even if they're started from another process.
+ +In this case, put + +`SOLR_URL: http://host.containers.internal:8983/solr/trlnbib` + +in + +`trln_argon/.internal_test_app/config/local_env.yml` (adjust as necessary for +the particulars of the application you're working with). Note that with +`trln_argon` specifically, you will need to edit this file every time the +application is reinitialized. + ### Vagrant -Use[`vagrant`](https://www.vagrantup.com/) if you just need to get going. +If you don't want to use containers, you can use +[`vagrant`](https://www.vagrantup.com/) if you just need to get going. + Vagrant is a tool for managing virtual machines; it doesn't itself contain any 'virtualization' features, but it knows how to interact with several VM providers (e.g Virtualbox, vmWare Fusion, things like that) and is intended to @@ -237,7 +305,7 @@ stanza: ``` # assuming service name == 'trln-ingest' EnvironmentFile=/etc/default/trln-ingest -PassEnvironment=DB_HOST DB_USER DB_PASSWORD DB_ADAPTER DB_NAME RAILS_ENV TRANSACTION_STORAGE_BASE +PassEnvironment=DB_HOST DB_USER DB_PASSWORD DB_ADAPTER DB_NAME RAILS_ENV TRANSACTION_FILES_BASE ``` You may also want to create a `config/database.yml` and `config/solr.yml` for diff --git a/app/services/solr_service.rb b/app/services/solr_service.rb index c511e5b..fc42818 100644 --- a/app/services/solr_service.rb +++ b/app/services/solr_service.rb @@ -4,7 +4,9 @@ class SolrService class << self; attr_accessor :config end - @config = YAML.load_file(Rails.root.join('config', 'solr.yml'))[Rails.env].to_ostruct_deep + COLL_PATH = '/solr/admin/collections' + + @config = Rails.application.config_for(:solr) attr_accessor :url, :collection, :client @@ -12,19 +14,23 @@ class << self; attr_accessor :config end # if passed a block, this object will be the block's parameter.
def initialize(collection = 'trlnbib') @collection = collection - @url = URI.join(self.class.config.url.sample, @collection).to_s + @url = URI.join( config.url.sample, @collection).to_s @client = RSolr.connect :url => url yield self if block_given? end + def config + @config ||= Rails.application.config_for(:solr).to_ostruct_deep + end + def count response = @client.select params: { q: '*:*', rows: 0 } end def clusterstatus - @client.get('../admin/collections', params: { action: 'CLUSTERSTATUS' }) + @client.get(COLL_PATH, params: { action: 'clusterstatus' }) rescue StandardError => ex - Rails.logger.error("unable to fetch clusterstatus #{ex.backtrace}") + Rails.logger.error("unable to fetch clusterstatus: #{ex} #{ex.backtrace}") { error: 'sorry' } end @@ -68,4 +74,33 @@ def json_doc_update(files, commit_interval = 0) end @client.commit unless commit_interval == -1 end + + # Creates a collection if it does not + # already exist. + # == Parameters + # collection:: + # A string describing the name of the collection to create. + # defaults to 'trlnbib' + # config_name:: + # A String with the name of the configuration name to use. + # A configuration matching the name must already exist. + # defaults to the same value as `collection` + def create_collection(collection = 'trlnbib', config_name = nil) + return unless Rails.env.development? 
+ + query_resp = @client.get(COLL_PATH, params: { action: 'list' }) + return if query_resp.fetch('collections', []).include?(collection) + + config_name ||= collection + + resp = @client.get(COLL_PATH, params: { + action: 'create', + 'collection.configName' => config_name, + name: collection, + numShards: 1 + }) + return resp + rescue StandardError + warn "#{query_resp.to_json}" + end end diff --git a/app/workers/cancellable_worker.rb b/app/workers/cancellable_worker.rb index 08a9933..ef28bb1 100644 --- a/app/workers/cancellable_worker.rb +++ b/app/workers/cancellable_worker.rb @@ -3,7 +3,7 @@ class CancellableWorker include Sidekiq::Worker def cancelled? - Sidekiq.redis {|r| r.exists("cancelled-#{jid}") }.positive? + Sidekiq.redis { |r| r.exists("cancelled-#{jid}") }.positive? end def cancel!(jid) diff --git a/app/workers/indexing_worker.rb b/app/workers/indexing_worker.rb index 1d9b45e..74b0b8a 100644 --- a/app/workers/indexing_worker.rb +++ b/app/workers/indexing_worker.rb @@ -1,8 +1,10 @@ # Kicks off an indexing process # class IndexingWorker < CancellableWorker + # rubocop:disable Metrics/MethodLength def perform(txn_id, batch_size = 5000) return if cancelled? + begin Transaction.find(txn_id) rescue RecordNotFound @@ -14,4 +16,5 @@ def perform(txn_id, batch_size = 5000) processor.logger = logger processor.run end + # rubocop:enable Metrics/MethodLength end diff --git a/app/workers/transaction_worker.rb b/app/workers/transaction_worker.rb index 1a7d190..c89ea30 100644 --- a/app/workers/transaction_worker.rb +++ b/app/workers/transaction_worker.rb @@ -5,6 +5,7 @@ class TransactionWorker < CancellableWorker def perform(transaction_id) return if cancelled? 
+ begin txn = Transaction.find(transaction_id) rescue ActiveRecord::RecordNotFound @@ -13,8 +14,8 @@ def perform(transaction_id) return end logger.info("Creating a processor for #{transaction_id}") + txn = load_transaction(transaction_id) processor = TransactionProcessor.new(txn) - #processor.logger = logger begin logger.info("Starting ingest for transaction #{transaction_id}") processor.run @@ -22,4 +23,17 @@ def perform(transaction_id) IndexingWorker.perform_async(transaction_id) end end + + private + + def load_transaction(transaction_id) + begin + txn = Transaction.find(transaction_id) + rescue ActiveRecord::RecordNotFound + logger.info("Cancelling #{jid} because no txn matches id #{transaction_id}") + cancel!(jid) + return + end + txn + end end diff --git a/config/boot.rb b/config/boot.rb index 30f5120..2789ad2 100644 --- a/config/boot.rb +++ b/config/boot.rb @@ -1,3 +1,33 @@ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__) require 'bundler/setup' # Set up gems listed in the Gemfile. + +# add some global functions to let configs check environment + +module TRLN + module IngestEnvironment + + def vagrant? + return @vagrant if defined?(@vagrant) + @vagrant = system('grep -q ^vagrant: /etc/passwd') + end + + def container? + return @container if defined?(@container) + @container = ENV['OS_ENV'] == 'container' + end + + module_function :vagrant? + module_function :container? + end +end + + +if TRLN::IngestEnvironment.vagrant? + warn "We are running under vagrant. Subtly mangling configurations" + ENV['VAGRANT'] = 'yes' +end + +if TRLN::IngestEnvironment.container? + warn "We appear to be running inside a container" +end diff --git a/config/database.yml b/config/database.yml index e350be7..121e02d 100644 --- a/config/database.yml +++ b/config/database.yml @@ -12,14 +12,17 @@ default: &default development: &vagrant adapter: postgresql encoding: unicode + # see config/boot.rb + <% unless TRLN::IngestEnvironment.vagrant? 
%> + # host/passwd not needed when postgres is on localhost and ident auth + # is available, as with vagrant setup + host: <%= ENV.fetch('DB_HOST', 'localhost') %> + password: <%= ENV['DB_PASSWORD'] %> + <% end %> + database: <%= ENV['DB_NAME'] || 'shrindex' %> pool: 5 username: <%= ENV['DB_USER'] || 'set_env_vars' %> - # do not set host when running under vagrant - # host: <% ENV.fetch('DB_HOST', 'localhost') %> - # postgres under vagrant also uses ident when hostnamne - # is not set - #password: <%= ENV['DB_HOST'] %> # Warning: The database defined as "test" will be erased and # re-generated from your development database when you run "rake". diff --git a/config/environments/development.rb b/config/environments/development.rb index a403ff9..3cae38a 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -62,7 +62,7 @@ # routes, locales, etc. This feature depends on the listen gem. config.file_watcher = ActiveSupport::EventedFileUpdateChecker - config.stash_directory = ENV['APP_STASH_DIRECTORY'] || "#{ENV['HOME']}/spofford-data" + config.stash_directory = ENV['TRANSACTION_FILES_BASE'] || "#{ENV['HOME']}/spofford-data" unless File.directory?(config.stash_directory) $stderr.write("Transaction storage directory #{config.stash_directory} does not exist!\n") diff --git a/config/initializers/spofford.rb b/config/initializers/spofford.rb index 7751612..1a51e36 100644 --- a/config/initializers/spofford.rb +++ b/config/initializers/spofford.rb @@ -15,4 +15,8 @@ FileUtils.mkdir(config.stash_directory) end + if Rails.env.development? 
+ SolrService.new.create_collection + end + end diff --git a/config/routes.rb b/config/routes.rb index 6e21b8c..e31bb7a 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -26,6 +26,11 @@ post '/ingest/:owner', to: 'transactions#ingest_json', constraints: { content_type: 'application/json' } post '/ingest/:owner', to: 'transactions#ingest_zip', constraints: { content_type: 'application/zip' } + put '/ingest/:owner', to: 'transactions#ingest_json', constraints: {content_type: 'application/json' } + + put '/ingest/:owner', to: 'transactions#ingest_zip', constraints: {content_type: 'application/zip' } + + get '/ingest/:owner', to: 'transactions#ingest_form', as: 'ingest_form' post '/ingest/:owner', contraints: { content_type: :multipart_form }, to: 'transactions#upload' diff --git a/config/solr.yml b/config/solr.yml index ed72b70..d99b38f 100644 --- a/config/solr.yml +++ b/config/solr.yml @@ -3,7 +3,7 @@ # see app/services/solr_service.rb common: &common url: - - http://localhost:8983/solr/ + - <%= ENV.fetch('SOLR_URL', 'http://localhost:8983/solr/') %> collections: - trlnbib - icetocs diff --git a/db/schema.rb b/db/schema.rb index 79d7f50..473eb5c 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2,20 +2,20 @@ # of editing this file, please use the migrations feature of Active Record to # incrementally modify your database, and then regenerate this schema definition. # -# Note that this schema.rb definition is the authoritative source for your -# database schema. If you need to create the application database on another -# system, you should be using db:schema:load, not running all the migrations -# from scratch. The latter is a flawed and unsustainable approach (the more migrations -# you'll amass, the slower it'll run and the greater likelihood for issues). +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`.
When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20180904184901) do +ActiveRecord::Schema.define(version: 2018_09_04_184901) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" - create_table "documents", id: :string, limit: 32, force: :cascade do |t| + create_table "documents", id: { type: :string, limit: 32 }, force: :cascade do |t| t.string "local_id", limit: 32, null: false t.string "owner", limit: 32, null: false t.jsonb "content" diff --git a/dbinit/0-users.sql b/dbinit/0-users.sql new file mode 100755 index 0000000..318a2cc --- /dev/null +++ b/dbinit/0-users.sql @@ -0,0 +1,3 @@ +CREATE user ainsworth WITH PASSWORD 'a1nsw0rth'; +CREATE database shrindex OWNER ainsworth; + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..95b70f9 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,136 @@ +--- +services: + database: + image: postgres:14-alpine + container_name: database + ports: + - 5432:5432 + environment: + POSTGRES_PASSWORD: "p0tat0 precious polym3r chimes" + PODMAN_USERNS: keep-id + volumes: + # normally this would be :Z on selinux systems however + # this appears to interfere with the right selinux context + # being set on the container. 
:Z means the container should get + # its own context and be the only one that can access the mount + # :z means other containers in this compose file can access + # the mount + - ./dbinit:/docker-entrypoint-initdb.d/:z + + healthcheck: + test: pg_isready -U postgres -h 127.0.0.1 + interval: 5s + + redis: + image: redis:5-bullseye + container_name: redis + ports: + - 6379:6379 + healthcheck: + test: redis-cli ping + interval: 1s + timeout: 3s + retries: 10 + + # this is designed merely to be 'inherited' from so we arent' + # repeating ourselves with the rails and sidekiq containers + base-app: &base-app + image: rwgrim/docker-noop + volumes: + # see above; any :Z seems to break the postgres container + - ./:/app/:z + - ingest-transactions:/transactions:z + depends_on: + database: + condition: service_healthy + redis: + condition: service_healthy + solr1: + condition: service_healthy + + secrets: + - trln-ingest-db-pw + environment: + OS_ENV: container + DB_HOST: database + DB_USER: ainsworth + REDIS_URL: redis://redis:6379 + SOLR_URL: http://solr1:8983/solr/ + TRANSACTION_FILES_BASE: /transactions + + app: &app + <<: *base-app + container_name: spofford + image: ingestapp + build: . 
+ command: server + ports: + - 3000:3000 + + depends_on: + solr1: { condition: service_healthy } + redis: { condition: service_healthy } + database: { condition: service_healthy } + + + sidekiq: + <<: *base-app + image: ingestapp + command: sidekiq + + solr1: + image: trln-ingest-solr:latest + container_name: solr1 + build: + context: solr-docker + ports: + - 8983:8983 + environment: + - ZK_HOST=zoo1:2181 + - SOLR_MODULES=analysis-extras,trln + depends_on: + - zoo1 + healthcheck: + test: ['CMD-SHELL', 'curl -sf http://localhost:8983/solr/admin/collections?action=list' ] + volumes: + - ./solr-docker/plugins:/trln-modules + - ./solr-docker/config:/trlnbib-config:z + - ./solr-docker/initscripts:/docker-entrypoint-initdb.d:Z + + zoo1: + image: zookeeper:3.6.2 + container_name: zoo1 + restart: always + #hostname: zoo1 + ports: + - 2181:2181 + - 7001:7000 + environment: + ZOO_MY_ID: 1 + ZOO_SERVERS: server.1=zoo1:2888:3888;2181 + ZOO_4LW_COMMANDS_WHITELIST: mntr, conf, ruok + + solr_config_loader: + image: solr:9 + container_name: config_loader + depends_on: + - solr1 + environment: + - SOLR_HOST=solr1 + command: + - bash + - "-e" + - "-x" + - "-c" + - "wait-for-solr.sh --max-attempts 10 --wait-seconds 5 --solr-url http://$$SOLR_HOST:8983/; solr create_collection -c trlnbib -d /trlnbib-config -p 8983" + + volumes: + - ./solr-docker/config:/trlnbib-config + + +secrets: + trln-ingest-db-pw: + file: .db-password + +volumes: + ingest-transactions: diff --git a/entrypoint b/entrypoint new file mode 100755 index 0000000..b73d255 --- /dev/null +++ b/entrypoint @@ -0,0 +1,47 @@ +#!/bin/sh + +# Custom entrypoint to enable local development + +set -e + +rm -f /app/tmp/pids/server.pid + +if [ -e /run/secrets/trln-ingest-db-pw ]; then + export DB_PASSWORD=$(cat /run/secrets/trln-ingest-db-pw) +else + echo "Secret not available" + exit 1 +fi + +export OS_ENV=container + +echo "TRANSACTION FILES BASE: '${TRANSACTION_FILES_BASE}'" +export TRANSACTION_FILES_BASE=${TRANSACTION_FILES_BASE:-/transactions} + +cd /app + +CMD=${1:-server} + +case "$CMD" in + server) + bundle exec rails
db:migrate + bundle exec rails user:admin + exec bundle exec rails server -b 0.0.0.0 + ;; + sidekiq) + exec bundle exec sidekiq + ;; + migrate) + exec bundle exec rails db:migrate + ;; + test) + export RAILS_ENV=test + exec bundle exec rails test + ;; + shell) + exec /bin/bash + ;; + *) + exec bundle exec "$@" + ;; +esac diff --git a/init.sh b/init.sh new file mode 100755 index 0000000..c3bcc88 --- /dev/null +++ b/init.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +set -eu + +# Ensures that a suitable TRLN Discovery Solr configuration +# is downloaded and available for use with Docker/Podman Compose. +# Additionally creates a podman/docker secret for the PostgreSQL database +# password + +# By default pulls the `main` branch, but this can be changed +# by passing in the name of another branch when running this script. + +# After a copy has been checked out, a different branch can be used +# via `cd solr-docker/config && git pull origin [newbranch] && git checkout [newbranch]` and re-starting all the containers. + +CONFIG_BRANCH=${1:-main} + +echoerr() { + echo "$@" 1>&2; +} + +wd=$(pwd) + +# If solr setup fails due to missing configuration, then +# rm -rf solr-docker/config and re-run this script +if [ ! -d solr-docker/config/ ]; then + cd solr-docker + echo "checking out solr configuration" + git clone https://github.com/trln/trlnbib-solr-config config/ + if [ "main" != "${CONFIG_BRANCH}" ]; then + cd config && git checkout "${CONFIG_BRANCH}" + fi + cd "$wd" +else + echo "solr configuration is already available" +fi + +# figure out whether docker or podman is available on the command line; +# prefer podman to docker + +container_runner='podman' +if [ -z "$(type -P "${container_runner}")" ]; then + container_runner='docker' +fi + +if [ -z "$(type -P "${container_runner}")" ]; then + echo "Neither of podman/docker found. Exiting." + exit 1 +fi + +# create the database password file only when it is missing or empty + +if [ !
-s .db-password ]; then + echo "database password file not found, creating" + echo 'a1nsw0rth' > .db-password +else + echo "database passwod is already set up" +fi diff --git a/lib/spofford/version.rb b/lib/spofford/version.rb index f5b5fb4..d28e22e 100644 --- a/lib/spofford/version.rb +++ b/lib/spofford/version.rb @@ -1,3 +1,3 @@ module Spofford - VERSION = '1.0.0'.freeze + VERSION = '1.0.1'.freeze end diff --git a/lib/tasks/user.rake b/lib/tasks/user.rake index 5a25b46..2571d44 100644 --- a/lib/tasks/user.rake +++ b/lib/tasks/user.rake @@ -1,6 +1,9 @@ namespace :user do def vagrant? - @vagrant ||= system("grep '^vagrant:' /etc/passwd") + return @vagrant if defined?(@vagrant) + + # see config/boot.rb + @vagrant = TRLN::IngestEnvironment.vagrant? || TRLN::IngestEnvironment.container? end desc 'List users' task list: :environment do diff --git a/solr-docker/.gitignore b/solr-docker/.gitignore new file mode 100644 index 0000000..04204c7 --- /dev/null +++ b/solr-docker/.gitignore @@ -0,0 +1 @@ +config diff --git a/solr-docker/Dockerfile b/solr-docker/Dockerfile new file mode 100644 index 0000000..53bf98b --- /dev/null +++ b/solr-docker/Dockerfile @@ -0,0 +1,4 @@ +FROM solr:9.0.0 + +# Add external jars +COPY plugins/*.jar /opt/solr/modules/trln/lib/ diff --git a/solr-docker/README.md b/solr-docker/README.md new file mode 100644 index 0000000..4cb7f99 --- /dev/null +++ b/solr-docker/README.md @@ -0,0 +1,22 @@ +# Docker/Podman setup for Solr + +This directory contains customizations to run Solr in a container via `docker +compose` or `podman-compose` for purposes of local development. + +It assumes that the TRLN Discovery Solr configset has already been installed into the `config` subdirectory. See the `init.sh` file in the parent directory for +more information. 
+ +## Rationale + +The official Solr containers from the Apache Solr project provide a number of +features to get up and running quickly, but they're mostly geared around +creating _cores_ rather than _collections_; the Rails application assumes it's +working in a Solr Cloud deployment, because that's what's available in +production. + +Using collections rather than cores means we have to do quite a bit more setup, +over and above ensuring that the configset is available, and most of what +happens in the Dockerfile is oriented around that setup. + +Additionally, the Rails application will create the `trlnbib` collection when +running in `development` mode if it does not already exist. diff --git a/solr-docker/initscripts/setup.sh b/solr-docker/initscripts/setup.sh new file mode 100755 index 0000000..236d782 --- /dev/null +++ b/solr-docker/initscripts/setup.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# ensures configuration is uploaded; assumes configuration is mounted +# at /trlnbib-config +/opt/solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST:-zootopia:2181} -cmd upconfig -confname trlnbib -confdir /trlnbib-config + diff --git a/solr-docker/plugins/CJKFilterUtils-v3.0-SNAPSHOT.jar b/solr-docker/plugins/CJKFilterUtils-v3.0-SNAPSHOT.jar new file mode 100644 index 0000000..3fcae53 Binary files /dev/null and b/solr-docker/plugins/CJKFilterUtils-v3.0-SNAPSHOT.jar differ diff --git a/solr-docker/plugins/lucene-umich-solr-filters-2.0-solr-9.0.0.jar b/solr-docker/plugins/lucene-umich-solr-filters-2.0-solr-9.0.0.jar new file mode 100644 index 0000000..ae38357 Binary files /dev/null and b/solr-docker/plugins/lucene-umich-solr-filters-2.0-solr-9.0.0.jar differ diff --git a/wait-for b/wait-for new file mode 100755 index 0000000..1a66bcf --- /dev/null +++ b/wait-for @@ -0,0 +1,192 @@ +#!/bin/sh + +# The MIT License (MIT) +# +# Copyright (c) 2017 Eficode Oy +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated
documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +VERSION="2.2.2" + +set -- "$@" -- "$TIMEOUT" "$QUIET" "$PROTOCOL" "$HOST" "$PORT" "$result" +TIMEOUT=15 +QUIET=0 +# The protocol to make the request with, either "tcp" or "http" +PROTOCOL="tcp" + +echoerr() { + if [ "$QUIET" -ne 1 ]; then printf "%s\n" "$*" 1>&2; fi +} + +usage() { + exitcode="$1" + cat << USAGE >&2 +Usage: + $0 host:port|url [-t timeout] [-- command args] + -q | --quiet Do not output any status messages + -t TIMEOUT | --timeout=timeout Timeout in seconds, zero for no timeout + -v | --version Show the version of this tool + -- COMMAND ARGS Execute command with args after the test finishes +USAGE + exit "$exitcode" +} + +wait_for() { + case "$PROTOCOL" in + tcp) + if ! command -v nc >/dev/null; then + echoerr 'nc command is missing!' + exit 1 + fi + ;; + http) # http probes below are made with wget, so that is the tool to look for + if ! command -v wget >/dev/null; then + echoerr 'wget command is missing!'
+ exit 1 + fi + ;; + esac + + TIMEOUT_END=$(($(date +%s) + TIMEOUT)) + + while :; do + case "$PROTOCOL" in + tcp) + nc -w 1 -z "$HOST" "$PORT" > /dev/null 2>&1 + ;; + http) + wget --timeout=1 -q "$HOST" -O /dev/null > /dev/null 2>&1 + ;; + *) + echoerr "Unknown protocol '$PROTOCOL'" + exit 1 + ;; + esac + + result=$? + + if [ $result -eq 0 ] ; then + if [ $# -gt 7 ] ; then + for result in $(seq $(($# - 7))); do + result=$1 + shift + set -- "$@" "$result" + done + + TIMEOUT=$2 QUIET=$3 PROTOCOL=$4 HOST=$5 PORT=$6 result=$7 + shift 7 + exec "$@" + fi + exit 0 + fi + + if [ $TIMEOUT -ne 0 -a $(date +%s) -ge $TIMEOUT_END ]; then + echo "Operation timed out" >&2 + exit 1 + fi + + sleep 1 + done +} + +while :; do + case "$1" in + http://*|https://*) + HOST="$1" + PROTOCOL="http" + shift 1 + ;; + *:* ) + HOST=$(printf "%s\n" "$1"| cut -d : -f 1) + PORT=$(printf "%s\n" "$1"| cut -d : -f 2) + shift 1 + ;; + -v | --version) + echo $VERSION + exit + ;; + -q | --quiet) + QUIET=1 + shift 1 + ;; + -q-*) + QUIET=0 + echoerr "Unknown option: $1" + usage 1 + ;; + -q*) + QUIET=1 + result=$1 + shift 1 + set -- -"${result#-q}" "$@" + ;; + -t | --timeout) + TIMEOUT="$2" + shift 2 + ;; + -t*) + TIMEOUT="${1#-t}" + shift 1 + ;; + --timeout=*) + TIMEOUT="${1#*=}" + shift 1 + ;; + --) + shift + break + ;; + --help) + usage 0 + ;; + -*) + QUIET=0 + echoerr "Unknown option: $1" + usage 1 + ;; + *) + QUIET=0 + echoerr "Unknown argument: $1" + usage 1 + ;; + esac +done + +if ! [ "$TIMEOUT" -ge 0 ] 2>/dev/null; then + echoerr "Error: invalid timeout '$TIMEOUT'" + usage 3 +fi + +case "$PROTOCOL" in + tcp) + if [ "$HOST" = "" ] || [ "$PORT" = "" ]; then + echoerr "Error: you need to provide a host and port to test." + usage 2 + fi + ;; + http) + if [ "$HOST" = "" ]; then + echoerr "Error: you need to provide a host to test." + usage 2 + fi + ;; +esac + +wait_for "$@" +