diff --git a/.trivyignore b/.trivyignore index afd9319e23..4dea0cceee 100644 --- a/.trivyignore +++ b/.trivyignore @@ -91,3 +91,7 @@ CVE-2021-3711 # is only available in premium support, trivy thinks we should use something in the 1.1.1 # line. We can't, due to FIPS compliance, so need to continue to ignore this issue. CVE-2023-0286 + +# Scanners pick up this vulnerability in OpenSSL::ASN1 module in Ruby before 2.2.8, 2.3.x before 2.3.5, and 2.4.x through 2.4.1 +# however we use ruby 3+ in production so we can safely ignore it. +CVE-2017-14033 diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e04970c10..0577f95f57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -136,13 +136,29 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed - Remove auto-release options to allow for a pseudo-fork development on a branch -## [1.19.6] - 2023-07-05 +## [1.20.0] - 2023-07-11 + +### Added +- Telemetry support + [cyberark/conjur#2854](https://github.com/cyberark/conjur/pull/2854) +- New flag to `conjurctl server` command called `--no-migrate` which allows for skipping + the database migration step when starting the server. + [cyberark/conjur#2895](https://github.com/cyberark/conjur/pull/2895) + +### Changed +- The database thread pool max connection size is now based on the number of + web worker threads per process, rather than an arbitrary fixed number. This + mitigates the possibility of a web worker becoming starved while waiting for + a connection to become available. + [cyberark/conjur#2875](https://github.com/cyberark/conjur/pull/2875) ### Fixed - Support Authn-IAM regional requests when host value is missing from signed headers. [cyberark/conjur#2827](https://github.com/cyberark/conjur/pull/2827) +- Support plural syntax for revoke and deny + [CONJSE-1783](https://ca-il-jira.il.cyber-ark.com:8443/browse/CONJSE-1783) -## [1.19.5] - 2023-05-29 +## [1.19.5] - 2023-06-29 ### Security - Update bundler to 2.2.33 to remove CVE-2021-43809 @@ -195,7 +211,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. [cyberark/conjur#2739](https://github.com/cyberark/conjur/pull/2739) - Upgraded rack to v2.2.6.4 to resolve CVE-2023-27539 [cyberark/conjur#2750](https://github.com/cyberark/conjur/pull/2750) -- Updated nokogiri to 1.14.3 for CVE-2023-29469 and CVE-2023-28484 and rails to +- Updated nokogiri to 1.14.3 for CVE-2023-29469 and CVE-2023-28484 and rails to 6.1.7.3 for CVE-2023-28120 in Gemfile.lock, nokogiri to 1.1.4.3 for CVE-2023-29469 and commonmarker to 0.23.9 for CVE-2023-24824 and CVE-2023-26485 in docs/Gemfile.lock (all Medium severity issues flagged by Dependabot) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 3c0e9614ba..53a90115af 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -73,13 +73,13 @@ RUN INSTALL_PKGS="gcc \ bundle --without test development && \ # Remove the build packages yum remove -y $INSTALL_PKGS && \ - yum -y clean all --enablerepo='*' + yum -y clean all --enablerepo='*' && \ + # removing CA bundle of httpclient gem + find / -name 'httpclient-*' -type d -exec find {} -name '*.pem' -type f -delete \; && \ + find / -name 'httpclient-*' -type d -exec find {} -name '*.key' -type f -delete \; COPY . . -# removing CA bundle of httpclient gem -RUN find / -name httpclient -type d -exec find {} -name *.pem -type f -delete \; - RUN ln -sf /opt/conjur-server/bin/conjurctl /usr/local/bin/ COPY LICENSE.md /licenses/ diff --git a/Gemfile b/Gemfile index 0301f54f33..e3a33d72c4 100644 --- a/Gemfile +++ b/Gemfile @@ -79,6 +79,8 @@ gem 'openid_connect' gem "anyway_config" gem 'i18n', '~> 1.8.11' +gem 'prometheus-client' + group :development, :test do gem 'aruba' gem 'ci_reporter_rspec' diff --git a/Gemfile.lock b/Gemfile.lock index 29b04ee9dd..08eed7f34e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -21,60 +21,60 @@ PATH GEM remote: https://rubygems.org/ specs: - actioncable (6.1.7.3) - actionpack (= 6.1.7.3) - activesupport (= 6.1.7.3) + actioncable (6.1.7.4) + actionpack (= 6.1.7.4) + activesupport (= 6.1.7.4) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailbox (6.1.7.3) - actionpack (= 6.1.7.3) - activejob (= 6.1.7.3) - activerecord (= 6.1.7.3) - activestorage (= 6.1.7.3) - activesupport (= 6.1.7.3) + actionmailbox (6.1.7.4) + actionpack (= 6.1.7.4) + activejob (= 6.1.7.4) + activerecord (= 6.1.7.4) + activestorage (= 6.1.7.4) + activesupport (= 6.1.7.4) mail (>= 2.7.1) - actionmailer (6.1.7.3) - actionpack (= 6.1.7.3) - actionview (= 6.1.7.3) - activejob (= 6.1.7.3) - activesupport (= 6.1.7.3) + actionmailer (6.1.7.4) + actionpack (= 6.1.7.4) + actionview (= 6.1.7.4) + activejob (= 6.1.7.4) + activesupport (= 6.1.7.4) mail (~> 2.5, >= 2.5.4) rails-dom-testing (~> 2.0) - actionpack (6.1.7.3) - actionview (= 6.1.7.3) - activesupport (= 6.1.7.3) + actionpack (6.1.7.4) + actionview (= 6.1.7.4) + activesupport (= 6.1.7.4) rack (~> 2.0, >= 2.0.9) rack-test (>= 0.6.3) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.2.0) - actiontext (6.1.7.3) - actionpack (= 6.1.7.3) - activerecord (= 6.1.7.3) - activestorage (= 6.1.7.3) - activesupport (= 6.1.7.3) + actiontext (6.1.7.4) + actionpack (= 6.1.7.4) + activerecord (= 6.1.7.4) + activestorage (= 6.1.7.4) + activesupport (= 6.1.7.4) nokogiri (>= 1.8.5) - actionview (6.1.7.3) - activesupport (= 6.1.7.3) + actionview (6.1.7.4) + activesupport (= 6.1.7.4) builder (~> 3.1) erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.1, >= 1.2.0) - activejob (6.1.7.3) - activesupport (= 6.1.7.3) + activejob (6.1.7.4) + activesupport (= 6.1.7.4) globalid (>= 0.3.6) - activemodel (6.1.7.3) - activesupport (= 6.1.7.3) - activerecord (6.1.7.3) - activemodel (= 6.1.7.3) - activesupport (= 6.1.7.3) - activestorage (6.1.7.3) - actionpack (= 6.1.7.3) - activejob (= 6.1.7.3) - activerecord (= 6.1.7.3) - activesupport (= 6.1.7.3) + activemodel (6.1.7.4) + activesupport (= 6.1.7.4) + activerecord (6.1.7.4) + activemodel (= 6.1.7.4) + activesupport (= 6.1.7.4) + activestorage (6.1.7.4) + actionpack (= 6.1.7.4) + activejob (= 6.1.7.4) + activerecord (= 6.1.7.4) + activesupport (= 6.1.7.4) marcel (~> 1.0) mini_mime (>= 1.1.0) - activesupport (6.1.7.3) + activesupport (6.1.7.4) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) @@ -277,9 +277,9 @@ GEM listen (3.7.0) rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) - loofah (2.20.0) + loofah (2.21.3) crass (~> 1.0.2) - nokogiri (>= 1.5.9) + nokogiri (>= 1.12.0) mail (2.8.1) mini_mime (>= 0.1.1) net-imap @@ -291,10 +291,10 @@ GEM mime-types-data (~> 3.2015) mime-types-data (3.2022.0105) mini_mime (1.1.2) - minitest (5.18.0) + minitest (5.19.0) multi_json (1.15.0) multi_test (0.1.2) - net-imap (0.3.4) + net-imap (0.3.7) date net-protocol net-ldap (0.17.0) @@ -307,9 +307,9 @@ GEM net-ssh (6.1.0) netrc (0.11.0) nio4r (2.5.9) - nokogiri (1.14.3-x86_64-darwin) + nokogiri (1.15.3-x86_64-darwin) racc (~> 1.4) - nokogiri (1.14.3-x86_64-linux) + nokogiri (1.15.3-x86_64-linux) racc (~> 1.4) openid_connect (1.3.0) activemodel @@ -328,6 +328,7 @@ GEM ast (~> 2.4.1) pg (1.2.3) powerpack (0.1.3) + prometheus-client (3.0.0) pry (0.13.1) coderay (~> 1.1) method_source (~> 1.0) @@ -340,8 +341,8 @@ GEM puma (5.6.4) nio4r (~> 2.0) raabro (1.4.0) - racc (1.6.2) - rack (2.2.6.4) + racc (1.7.1) + rack (2.2.7) rack-oauth2 (1.19.0) activesupport attr_required @@ -351,39 +352,41 @@ GEM rack-rewrite (1.5.1) rack-test (2.1.0) rack (>= 1.3) - rails (6.1.7.3) - actioncable (= 6.1.7.3) - actionmailbox (= 6.1.7.3) - actionmailer (= 6.1.7.3) - actionpack (= 6.1.7.3) - actiontext (= 6.1.7.3) - actionview (= 6.1.7.3) - activejob (= 6.1.7.3) - activemodel (= 6.1.7.3) - activerecord (= 6.1.7.3) - activestorage (= 6.1.7.3) - activesupport (= 6.1.7.3) + rails (6.1.7.4) + actioncable (= 6.1.7.4) + actionmailbox (= 6.1.7.4) + actionmailer (= 6.1.7.4) + actionpack (= 6.1.7.4) + actiontext (= 6.1.7.4) + actionview (= 6.1.7.4) + activejob (= 6.1.7.4) + activemodel (= 6.1.7.4) + activerecord (= 6.1.7.4) + activestorage (= 6.1.7.4) + activesupport (= 6.1.7.4) bundler (>= 1.15.0) - railties (= 6.1.7.3) + railties (= 6.1.7.4) sprockets-rails (>= 2.0.0) rails-controller-testing (1.0.5) actionpack (>= 5.0.1.rc1) actionview (>= 5.0.1.rc1) activesupport (>= 5.0.1.rc1) - rails-dom-testing (2.0.3) - activesupport (>= 4.2.0) + rails-dom-testing (2.1.1) + activesupport (>= 5.0.0) + minitest nokogiri (>= 1.6) - rails-html-sanitizer (1.5.0) - loofah (~> 2.19, >= 2.19.1) + rails-html-sanitizer (1.6.0) + loofah (~> 2.21) + nokogiri (~> 1.14) rails_12factor (0.0.3) rails_serve_static_assets rails_stdout_logging rails_layout (1.0.42) rails_serve_static_assets (0.0.5) rails_stdout_logging (0.0.5) - railties (6.1.7.3) - actionpack (= 6.1.7.3) - activesupport (= 6.1.7.3) + railties (6.1.7.4) + actionpack (= 6.1.7.4) + activesupport (= 6.1.7.4) method_source rake (>= 12.2) thor (~> 1.0) @@ -477,8 +480,8 @@ GEM sys-uname (1.2.2) ffi (~> 1.1) table_print (1.5.7) - thor (1.2.1) - timeout (0.3.2) + thor (1.2.2) + timeout (0.4.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unf (0.1.4) @@ -499,13 +502,13 @@ GEM addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) - webrick (1.7.0) + webrick (1.8.1) websocket (1.2.9) - websocket-driver (0.7.5) + websocket-driver (0.7.6) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) xdg (2.2.3) - zeitwerk (2.6.7) + zeitwerk (2.6.9) PLATFORMS x86_64-darwin-20 @@ -556,6 +559,7 @@ DEPENDENCIES parallel parallel_tests pg + prometheus-client pry-byebug pry-rails puma (~> 5.6) diff --git a/TELEMETRY.md b/TELEMETRY.md new file mode 100644 index 0000000000..08dbbf9366 --- /dev/null +++ b/TELEMETRY.md @@ -0,0 +1,172 @@ +# Conjur Telemetry + +Conjur provides a configurable telemetry feature built on +[Prometheus](https://prometheus.io/), which is the preferred open source +monitoring tool for Cloud Native applications. When enabled, it will capture +performance and usage metrics of the running Conjur instance. These metrics are +exposed via a REST endpoint (/metrics) where Prometheus can scrape the data and +archive it as a queryable time series. This increases the observability of a +running Conjur instance and allows for easy integration with popular +visualization and monitoring tools. + +## Metrics + +This implementation leverages the following supported metric types via the +[Prometheus Ruby client library](https://github.com/prometheus/client_ruby): +| Type | Description +| --- | ----------- | +| counter | A cumulative metric that represents a single monotonically increasing counter whose value can only increase or be reset to zero on restart. | +| gauge | A metric that represents a single numerical value that can arbitrarily go up and down. | +| histogram | A metric which samples observations (usually things like request durations or response sizes) and counts them in configurable buckets. | + +See the [Prometheus docs](https://prometheus.io/docs/concepts/metric_types/) for +more on supported metric types. + +### Defined Metrics + +The following metrics are provided with this implementation and will be captured +by default when telemetry is enabled: +| Metric | Type | Description | Labels\* | +| - | - | - | -| +| conjur_http_server_request_exceptions_total| counter | Total number of exceptions that have occured in Conjur during API requests. | operation, exception, message | +| conjur_http_server_requests_total | counter | Total number of API requests handled Conjur and resulting response codes. | operation, code | +| conjur_http_server_request_duration_seconds | histogram | Time series data of API request durations. | operation | +| conjur_server_authenticator | gauge | Number of authenticators installed, configured, and enabled. | type, status | +| conjur_resource_count | counter | Number of resources in the Conjur database. | kind | +| conjur_role_count | counter | Number of roles in the Conjur database. | kind | + +\*Labels are the identifiers by which metrics are logically grouped. For example +`conjur_http_server_requests_total` with the labels `operation` and `code` may +appear like so in the metrics registry: + +```txt +conjur_http_server_requests_total{code="200",operation="getAccessToken"} 1.0 +conjur_http_server_requests_total{code="201",operation="loadPolicy"} 1502.0 +conjur_http_server_requests_total{code="409",operation="loadPolicy"} 1498.0 +conjur_http_server_requests_total{code="401",operation="loadPolicy"} 327.0 +conjur_http_server_requests_total{code="200",operation="getMetrics"} 60.0 +conjur_http_server_requests_total{code="401",operation="unknown"} 62.0 +``` + +This registry format is consistent with the [data model for Prometheus +metrics](https://prometheus.io/docs/concepts/data_model/). + +## Configuration + +### Enabling Metrics Collection + +Metrics telemetry is off by default. It can be enabled in the following ways, +consistent with Conjur's usage of [Anyway Config](https://github.com/palkan/anyway_config): + +| **Name** | **Type** | **Default** | **Required?** | +|----------|----------|-------------|---------------| +| CONJUR_TELEMETRY_ENABLED | Env variable | None | No | +| telemetry_enabled | Key in Config file | None | No | + +Starting Conjur with either of the above configurations set to `true` will result +in initialization of the telemetry feature. + +### Metrics Storage + +Metrics are stored in the Prometheus client store, which is to say they are +stored on the volume of the container running Conjur. The default path for this +is `/tmp/prometheus` but a custom path can also be read in from the environment +variable `CONJUR_METRICS_DIR` on initialization. + +When Prometheus is running alongside Conjur, it can be configured to +periodically scrape metric values via the `/metrics` endpoint. It will keep a +time series of the configured metrics and store this data in a queryable +[on-disk database](https://prometheus.io/docs/prometheus/latest/storage/). See +[prometheus.yml](https://github.com/cyberark/conjur/dev/files/prometheus/prometheus.yml) +for a sample Prometheus config with Conjur as a scrape target. + +## Instrumenting New Metrics + +The following represents a high-level pattern which can be replicated to +instrument new Conjur metrics. Since the actual implementation will vary based +on the type of metric, how the pub/sub event should be instrumented, etc. it is +best to review the existing examples and determine the best approach on a +case-by-case basis. + +1. Create a metric class under the Monitoring::Metrics module (see + `/lib/monitoring/metrics` for examples) +1. Implement `setup(registry, pubsub)` method + 1. Initialize the metric by setting instance variables defining the metric + name, description, labels, etc. + 1. Expose the above instance variables via an attribute reader + 1. Register the metric by calling `Metrics.create_metric(self, :type)` where + type can be `counter`, `gauge`, or `histogram` +1. Implement `update` method to define update behavior + 1. Get the metric from the registry + 1. Determine the label values + 1. Determine and set the metric values +1. Implement a publishing event* + 1. Determine where in the code an event should be triggered which updates + the metric + 1. Use the PubSub singleton class to instrument the correct event i.e. + `Monitoring::PubSub.instance.publish('conjur.policy_loaded')` +1. Add the newly-defined metric to Prometheus initializer + (`/config/initializers/prometheus.rb`) + +*Since instrumenting Pub/Sub events may involve modifying existing code, it +should be as unintrusive as possible. For example, the existing metrics use the +following two methods to avoid modifying any Conjur behavior or impacting +performance: + +* For HTTP requests - instrument the `conjur.request` from the middleware layer +so it does not require changes to Conjur code +* For Policy loading - instrument the `conjur.policy_loaded` event using an +`after_action` hook, which avoids modifying any controller methods + +## Security + +Prometheus supports either an unprotected `/metrics` endpoint, or [basic auth +via the scrape +config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). +For the sake of reducing the burden on developers, it was elected to leave this +endpoint open by handling it in middleware, bypassing authentication +requirements. This was a conscious decision since Conjur already contains other +unprotected endpoints for debugging/status info. None of the metrics data +captured will contain sensitive values or data. + +It was also taken into account that production deployments of Conjur are less +likely to leverage this feature, but if they do, there will almost certainly be +a load balancer which can easily be configured to require basic auth on the +`/metrics` endpoint if required. + +## Integrations + +As mentioned, Prometheus allows for a variety of integrations for monitoring +captured metrics. [Grafana](https://prometheus.io/docs/visualization/grafana/) +provides a popular lightweight option for creating custom dashboards and +visualizing your data based on queries against Prometheus' data store. + +[AWS +Cloudwatch](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights-Prometheus.html) +also offers a powerful option for aggregating metrics stored in Prometheus and +integrating them into its Container Insights platform in AWS +[ECS](https://aws-otel.github.io/docs/getting-started/container-insights/ecs-prometheus) +or +[EKS](https://aws-otel.github.io/docs/getting-started/container-insights/eks-prometheus) +environments. + +Similar options exist for other popular Kubernetes and cloud-monitoring +platforms, such as [Microsoft's Azure +Monitor](https://learn.microsoft.com/en-us/azure/azure-monitor/containers/container-insights-prometheus-integration) +and [Google's Cloud +Monitoring](https://cloud.google.com/stackdriver/docs/managed-prometheus). + +## Performance + +Benchmarks were taken with and without the Conjur telemetry feature enabled. It +was found that having telemetry enabled had only a negligible impact +(sub-millisecond) on system performance for handling most requests. + +By far the most expensive action is policy loading, which triggers an update to +HTTP request metrics as well as resource, role, and authenticator count metrics. +In this case, there was a 2-4% increase in processing time due to the metric +updates having to wait for a DB write to complete before being able to retrieve +the updated metric values. + +The full set of benchmarks can be reviewed +[here.](https://gist.github.com/gl-johnson/4b7fdb70a3b671f634731fe07615cedd) diff --git a/app/controllers/policies_controller.rb b/app/controllers/policies_controller.rb index b28ff8e23d..bf3e7da476 100644 --- a/app/controllers/policies_controller.rb +++ b/app/controllers/policies_controller.rb @@ -3,9 +3,9 @@ class PoliciesController < RestController include FindResource include AuthorizeResource - before_action :current_user before_action :find_or_create_root_policy + after_action :publish_event, if: -> { response.successful? } rescue_from Sequel::UniqueConstraintViolation, with: :concurrent_load @@ -127,6 +127,10 @@ def create_roles(actor_roles) end end + def publish_event + Monitoring::PubSub.instance.publish('conjur.policy_loaded') + end + # If annotation authn/api-key changed from false to true during policy load, # the DB trigger set it to APIKEY. We need to update the api_key to a real one. def update_roles diff --git a/app/domain/errors.rb b/app/domain/errors.rb index 2a9c59c5a3..6593562702 100644 --- a/app/domain/errors.rb +++ b/app/domain/errors.rb @@ -748,4 +748,12 @@ module Util code: "CONJ00044E" ) end + + module Monitoring + + InvalidOrMissingMetricType = ::Util::TrackableErrorClass.new( + msg: "Invalid or missing metric type: {0-metric-type}", + code: "CONJ00152E" + ) + end end diff --git a/app/domain/logs.rb b/app/domain/logs.rb index 4c72d8db1a..e3a8126ecf 100644 --- a/app/domain/logs.rb +++ b/app/domain/logs.rb @@ -936,4 +936,11 @@ module Config code: "CONJ00150W" ) end + + module Monitoring + ExceptionDuringRequestRecording = ::Util::TrackableLogMessageClass.new( + msg: "Exception during request recording: {0-exception}", + code: "CONJ00151D" + ) + end end diff --git a/bin/conjur-cli.rb b/bin/conjur-cli.rb index 8e898b7399..213b864e26 100644 --- a/bin/conjur-cli.rb +++ b/bin/conjur-cli.rb @@ -42,6 +42,10 @@ c.default_value(ENV['PORT'] || '80') c.flag [ :p, :port ] + c.desc 'Skip running database migrations on start' + c.default_value false + c.switch :'no-migrate' + c.desc 'Server bind address' c.default_value(ENV['BIND_ADDRESS'] || '0.0.0.0') c.arg_name :ip @@ -55,7 +59,8 @@ password_from_stdin: options["password-from-stdin"], file_name: options[:file], bind_address: options[:'bind-address'], - port: options[:port] + port: options[:port], + no_migrate: options[:'no-migrate'] ) end end diff --git a/bin/conjur-cli/commands/server.rb b/bin/conjur-cli/commands/server.rb index 2716d0ea47..1100d18b08 100644 --- a/bin/conjur-cli/commands/server.rb +++ b/bin/conjur-cli/commands/server.rb @@ -1,16 +1,19 @@ # frozen_string_literal: true require 'command_class' +require 'sequel' # Required to use $CHILD_STATUS require 'English' require_relative 'db/migrate' +require_relative 'connect_database' module Commands Server ||= CommandClass.new( dependencies: { - migrate_database: DB::Migrate.new + migrate_database: DB::Migrate.new, + connect_database: ConnectDatabase.new }, inputs: %i[ @@ -19,6 +22,7 @@ module Commands file_name bind_address port + no_migrate ] ) do def call @@ -26,7 +30,9 @@ def call # and the schema is up-to-date @migrate_database.call( preview: false - ) + ) unless @no_migrate + + @connect_database.call if @no_migrate # Create and bootstrap the initial # Conjur account and policy diff --git a/config/application.rb b/config/application.rb index 0d05d3dd47..3ccf072ddd 100644 --- a/config/application.rb +++ b/config/application.rb @@ -43,26 +43,9 @@ class Application < Rails::Application config.autoload_paths << Rails.root.join('lib') - config.sequel.after_connect = proc do - Sequel.extension(:core_extensions, :postgres_schemata) - Sequel::Model.db.extension(:pg_array, :pg_inet) - end - - #The default connection pool does not support closing connections. - # We must be able to close connections on demand to clear the connection cache - # after policy loads [cyberark/conjur#2584](https://github.com/cyberark/conjur/pull/2584) - # The [ShardedThreadedConnectionPool](https://www.rubydoc.info/github/jeremyevans/sequel/Sequel/ShardedThreadedConnectionPool) does support closing connections on-demand. - # Sequel is configured to use the ShardedThreadedConnectionPool by setting the servers configuration on - # the database connection [docs](https://www.rubydoc.info/github/jeremyevans/sequel/Sequel%2FShardedThreadedConnectionPool:servers) - config.sequel.servers = {} - config.encoding = "utf-8" config.active_support.escape_html_entities_in_json = true - # Whether to dump the schema after successful migrations. - # Defaults to false in production and test, true otherwise. - config.sequel.schema_dump = false - # Sets all the blank Environment Variables to nil. This ensures that nil # checks are sufficient to verify the usage of an environment variable. ENV.each_pair do |(k, v)| diff --git a/config/initializers/prometheus.rb b/config/initializers/prometheus.rb new file mode 100644 index 0000000000..4986e1cf32 --- /dev/null +++ b/config/initializers/prometheus.rb @@ -0,0 +1,32 @@ +Rails.application.configure do + # The PubSub module needs to be loaded regardless of whether telemetry is + # enabled to prevent errors if/when the injected code executes + require 'monitoring/pub_sub' + return unless config.conjur_config.telemetry_enabled + + # Require all defined metrics/modules + Dir.glob(Rails.root + 'lib/monitoring/**/*.rb', &method(:require)) + + # Register new metrics and setup the Prometheus client store + metrics = [ + Monitoring::Metrics::ApiRequestCounter.new, + Monitoring::Metrics::ApiRequestHistogram.new, + Monitoring::Metrics::ApiExceptionCounter.new, + Monitoring::Metrics::PolicyResourceGauge.new, + Monitoring::Metrics::PolicyRoleGauge.new, + Monitoring::Metrics::AuthenticatorGauge.new, + ] + registry = ::Prometheus::Client::Registry.new + + # Use a callback to perform lazy setup on first incoming request + # - avoids race condition with DB initialization + lazy_init = lambda do + Monitoring::Prometheus.setup(metrics: metrics, registry: registry) + end + + # Initialize Prometheus middleware. We want to ensure that the middleware + # which collects and exports metrics is loaded at the start of the + # middleware chain to prevent any modifications to incoming HTTP requests + Rails.application.config.middleware.insert_before(0, Monitoring::Middleware::PrometheusExporter, registry: registry, path: '/metrics') + Rails.application.config.middleware.insert_before(0, Monitoring::Middleware::PrometheusCollector, pubsub: Monitoring::PubSub.instance, lazy_init: lazy_init) +end diff --git a/config/initializers/sequel.rb b/config/initializers/sequel.rb index 382cf2798f..a15d1f8492 100644 --- a/config/initializers/sequel.rb +++ b/config/initializers/sequel.rb @@ -10,3 +10,46 @@ def write_id_to_json response, field response[field] = value if value end end + +Rails.application.configure do + config.sequel.after_connect = proc do + Sequel.extension(:core_extensions, :postgres_schemata) + Sequel::Model.db.extension(:pg_array, :pg_inet) + end + + # The default connection pool does not support closing connections. + # We must be able to close connections on demand to clear the connection cache + # after policy loads [cyberark/conjur#2584](https://github.com/cyberark/conjur/pull/2584) + # The [ShardedThreadedConnectionPool](https://www.rubydoc.info/github/jeremyevans/sequel/Sequel/ShardedThreadedConnectionPool) does support closing connections on-demand. + # Sequel is configured to use the ShardedThreadedConnectionPool by setting the servers configuration on + # the database connection [docs](https://www.rubydoc.info/github/jeremyevans/sequel/Sequel%2FShardedThreadedConnectionPool:servers) + config.sequel.servers = {} + + # Whether to dump the schema after successful migrations. + # Defaults to false in production and test, true otherwise. + config.sequel.schema_dump = false + + # Max Postgres connections should be no less than the number of threads + # available to the web worker to avoid pool timeouts. + begin + threads_count = Integer(ENV['RAILS_MAX_THREADS'] || 16) + rescue ArgumentError + raise( + "Invalid value for RAILS_MAX_THREADS environment variable: " \ + "'#{ENV['RAILS_MAX_THREADS']}'. " \ + "Value must be a positive integer (default is 16)." + ) + end + + begin + connections_per_thread = Float(ENV['DATABASE_CONNECTIONS_PER_THREAD'] || 1.2) + rescue ArgumentError + raise( + "Invalid value for DATABASE_CONNECTIONS_PER_THREAD environment variable: " \ + "'#{ENV['DATABASE_CONNECTIONS_PER_THREAD']}'. " \ + "Value must be a positive decimal (default is 1.2)." + ) + end + + config.sequel.max_connections = (threads_count * connections_per_thread).ceil +end diff --git a/config/puma.rb b/config/puma.rb index b7d9f911f0..f8b8c5579e 100644 --- a/config/puma.rb +++ b/config/puma.rb @@ -1,7 +1,24 @@ # frozen_string_literal: true -workers Integer(ENV['WEB_CONCURRENCY'] || 2) -threads_count = Integer(ENV['RAILS_MAX_THREADS'] || 5) +begin + workers Integer(ENV['WEB_CONCURRENCY'] || 2) +rescue ArgumentError + raise( + "Invalid value for WEB_CONCURRENCY environment variable: " \ + "'#{ENV['WEB_CONCURRENCY']}'. " \ + "Value must be a positive integer (default is 2)." + ) +end + +begin + threads_count = Integer(ENV['RAILS_MAX_THREADS'] || 5) +rescue ArgumentError + raise( + "Invalid value for RAILS_MAX_THREADS environment variable: " \ + "'#{ENV['RAILS_MAX_THREADS']}'. " \ + "Value must be a positive integer (default is 5)." + ) +end threads threads_count, threads_count # The tag is displayed in the Puma process description, for example: diff --git a/cucumber/policy/features/deletion.feature b/cucumber/policy/features/deletion.feature index 9f6ac3b285..818d8dd102 100644 --- a/cucumber/policy/features/deletion.feature +++ b/cucumber/policy/features/deletion.feature @@ -309,3 +309,62 @@ Feature: Deleting objects and relationships. record: !variable to_be_deleted """ Then variable "to_be_deleted" does not exist + And I list the roles permitted to read variable "db/password" + Then the role list does not include host "host-01" + + @smoke + Scenario: The bulk !deny statement can be used to revoke a permission from roles and members. + Given I load a policy: + """ + - !variable db/address + - !variable db/username + - !variable db/password + - !host host-01 + - !host host-02 + - !host host-03 + - !permit + resources: + - !variable db/address + - !variable db/username + - !variable db/password + privileges: [ update ] + roles: + - !host host-01 + - !host host-02 + - !host host-03 + """ + And I list the roles permitted to update variable "db/address" + Then the role list includes host "host-01" + Then the role list includes host "host-02" + Then the role list includes host "host-03" + And I list the roles permitted to update variable "db/username" + Then the role list includes host "host-01" + Then the role list includes host "host-02" + Then the role list includes host "host-03" + And I list the roles permitted to update variable "db/password" + Then the role list includes host "host-01" + Then the role list includes host "host-02" + Then the role list includes host "host-03" + And I update the policy with: + """ + - !deny + resources: + - !variable db/address + - !variable db/username + privileges: [ update ] + roles: + - !host host-01 + - !host host-02 + """ + When I list the roles permitted to update variable "db/address" + Then the role list does not include host "host-01" + And the role list does not include host "host-02" + And the role list includes host "host-03" + When I list the roles permitted to update variable "db/username" + Then the role list does not include host "host-01" + And the role list does not include host "host-02" + And the role list includes host "host-03" + When I list the roles permitted to update variable "db/password" + Then the role list includes host "host-01" + And the role list includes host "host-02" + And the role list includes host "host-03" diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml index c168d67b73..9e9e207efd 100644 --- a/dev/docker-compose.yml +++ b/dev/docker-compose.yml @@ -183,6 +183,19 @@ services: volumes: - ../ci/jwt/:/usr/src/jwks/ + prometheus: + image: prom/prometheus + volumes: + - ./files/prometheus:/etc/prometheus + ports: + - 9090:9090 + command: --web.enable-lifecycle --config.file=/etc/prometheus/prometheus.yml + + # Node exporter provides CPU and Memory metrics to Prometheus for the Docker + # host machine. + node-exporter: + image: quay.io/prometheus/node-exporter:latest + ephemeral-secrets: image: 238637036211.dkr.ecr.us-east-1.amazonaws.com/mgmt-ephemeral-service-dev-repository-conjur:latest ports: diff --git a/dev/files/prometheus/alerts.yml b/dev/files/prometheus/alerts.yml new file mode 100644 index 0000000000..1c2a0b683f --- /dev/null +++ b/dev/files/prometheus/alerts.yml @@ -0,0 +1,27 @@ +groups: + - name: Hardware alerts + rules: + - alert: Node down + expr: up{job="node_exporter"} == 0 + for: 3m + labels: + severity: warning + annotations: + title: Node {{ $labels.instance }} is down + description: Failed to scrape {{ $labels.job }} on {{ $labels.instance }} for more than 3 minutes. Node seems down. + + - alert: Low free space + expr: (node_filesystem_free{mountpoint !~ "/mnt.*"} / node_filesystem_size{mountpoint !~ "/mnt.*"} * 100) < 15 + for: 1m + labels: + severity: warning + annotations: + title: Low free space on {{ $labels.instance }} + description: On {{ $labels.instance }} device {{ $labels.device }} mounted on {{ $labels.mountpoint }} has low free space of {{ $value }}% + + - alert: Conjur Down + expr: up{job="conjur"} < 1 + for: 1m + annotations: + title: Conjur is down + description: Failed to scrape Conjur on {{ $labels.instance }} for more than 1 minute. Node seems down. diff --git a/dev/files/prometheus/prometheus.yml b/dev/files/prometheus/prometheus.yml new file mode 100644 index 0000000000..d9743c256b --- /dev/null +++ b/dev/files/prometheus/prometheus.yml @@ -0,0 +1,21 @@ +global: + scrape_interval: "15s" + +rule_files: + - alert.yml + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: + - "localhost:9090" + + - job_name: "node-exporter" + static_configs: + - targets: + - "node-exporter:9100" + + - job_name: "conjur" + static_configs: + - targets: + - "conjur:3000" diff --git a/dev/start b/dev/start index 3960660df5..ec91de8df9 100755 --- a/dev/start +++ b/dev/start @@ -30,6 +30,7 @@ ENABLE_AUTHN_IAM=false ENABLE_AUTHN_JWT=false ENABLE_AUTHN_LDAP=false ENABLE_AUTHN_OIDC=false +ENABLE_METRICS=false ENABLE_OIDC_ADFS=false ENABLE_OIDC_IDENTITY=false ENABLE_OIDC_KEYCLOAK=false @@ -77,6 +78,8 @@ main() { init_oidc init_rotators init_ephemeral_secrets + init_metrics + # Updates CONJUR_AUTHENTICATORS and restarts required services. start_auth_services create_alice @@ -101,6 +104,7 @@ Usage: start [options] 'curl -X POST -d "alice" http://localhost:3000/authn-ldap/test/cucumber/alice/authenticate' -h, --help Shows this help message. --identity-user Identity user to create in Conjur + --metrics Starts with the prometheus telemetry features enabled --oidc-adfs Adds to authn-oidc adfs static env configuration --oidc-identity Starts with authn-oidc/identity as available authenticator. Must be paired with --identity-user flag. @@ -127,6 +131,7 @@ parse_options() { --authn-iam ) ENABLE_AUTHN_IAM=true ; shift ;; --authn-jwt ) ENABLE_AUTHN_JWT=true ; ENABLE_OIDC_KEYCLOAK=true ; shift ;; --authn-ldap ) ENABLE_AUTHN_LDAP=true ; shift ;; + --metrics ) ENABLE_METRICS=true ; shift ;; -h | --help ) print_help ; shift ;; --identity-user ) IDENTITY_USER="$2" ; shift ; shift ;; --oidc-adfs ) ENABLE_AUTHN_OIDC=true ; ENABLE_OIDC_ADFS=true ; shift ;; @@ -498,6 +503,14 @@ init_iam() { "/src/conjur-server/dev/files/authn-iam/policy.yml" } +init_metrics() { + if [[ $ENABLE_METRICS != true ]]; then + return + fi + env_args+=(-e "CONJUR_TELEMETRY_ENABLED=true") + services+=(prometheus node-exporter) +} + start_auth_services() { echo "Setting CONJUR_AUTHENTICATORS to: $enabled_authenticators" env_args+=(-e "CONJUR_AUTHENTICATORS=$enabled_authenticators") diff --git a/distrib/conjur/etc/possum.conf b/distrib/conjur/etc/possum.conf index 922058e79f..1909707bbb 100644 --- a/distrib/conjur/etc/possum.conf +++ b/distrib/conjur/etc/possum.conf @@ -1,4 +1,2 @@ -PUMA_THREAD_MIN=0 -PUMA_THREAD_MAX=16 +RAILS_MAX_THREADS=16 PORT=5000 - diff --git a/lib/conjur/conjur_config.rb b/lib/conjur/conjur_config.rb index 40f0f0c382..5a8c29c823 100644 --- a/lib/conjur/conjur_config.rb +++ b/lib/conjur/conjur_config.rb @@ -39,6 +39,7 @@ class ConjurConfig < Anyway::Config authn_api_key_default: true, authenticators: [], extensions: [], + telemetry_enabled: false, slosilo_rotation_interval: 24, # Sloislo rotation should be every 24 hours tenant_id: @tenant_id, tenant_name: @tenant_name, @@ -93,6 +94,7 @@ def initialize( invalid << "trusted_proxies" unless trusted_proxies_valid? invalid << "authenticators" unless authenticators_valid? + invalid << "telemetry_enabled" unless telemetry_enabled_valid? unless invalid.empty? msg = "Invalid values for configured attributes: #{invalid.join(',')}" @@ -270,5 +272,9 @@ def authenticators_valid? rescue false end + + def telemetry_enabled_valid? + [true, false].include?(telemetry_enabled) + end end end diff --git a/lib/monitoring/metrics.rb b/lib/monitoring/metrics.rb new file mode 100644 index 0000000000..ede73fc5aa --- /dev/null +++ b/lib/monitoring/metrics.rb @@ -0,0 +1,60 @@ +module Monitoring + module Metrics + extend self + + def create_metric(metric, type) + case type.to_sym + when :counter + create_counter_metric(metric) + when :gauge + create_gauge_metric(metric) + when :histogram + create_histogram_metric(metric) + else + raise Errors::Monitoring::InvalidOrMissingMetricType, type.to_s + end + end + + private + + def create_gauge_metric(metric) + gauge = ::Prometheus::Client::Gauge.new( + metric.metric_name, + docstring: metric.docstring, + labels: metric.labels, + store_settings: { + aggregation: :most_recent + } + ) + metric.registry.register(gauge) + setup_subscriber(metric) + end + + def create_counter_metric(metric) + counter = ::Prometheus::Client::Counter.new( + metric.metric_name, + docstring: metric.docstring, + labels: metric.labels + ) + metric.registry.register(counter) + setup_subscriber(metric) + end + + def create_histogram_metric(metric) + histogram = ::Prometheus::Client::Histogram.new( + metric.metric_name, + docstring: metric.docstring, + labels: metric.labels + ) + metric.registry.register(histogram) + setup_subscriber(metric) + end + + def setup_subscriber(metric) + metric.pubsub.subscribe(metric.sub_event_name) do |payload| + metric.update(payload) + end + end + + end +end diff --git a/lib/monitoring/metrics/api_exception_counter.rb b/lib/monitoring/metrics/api_exception_counter.rb new file mode 100644 index 0000000000..616512384f --- /dev/null +++ b/lib/monitoring/metrics/api_exception_counter.rb @@ -0,0 +1,29 @@ +module Monitoring + module Metrics + class ApiExceptionCounter + attr_reader :registry, :pubsub, :metric_name, :docstring, :labels, :sub_event_name + + def setup(registry, pubsub) + @registry = registry + @pubsub = pubsub + @metric_name = :conjur_http_server_request_exceptions_total + @docstring = 'The total number of API exceptions raised by Conjur.' + @labels = %i[operation exception message] + @sub_event_name = 'conjur.request_exception' + + # Create/register the metric + Metrics.create_metric(self, :counter) + end + + def update(payload) + metric = registry.get(metric_name) + update_labels = { + operation: payload[:operation], + exception: payload[:exception], + message: payload[:message] + } + metric.increment(labels: update_labels) + end + end + end +end diff --git a/lib/monitoring/metrics/api_request_counter.rb b/lib/monitoring/metrics/api_request_counter.rb new file mode 100644 index 0000000000..cd27e00d89 --- /dev/null +++ b/lib/monitoring/metrics/api_request_counter.rb @@ -0,0 +1,28 @@ +module Monitoring + module Metrics + class ApiRequestCounter + attr_reader :registry, :pubsub, :metric_name, :docstring, :labels, :sub_event_name + + def setup(registry, pubsub) + @registry = registry + @pubsub = pubsub + @metric_name = :conjur_http_server_requests_total + @docstring = 'The total number of HTTP requests handled by Conjur.' + @labels = %i[code operation] + @sub_event_name = 'conjur.request' + + # Create/register the metric + Metrics.create_metric(self, :counter) + end + + def update(payload) + metric = registry.get(metric_name) + update_labels = { + code: payload[:code], + operation: payload[:operation] + } + metric.increment(labels: update_labels) + end + end + end +end diff --git a/lib/monitoring/metrics/api_request_histogram.rb b/lib/monitoring/metrics/api_request_histogram.rb new file mode 100644 index 0000000000..0d7fa2d59b --- /dev/null +++ b/lib/monitoring/metrics/api_request_histogram.rb @@ -0,0 +1,27 @@ +module Monitoring + module Metrics + class ApiRequestHistogram + attr_reader :registry, :pubsub, :metric_name, :docstring, :labels, :sub_event_name + + def setup(registry, pubsub) + @registry = registry + @pubsub = pubsub + @metric_name = :conjur_http_server_request_duration_seconds + @docstring = 'The HTTP response duration of requests handled by Conjur.' + @labels = %i[operation] + @sub_event_name = 'conjur.request' + + # Create/register the metric + Metrics.create_metric(self, :histogram) + end + + def update(payload) + metric = registry.get(metric_name) + update_labels = { + operation: payload[:operation] + } + metric.observe(payload[:duration], labels: update_labels) + end + end + end +end diff --git a/lib/monitoring/metrics/authenticator_gauge.rb b/lib/monitoring/metrics/authenticator_gauge.rb new file mode 100644 index 0000000000..66bb1891ee --- /dev/null +++ b/lib/monitoring/metrics/authenticator_gauge.rb @@ -0,0 +1,64 @@ +module Monitoring + module Metrics + class AuthenticatorGauge + attr_reader :registry, :pubsub, :metric_name, :docstring, :labels, :sub_event_name + + def initialize(installed_authenticators: Authentication::InstalledAuthenticators) + @metric_name = :conjur_server_authenticator + @docstring = 'Number of authenticators enabled' + @labels = %i[type status] + @sub_event_name = 'conjur.policy_loaded' + + @installed_authenticators = installed_authenticators + end + + def setup(registry, pubsub) + @registry = registry + @pubsub = pubsub + + # Create/register the metric + Metrics.create_metric(self, :gauge) + + # Run update to set the initial counts on startup + update + end + + def update(*_payload) + metric = registry.get(metric_name) + update_enabled_authenticators(metric) + update_installed_authenticators(metric) + update_configured_authenticators(metric) + end + + def update_enabled_authenticators(metric) + enabled_authenticators = @installed_authenticators.enabled_authenticators + enabled_authenticator_counts = get_authenticator_counts(enabled_authenticators) + enabled_authenticator_counts.each do |type, count| + metric.set(count, labels: { type: type, status: 'enabled' }) + end + end + + def update_installed_authenticators(metric) + installed_authenticators = @installed_authenticators.authenticators(ENV).keys + installed_authenticators.each do |type| + metric.set(1, labels: { type: type, status: 'installed' }) + end + end + + def update_configured_authenticators(metric) + configured_authenticators = @installed_authenticators.configured_authenticators + configured_authenticator_counts = get_authenticator_counts(configured_authenticators) + configured_authenticator_counts.each do |type, count| + metric.set(count, labels: { type: type, status: 'configured' }) + end + end + + def get_authenticator_counts(authenticators) + authenticators.each_with_object(Hash.new(0)) do |authenticator, rtn| + type = authenticator.split('/').first + rtn[type] += 1 + end + end + end + end +end diff --git a/lib/monitoring/metrics/policy_resouce_gauge.rb b/lib/monitoring/metrics/policy_resouce_gauge.rb new file mode 100644 index 0000000000..b7a1d1a061 --- /dev/null +++ b/lib/monitoring/metrics/policy_resouce_gauge.rb @@ -0,0 +1,29 @@ +module Monitoring + module Metrics + class PolicyResourceGauge + attr_reader :registry, :pubsub, :metric_name, :docstring, :labels, :sub_event_name + + def setup(registry, pubsub) + @registry = registry + @pubsub = pubsub + @metric_name = :conjur_resource_count + @docstring = 'Number of resources in Conjur database' + @labels = %i[kind] + @sub_event_name = 'conjur.policy_loaded' + + # Create/register the metric + Metrics.create_metric(self, :gauge) + + # Run update to set the initial counts on startup + update + end + + def update(*_payload) + metric = registry.get(metric_name) + Monitoring::QueryHelper.instance.policy_resource_counts.each do |kind, value| + metric.set(value, labels: { kind: kind }) + end + end + end + end +end diff --git a/lib/monitoring/metrics/policy_role_gauge.rb b/lib/monitoring/metrics/policy_role_gauge.rb new file mode 100644 index 0000000000..540b7bfa48 --- /dev/null +++ b/lib/monitoring/metrics/policy_role_gauge.rb @@ -0,0 +1,29 @@ +module Monitoring + module Metrics + class PolicyRoleGauge + attr_reader :registry, :pubsub, :metric_name, :docstring, :labels, :sub_event_name + + def setup(registry, pubsub) + @registry = registry + @pubsub = pubsub + @metric_name = :conjur_role_count + @docstring = 'Number of roles in Conjur database' + @labels = %i[kind] + @sub_event_name = 'conjur.policy_loaded' + + # Create/register the metric + Metrics.create_metric(self, :gauge) + + # Run update to set the initial counts on startup + update + end + + def update(*_payload) + metric = registry.get(metric_name) + Monitoring::QueryHelper.instance.policy_role_counts.each do |kind, value| + metric.set(value, labels: { kind: kind }) unless kind == '!' + end + end + end + end +end diff --git a/lib/monitoring/middleware/prometheus_collector.rb b/lib/monitoring/middleware/prometheus_collector.rb new file mode 100644 index 0000000000..a58d9892f6 --- /dev/null +++ b/lib/monitoring/middleware/prometheus_collector.rb @@ -0,0 +1,63 @@ +require 'benchmark' +require_relative '../operations' + +module Monitoring + module Middleware + class PrometheusCollector + attr_reader :app + + def initialize(app, options = {}) + @app = app + @pubsub = options[:pubsub] + @lazy_init = options[:lazy_init] + end + + def call(env) # :nodoc: + unless @initialized + @initialized = true + @lazy_init.call + end + trace(env) { @app.call(env) } + end + + protected + + # Trace HTTP requests + def trace(env) + response = nil + operation = find_operation(env['REQUEST_METHOD'], env['PATH_INFO']) + duration = Benchmark.realtime { response = yield } + record(env, response.first.to_s, duration, operation) + response + rescue => e + @pubsub.publish( + "conjur.request_exception", + operation: operation, + exception: e.class.name, + message: e + ) + raise + end + + def record(_env, code, duration, operation) + @pubsub.publish( + "conjur.request", + code: code, + operation: operation, + duration: duration + ) + rescue => e + @logger.debug(LogMessages::Monitoring::ExceptionDuringRequestRecording.new(e.inspect)) + end + + def find_operation(method, path) + Monitoring::Metrics::OPERATIONS.each do |op| + if op[:method] == method && op[:pattern].match?(path) + return op[:operation] + end + end + "unknown" + end + end + end +end diff --git a/lib/monitoring/middleware/prometheus_exporter.rb b/lib/monitoring/middleware/prometheus_exporter.rb new file mode 100644 index 0000000000..a233c19662 --- /dev/null +++ b/lib/monitoring/middleware/prometheus_exporter.rb @@ -0,0 +1,89 @@ +require 'prometheus/client' +require 'prometheus/client/formats/text' + +module Monitoring + module Middleware + # Exporter is a Rack middleware that provides a sample implementation of a + # Prometheus HTTP exposition endpoint. + # + # By default it will export the state of the global registry and expose it + # under `/metrics`. Use the `:registry` and `:path` options to change the + # defaults. + class PrometheusExporter + attr_reader :app + + FORMATS = [::Prometheus::Client::Formats::Text].freeze + FALLBACK = ::Prometheus::Client::Formats::Text + + def initialize(app, options = {}) + @app = app + @path = options[:path] + @registry = options[:registry] + @acceptable = build_dictionary(FORMATS, FALLBACK) + end + + def call(env) + if env['PATH_INFO'] == @path + format = negotiate(env, @acceptable) + format ? respond_with(format) : not_acceptable(FORMATS) + else + @app.call(env) + end + end + + private + + def negotiate(env, formats) + parse(env.fetch('HTTP_ACCEPT', '*/*')).each do |content_type, _| + return formats[content_type] if formats.key?(content_type) + end + + nil + end + + def parse(header) + header.split(/\s*,\s*/).map do |type| + attributes = type.split(/\s*;\s*/) + quality = extract_quality(attributes) + + [attributes.join('; '), quality] + end.sort_by(&:last).reverse + end + + def extract_quality(attributes, default = 1.0) + quality = default + + attributes.delete_if do |attr| + quality = attr.split('q=').last.to_f if attr.start_with?('q=') + end + + quality + end + + def respond_with(format) + [ + 200, + { 'Content-Type' => format::CONTENT_TYPE }, + [format.marshal(@registry)] + ] + end + + def not_acceptable(formats) + types = formats.map { |format| format::MEDIA_TYPE } + + [ + 406, + { 'Content-Type' => 'text/plain' }, + ["Supported media types: #{types.join(', ')}"] + ] + end + + def build_dictionary(formats, fallback) + formats.each_with_object('*/*' => fallback) do |format, memo| + memo[format::CONTENT_TYPE] = format + memo[format::MEDIA_TYPE] = format + end + end + end + end +end diff --git a/lib/monitoring/operations.rb b/lib/monitoring/operations.rb new file mode 100644 index 0000000000..55c8fe39d6 --- /dev/null +++ b/lib/monitoring/operations.rb @@ -0,0 +1,227 @@ +module Monitoring + module Metrics + OPERATIONS = [ + # AccountsApi (undocumented) + { + method: "POST", + pattern: %r{^(/accounts)$}, + operation: "createAccount" + }, + { + method: "GET", + pattern: %r{^(/accounts)$}, + operation: "getAccounts" + }, + { + method: "DELETE", + pattern: %r{^(/accounts)(/[^/]+)$}, + operation: "deleteAccount" + }, + + # AuthenticationApi + { + method: "PUT", + pattern: %r{^(/authn)(/[^/]+)(/password)$}, + operation: "changePassword" + }, + { + method: "PATCH", + pattern: %r{^(/authn-)([^/]+)(/[^/]+){2,3}$}, + operation: "enableAuthenticatorInstance" + }, + { + method: "GET", + pattern: %r{^(/authn)(/[^/]+)(/login)$}, + operation: "getAPIKey" + }, + { + method: "GET", + pattern: %r{^(/authn-ldap)(/[^/]+){2}(/login)$}, + operation: "getAPIKeyViaLDAP" + }, + { + method: "POST", + pattern: %r{^(/authn)(/[^/]+){2}(/authenticate)$}, + operation: "getAccessToken" + }, + { + method: "POST", + pattern: %r{^(/authn-iam)(/[^/]+){3}(/authenticate)$}, + operation: "getAccessTokenViaAWS" + }, + { + method: "POST", + pattern: %r{^(/authn-azure)(/[^/]+){3}(/authenticate)$}, + operation: "getAccessTokenViaAzure" + }, + { + method: "POST", + pattern: %r{^(/authn-gcp)(/[^/]+)(/authenticate)$}, + operation: "getAccessTokenViaGCP" + }, + { + method: "POST", + pattern: %r{^(/authn-k8s)(/[^/]+){3}(/authenticate)$}, + operation: "getAccessTokenViaKubernetes" + }, + { + method: "POST", + pattern: %r{^(/authn-ldap)(/[^/]+){3}(/authenticate)$}, + operation: "getAccessTokenViaLDAP" + }, + { + method: "POST", + pattern: %r{^(/authn-oidc)(/[^/]+){2}(/authenticate)$}, + operation: "getAccessTokenViaOIDC" + }, + { + method: "POST", + pattern: %r{^(/authn-jwt)(/[^/]+){2,3}(/authenticate)$}, + operation: "getAccessTokenViaJWT" + }, + { + method: "POST", + pattern: %r{^(/authn-k8s)(/[^/]+)(/inject_client_cert)$}, + operation: "k8sInjectClientCert" + }, + { + method: "PUT", + pattern: %r{^(/authn)(/[^/]+)(/api_key)$}, + operation: "rotateAPIKey" + }, + + # CertificateAuthorityApi + { + method: "POST", + pattern: %r{^(/ca)(/[^/]+){2}(/sign)$}, + operation: "sign" + }, + + # HostFactoryApi + { + method: "POST", + pattern: %r{^(/host_factories/hosts)$}, + operation: "createHost" + }, + { + method: "POST", + pattern: %r{^(/host_factory_tokens)$}, + operation: "createToken" + }, + { + method: "DELETE", + pattern: %r{^(/host_factory_tokens)(/[^/]+)$}, + operation: "revokeToken" + }, + + # MetricsApi + { + method: "GET", + pattern: %r{^(/metrics)$}, + operation: "getMetrics" + }, + + # PoliciesApi + { + method: "POST", + pattern: %r{^(/policies)(/[^/]+){2,3}(/.*)$}, + operation: "loadPolicy" + }, + { + method: "PUT", + pattern: %r{^(/policies)(/[^/]+){2,3}(/.*)$}, + operation: "replacePolicy" + }, + { + method: "PATCH", + pattern: %r{^(/policies)(/[^/]+){2,3}(/.*)$}, + operation: "updatePolicy" + }, + + # PublicKeysApi + { + method: "GET", + pattern: %r{^(/public_keys)(/[^/]+){3}$}, + operation: "showPublicKeys" + }, + + # ResourcesApi + { + method: "GET", + pattern: %r{^(/resources)(/[^/]+){3}(/.*)$}, + operation: "showResource" + }, + { + method: "GET", + pattern: %r{^(/resources)(/[^/]+){1}$}, + operation: "showResourcesForAccount" + }, + { + method: "GET", + pattern: %r{^(/resources$)}, + operation: "showResourcesForAllAccounts" + }, + { + method: "GET", + pattern: %r{^(/resources)(/[^/]+){2}$}, + operation: "showResourcesForKind" + }, + + # RolesApi + { + method: "POST", + pattern: %r{^(/roles)(/[^/]+){3}$}, + operation: "addMemberToRole" + }, + { + method: "DELETE", + pattern: %r{^(/roles)(/[^/]+){3}$}, + operation: "removeMemberFromRole" + }, + { + method: "GET", + pattern: %r{^(/roles)(/[^/]+){3}$}, + operation: "showRole" + }, + + # SecretsApi + { + method: "POST", + pattern: %r{^(/secrets)(/[^/]+){2}(/.*)$}, + operation: "createSecret" + }, + { + method: "GET", + pattern: %r{^(/secrets)(/[^/]+){3}$}, + operation: "getSecret" + }, + { + method: "GET", + pattern: %r{^(/secrets)$}, + operation: "getSecrets" + }, + + # StatusApi + { + method: "GET", + pattern: %r{^(/authenticators)$}, + operation: "getAuthenticators" + }, + { + method: "GET", + pattern: %r{^(/authn-gcp)(/[^/]+)(/status)$}, + operation: "getGCPAuthenticatorStatus" + }, + { + method: "GET", + pattern: %r{^(/authn-)([^/]+)(/[^/]+){2}(/status)$}, + operation: "getServiceAuthenticatorStatus" + }, + { + method: "GET", + pattern: %r{^(/whoami)$}, + operation: "whoAmI" + } + ].freeze + end +end diff --git a/lib/monitoring/prometheus.rb b/lib/monitoring/prometheus.rb new file mode 100644 index 0000000000..aa5daf1dd6 --- /dev/null +++ b/lib/monitoring/prometheus.rb @@ -0,0 +1,47 @@ +require 'prometheus/client' +require 'prometheus/client/data_stores/direct_file_store' +require_relative './pub_sub' + +module Monitoring + module Prometheus + extend self + + attr_reader :registry + + def setup(options = {}) + @registry = options[:registry] || ::Prometheus::Client::Registry.new + @metrics_dir_path = ENV['CONJUR_METRICS_DIR'] || '/tmp/prometheus' + @pubsub = options[:pubsub] || PubSub.instance + + # Array of objects representing different metrics. + # Each objects needs a .setup method, responsible for registering metrics + # and subscribing to Pub/Sub events. + @metrics = options[:metrics] || [] + + clear_data_store + configure_data_store + setup_metrics + end + + protected + + def clear_data_store + Dir[File.join(@metrics_dir_path, '*.bin')].each do |file_path| + File.unlink(file_path) + end + end + + def configure_data_store + ::Prometheus::Client.config.data_store = ::Prometheus::Client::DataStores::DirectFileStore.new( + dir: @metrics_dir_path + ) + end + + def setup_metrics + @metrics.each do |metric| + metric.setup(@registry, @pubsub) + end + end + + end +end diff --git a/lib/monitoring/pub_sub.rb b/lib/monitoring/pub_sub.rb new file mode 100644 index 0000000000..6745f0d1a7 --- /dev/null +++ b/lib/monitoring/pub_sub.rb @@ -0,0 +1,24 @@ +require 'singleton' +require 'active_support/notifications' + +module Monitoring + # PubSub wraps ActiveSupport::Notifications, providing pub/sub + # plumbing to custom controllers and collectors. + class PubSub + include Singleton + + def publish(name, payload = {}) + ActiveSupport::Notifications.instrument(name, payload) + end + + def subscribe(name) + ActiveSupport::Notifications.subscribe(name) do |_, _, _, _, payload| + yield(payload) + end + end + + def unsubscribe(name) + ActiveSupport::Notifications.unsubscribe(name) + end + end +end diff --git a/lib/monitoring/query_helper.rb b/lib/monitoring/query_helper.rb new file mode 100644 index 0000000000..7077a03557 --- /dev/null +++ b/lib/monitoring/query_helper.rb @@ -0,0 +1,25 @@ +require 'singleton' + +module Monitoring + class QueryHelper + include Singleton + + def policy_resource_counts + counts = {} + kind = ::Sequel.function(:kind, :resource_id) + Resource.group_and_count(kind).each do |record| + counts[record[:kind]] = record[:count] + end + counts + end + + def policy_role_counts + counts = {} + kind = ::Sequel.function(:kind, :role_id) + Role.group_and_count(kind).each do |record| + counts[record[:kind]] = record[:count] + end + counts + end + end +end diff --git a/spec/conjurctl/commands/server_spec.rb b/spec/conjurctl/commands/server_spec.rb new file mode 100644 index 0000000000..ab49ebe7ef --- /dev/null +++ b/spec/conjurctl/commands/server_spec.rb @@ -0,0 +1,76 @@ +require 'spec_helper' + +require 'commands/server' + +describe Commands::Server do + + let(:account) { "demo" } + let(:password_from_stdin) { false } + let(:file_name) { nil } + let(:bind_address) { "0.0.0.0" } + let(:port) { 80 } + let(:no_migrate) { false } + + let(:migrate_database) { + double('DB::Migrate').tap do |migrate| + allow(migrate).to receive(:call).with(preview: false) + end + } + let(:connect_database) { + double('ConnectDatabase').tap do |connect| + allow(connect).to receive(:call) + end + } + + before do + # Squash process forking for these tests as we have not implemented a full test + # suite and it causes issues + allow(Process).to receive(:fork).and_return(nil) + allow(Process).to receive(:waitall).and_return(nil) + end + + def delete_account(name) + system("conjurctl account delete #{name}") + end + + after(:each) do + delete_account("demo") + end + + + subject do + Commands::Server.new( + migrate_database: migrate_database, + connect_database: connect_database + ).call( + account: account, + password_from_stdin: password_from_stdin, + file_name: file_name, + bind_address: bind_address, + port: port, + no_migrate: no_migrate + ) + end + + it "performs migrations" do + expect(migrate_database).to receive(:call) + + subject + end + + context "With the no_migrate variable set to true" do + let(:no_migrate) { true } + + it "doesn't perform migrations" do + expect(migrate_database).not_to receive(:call) + + subject + end + + it "connects to the database" do + expect(connect_database).to receive(:call) + + subject + end + end +end diff --git a/spec/lib/conjur/conjur_config_spec.rb b/spec/lib/conjur/conjur_config_spec.rb index 88b05c6431..d4c07fb59f 100644 --- a/spec/lib/conjur/conjur_config_spec.rb +++ b/spec/lib/conjur/conjur_config_spec.rb @@ -20,11 +20,14 @@ it "uses default value if not set by environment variable or config file" do expect(subject.trusted_proxies).to eq([]) + expect(subject.telemetry_enabled).to eq(false) end it "reports the attribute source as :defaults" do expect(subject.attribute_sources[:trusted_proxies]). to eq(:defaults) + expect(subject.attribute_sources[:telemetry_enabled]). + to eq(:defaults) end context "with config file" do @@ -32,6 +35,7 @@ <<~YAML trusted_proxies: - 1.2.3.4 + telemetry_enabled: true YAML end @@ -58,11 +62,14 @@ it "reads config value from file" do expect(subject.trusted_proxies).to eq(["1.2.3.4"]) + expect(subject.telemetry_enabled).to eq(true) end it "reports the attribute source as :yml" do expect(subject.attribute_sources[:trusted_proxies]). to eq(:yml) + expect(subject.attribute_sources[:telemetry_enabled]). + to eq(:yml) end context "with a config file that is a string value" do @@ -121,6 +128,7 @@ context "with prefixed env var" do before do ENV['CONJUR_TRUSTED_PROXIES'] = "5.6.7.8" + ENV['CONJUR_TELEMETRY_ENABLED'] = "false" # Anyway Config caches prefixed env vars at the class level so we must # clear the cache to have it pick up the new var with a reload. @@ -129,6 +137,7 @@ after do ENV.delete('CONJUR_TRUSTED_PROXIES') + ENV.delete('CONJUR_TELEMETRY_ENABLED') # Clear again to make sure we don't affect future tests. Anyway.env.clear @@ -136,11 +145,14 @@ it "overrides the config file value" do expect(subject.trusted_proxies).to eq(["5.6.7.8"]) + expect(subject.telemetry_enabled).to eq(false) end it "reports the attribute source as :env" do expect(subject.attribute_sources[:trusted_proxies]). to eq(:env) + expect(subject.attribute_sources[:telemetry_enabled]). + to eq(:env) end end @@ -171,7 +183,8 @@ let(:config_kwargs) do { authenticators: "invalid-authn", - trusted_proxies: "boop" + trusted_proxies: "boop", + telemetry_enabled: "beep" } end @@ -185,6 +198,8 @@ to raise_error(/trusted_proxies/) expect { subject }. to raise_error(/authenticators/) + expect { subject }. + to raise_error(/telemetry_enabled/) end it "does not include the value that failed validation" do @@ -192,6 +207,8 @@ to_not raise_error(/boop/) expect { subject }. to_not raise_error(/invalid-authn/) + expect { subject }. + to_not raise_error(/beep/) end end @@ -347,6 +364,13 @@ end end end + + describe "metrics endpoint is disabled by default", type: :request do + it "returns a 401" do + get '/metrics' + expect(response).to have_http_status(401) + end + end end # Helper method for the config file tests to create a temporary directory for diff --git a/spec/lib/monitoring/metrics/authenticator_metrics_spec.rb b/spec/lib/monitoring/metrics/authenticator_metrics_spec.rb new file mode 100644 index 0000000000..2643638d06 --- /dev/null +++ b/spec/lib/monitoring/metrics/authenticator_metrics_spec.rb @@ -0,0 +1,102 @@ +require 'spec_helper' +require 'monitoring/query_helper' +require 'monitoring/metrics/authenticator_gauge' + +describe 'authenticator metrics', type: :request do + + before do + @authenticator_metric = Monitoring::Metrics::AuthenticatorGauge.new + pubsub.unsubscribe(@authenticator_metric.sub_event_name) + + + # Clear and setup the Prometheus client store + Monitoring::Prometheus.setup( + registry: Prometheus::Client::Registry.new, + metrics: metrics + ) + + Slosilo["authn:rspec"] ||= Slosilo::Key.new + end + + def headers_with_auth(payload) + token_auth_header.merge({ 'RAW_POST_DATA' => payload }) + end + + let(:registry) { Monitoring::Prometheus.registry } + + let(:metrics) { [ @authenticator_metric ] } + + let(:pubsub) { Monitoring::PubSub.instance } + + let(:policies_url) { '/policies/rspec/policy/root' } + + let(:current_user) { Role.find_or_create(role_id: 'rspec:user:admin') } + + let(:token_auth_header) do + bearer_token = Slosilo["authn:rspec"].signed_token(current_user.login) + token_auth_str = + "Token token=\"#{Base64.strict_encode64(bearer_token.to_json)}\"" + { 'HTTP_AUTHORIZATION' => token_auth_str } + end + + context 'when a policy is loaded' do + + it 'publishes a policy load event (POST)' do + expect(Monitoring::PubSub.instance).to receive(:publish).with(@authenticator_metric.sub_event_name) + + post(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'publishes a policy load event (PUT)' do + expect(Monitoring::PubSub.instance).to receive(:publish).with(@authenticator_metric.sub_event_name) + + put(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'publishes a policy load event (PATCH)' do + expect(Monitoring::PubSub.instance).to receive(:publish).with(@authenticator_metric.sub_event_name) + + patch(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'calls update on the correct metrics' do + expect(@authenticator_metric).to receive(:update) + + post(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'updates the registry' do + authenticators_before = registry.get(@authenticator_metric.metric_name).get(labels: { type: 'authn-jwt', status: 'configured' }) + post(policies_url, env: headers_with_auth( + <<~POLICY + - !policy + id: conjur/authn-jwt/sysadmins + body: + - !webservice + + - !group + id: clients + + - !permit + resource: !webservice + privilege: [ read, authenticate ] + role: !group clients + POLICY + )) + + authenticators_after = registry.get(@authenticator_metric.metric_name).get(labels: { type: 'authn-jwt', status: 'configured' }) + + expect(authenticators_after - authenticators_before).to eql(1.0) + end + + it 'trims the authenticator service id' do + authenticators = ['authn', 'authn-iam/some-service/id', 'authn-oidc/some/nested/service-id', 'authn-oidc/some/other/service-id'] + authenticator_counts = @authenticator_metric.get_authenticator_counts(authenticators) + + expect(authenticator_counts['authn']).to eql(1) + expect(authenticator_counts['authn-iam']).to eql(1) + expect(authenticator_counts['authn-oidc']).to eql(2) + end + + end +end diff --git a/spec/lib/monitoring/metrics/policy_metrics_spec.rb b/spec/lib/monitoring/metrics/policy_metrics_spec.rb new file mode 100644 index 0000000000..c4553e76bd --- /dev/null +++ b/spec/lib/monitoring/metrics/policy_metrics_spec.rb @@ -0,0 +1,84 @@ +require 'spec_helper' +require 'monitoring/query_helper' +Dir.glob(Rails.root + 'lib/monitoring/metrics/policy_*.rb', &method(:require)) + +describe 'policy metrics', type: :request do + + before do + @resource_metric = Monitoring::Metrics::PolicyResourceGauge.new + @role_metric = Monitoring::Metrics::PolicyRoleGauge.new + pubsub.unsubscribe(policy_load_event_name) + + # Clear and setup the Prometheus client store + Monitoring::Prometheus.setup( + registry: Prometheus::Client::Registry.new, + metrics: metrics + ) + + Slosilo["authn:rspec"] ||= Slosilo::Key.new + end + + def headers_with_auth(payload) + token_auth_header.merge({ 'RAW_POST_DATA' => payload }) + end + + let(:registry) { Monitoring::Prometheus.registry } + + let(:pubsub) { Monitoring::PubSub.instance } + + let(:metrics) { [ @resource_metric, @role_metric ] } + + let(:policy_load_event_name) { 'conjur.policy_loaded' } + + let(:policies_url) { '/policies/rspec/policy/root' } + + let(:current_user) { Role.find_or_create(role_id: 'rspec:user:admin') } + + let(:token_auth_header) do + bearer_token = Slosilo["authn:rspec"].signed_token(current_user.login) + token_auth_str = + "Token token=\"#{Base64.strict_encode64(bearer_token.to_json)}\"" + { 'HTTP_AUTHORIZATION' => token_auth_str } + end + + context 'when a policy is loaded' do + + it 'publishes a policy load event (POST)' do + expect(Monitoring::PubSub.instance).to receive(:publish).with(policy_load_event_name).once + + post(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'publishes a policy load event (PUT)' do + expect(Monitoring::PubSub.instance).to receive(:publish).with(policy_load_event_name).once + + put(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'publishes a policy load event (PATCH)' do + expect(Monitoring::PubSub.instance).to receive(:publish).with(policy_load_event_name).once + + patch(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'calls update on the correct metrics' do + expect(@resource_metric).to receive(:update) + expect(@role_metric).to receive(:update) + + post(policies_url, env: headers_with_auth('[!variable test]')) + end + + it 'updates the registry' do + resources_before = registry.get(@resource_metric.metric_name).get(labels: { kind: 'group' }) + roles_before = registry.get(@role_metric.metric_name).get(labels: { kind: 'group' }) + + post(policies_url, env: headers_with_auth('[!group test]')) + + resources_after = registry.get(@resource_metric.metric_name).get(labels: { kind: 'group' }) + roles_after = registry.get(@role_metric.metric_name).get(labels: { kind: 'group' }) + + expect(resources_after - resources_before).to eql(1.0) + expect(roles_after - roles_before).to eql(1.0) + end + end +end diff --git a/spec/lib/monitoring/metrics_spec.rb b/spec/lib/monitoring/metrics_spec.rb new file mode 100644 index 0000000000..0eabf114c2 --- /dev/null +++ b/spec/lib/monitoring/metrics_spec.rb @@ -0,0 +1,39 @@ +require 'monitoring/metrics' +require 'prometheus/client' + +RSpec.describe Monitoring::Metrics do + class MockMetric + # does nothing + end + + describe '#create_metric' do + context 'with valid metric type' do + it 'creates a gauge metric' do + expect(Monitoring::Metrics).to receive(:create_gauge_metric).with(MockMetric) + Monitoring::Metrics.create_metric(MockMetric, :gauge) + end + + it 'creates a counter metric' do + expect(Monitoring::Metrics).to receive(:create_counter_metric).with(MockMetric) + Monitoring::Metrics.create_metric(MockMetric, :counter) + end + + it 'creates a histogram metric' do + expect(Monitoring::Metrics).to receive(:create_histogram_metric).with(MockMetric) + Monitoring::Metrics.create_metric(MockMetric, :histogram) + end + + it 'creates a histogram metric (string)' do + expect(Monitoring::Metrics).to receive(:create_histogram_metric).with(MockMetric) + Monitoring::Metrics.create_metric(MockMetric, 'histogram') + end + end + + context 'with invalid metric type' do + it 'raises an error' do + expect { Monitoring::Metrics.create_metric(MockMetric, :invalid_type) } + .to raise_error(Errors::Monitoring::InvalidOrMissingMetricType) + end + end + end +end diff --git a/spec/lib/monitoring/middleware/prometheus_collector_spec.rb b/spec/lib/monitoring/middleware/prometheus_collector_spec.rb new file mode 100644 index 0000000000..471096e890 --- /dev/null +++ b/spec/lib/monitoring/middleware/prometheus_collector_spec.rb @@ -0,0 +1,113 @@ +require 'spec_helper' +require 'monitoring/middleware/prometheus_collector' +require 'monitoring/prometheus' +require 'monitoring/metrics' +Dir.glob(Rails.root + 'lib/monitoring/metrics/api_*.rb', &method(:require)) + + +describe Monitoring::Middleware::PrometheusCollector do + + # Clear out any existing subscribers and reset the data store + before do + pubsub.unsubscribe('conjur.request_exception') + pubsub.unsubscribe('conjur.request') + Monitoring::Prometheus.setup(registry: Prometheus::Client::Registry.new, metrics: metrics) + end + + let(:metrics) { [ + Monitoring::Metrics::ApiRequestCounter.new, + Monitoring::Metrics::ApiRequestHistogram.new, + Monitoring::Metrics::ApiExceptionCounter.new + ] } + + let(:registry) { Monitoring::Prometheus.registry } + + let(:request_counter_metric) { registry.get(:conjur_http_server_requests_total) } + + let(:request_duration_metric) { registry.get(:conjur_http_server_request_duration_seconds) } + + let(:env) { Rack::MockRequest.env_for } + + let(:lazy_init) { + lambda do + # nothing + end + } + + let(:app) do + app = ->(env) { [200, { 'Content-Type' => 'text/html' }, ['OK']] } + end + + let(:pubsub) { Monitoring::PubSub.instance } + + let(:options) { { pubsub: pubsub, lazy_init: lazy_init} } + + subject { described_class.new(app, **options) } + + it 'returns the app response' do + env['PATH_INFO'] = "/foo" + status, _headers, response = subject.call(env) + + expect(status).to eql(200) + expect(response.first).to eql('OK') + end + + it 'traces request information' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.2) + + env['PATH_INFO'] = "/foo" + status, _headers, _response = subject.call(env) + + labels = { operation: 'unknown', code: '200' } + expect(request_counter_metric.get(labels: labels)).to eql(1.0) + + labels = { operation: 'unknown' } + expect(request_duration_metric.get(labels: labels)).to include("0.1" => 0, "0.25" => 1) + end + + it 'stores a known operation ID in the metrics store' do + expect(Benchmark).to receive(:realtime).and_yield.and_return(0.2) + + env['PATH_INFO'] = "/whoami" + status, _headers, _response = subject.call(env) + + labels = { operation: 'whoAmI', code: '200' } + expect(request_counter_metric.get(labels: labels)).to eql(1.0) + + labels = { operation: 'whoAmI' } + expect(request_duration_metric.get(labels: labels)).to include("0.1" => 0, "0.25" => 1) + end + + context 'when the app raises an exception' do + + let(:dummy_error) { RuntimeError.new('Dummy error from tests') } + + let(:request_exception_metric) { registry.get(:conjur_http_server_request_exceptions_total) } + + let(:app) do + app = ->(env) { + raise dummy_error if env['PATH_INFO'] == '/broken' + [200, { 'Content-Type' => 'text/html' }, ['OK']] + } + end + + subject { described_class.new(app, **options) } + + before do + subject.call(env) + end + + it 'traces exceptions' do + env['PATH_INFO'] = '/broken' + expect { subject.call(env) }.to raise_error(RuntimeError) + + labels = { + operation: 'unknown', + exception: 'RuntimeError', + message: 'Dummy error from tests' + } + + expect(request_exception_metric.get(labels: labels)).to eql(1.0) + end + end +end diff --git a/spec/lib/monitoring/middleware/prometheus_exporter_spec.rb b/spec/lib/monitoring/middleware/prometheus_exporter_spec.rb new file mode 100644 index 0000000000..cfddf88009 --- /dev/null +++ b/spec/lib/monitoring/middleware/prometheus_exporter_spec.rb @@ -0,0 +1,117 @@ +require 'spec_helper' +require 'monitoring/middleware/prometheus_exporter' + +describe Monitoring::Middleware::PrometheusExporter do + + # Reset the data store + before do + Monitoring::Prometheus.setup(registry: Prometheus::Client::Registry.new) + end + + let(:registry) do + Monitoring::Prometheus.registry + end + + let(:path) { '/metrics' } + + let(:options) { { registry: registry, path: path} } + + let(:env) { Rack::MockRequest.env_for } + + let(:app) do + app = ->(env) { [200, { 'Content-Type' => 'text/html' }, ['OK']] } + end + + subject { described_class.new(app, **options) } + + context 'when requesting app endpoints' do + it 'returns the app response' do + env['PATH_INFO'] = "/foo" + status, _headers, response = subject.call(env) + + expect(status).to eql(200) + expect(response.first).to eql('OK') + end + end + + context 'when requesting /metrics' do + text = Prometheus::Client::Formats::Text + + shared_examples 'ok' do |headers, fmt| + it "responds with 200 OK and Content-Type #{fmt::CONTENT_TYPE}" do + registry.counter(:foo, docstring: 'foo counter').increment(by: 9) + + env['PATH_INFO'] = path + env['HTTP_ACCEPT'] = headers.values[0] if headers.values[0] + + status, headers, response = subject.call(env) + + expect(status).to eql(200) + expect(headers['Content-Type']).to eql(fmt::CONTENT_TYPE) + expect(response.first).to eql(fmt.marshal(registry)) + + end + end + + shared_examples 'not acceptable' do |headers| + it 'responds with 406 Not Acceptable' do + message = 'Supported media types: text/plain' + + env['PATH_INFO'] = path + env['HTTP_ACCEPT'] = headers.values[0] if headers.values[0] + + status, headers, response = subject.call(env) + + expect(status).to eql(406) + expect(headers['Content-Type']).to eql('text/plain') + expect(response.first).to eql(message) + end + end + + context 'when client does not send a Accept header' do + include_examples 'ok', {}, text + end + + context 'when client accepts any media type' do + include_examples 'ok', { 'HTTP_ACCEPT' => '*/*' }, text + end + + context 'when client requests application/json' do + include_examples 'not acceptable', 'HTTP_ACCEPT' => 'application/json' + end + + context 'when client requests text/plain' do + include_examples 'ok', { 'HTTP_ACCEPT' => 'text/plain' }, text + end + + context 'when client uses different white spaces in Accept header' do + accept = 'text/plain;q=1.0 ; version=0.0.4' + + include_examples 'ok', { 'HTTP_ACCEPT' => accept }, text + end + + context 'when client does not include quality attribute' do + accept = 'application/json;q=0.5, text/plain' + + include_examples 'ok', { 'HTTP_ACCEPT' => accept }, text + end + + context 'when client accepts some unknown formats' do + accept = 'text/plain;q=0.3, proto/buf;q=0.7' + + include_examples 'ok', { 'HTTP_ACCEPT' => accept }, text + end + + context 'when client accepts only unknown formats' do + accept = 'fancy/woo;q=0.3, proto/buf;q=0.7' + + include_examples 'not acceptable', 'HTTP_ACCEPT' => accept + end + + context 'when client accepts unknown formats and wildcard' do + accept = 'fancy/woo;q=0.3, proto/buf;q=0.7, */*;q=0.1' + + include_examples 'ok', { 'HTTP_ACCEPT' => accept }, text + end + end +end diff --git a/spec/lib/monitoring/prometheus_spec.rb b/spec/lib/monitoring/prometheus_spec.rb new file mode 100644 index 0000000000..26525ba571 --- /dev/null +++ b/spec/lib/monitoring/prometheus_spec.rb @@ -0,0 +1,75 @@ +require 'prometheus/client/formats/text' +require 'monitoring/prometheus' + +class SampleMetric + def setup(registry, pubsub) + registry.register(::Prometheus::Client::Gauge.new( + :test_gauge, + docstring: '...', + labels: [:test_label] + )) + + pubsub.subscribe("sample_test_gauge") do |payload| + metric = registry.get(:test_gauge) + metric.set(payload[:value], labels: payload[:labels]) + end + end +end + +describe Monitoring::Prometheus do + let(:registry) { + Monitoring::Prometheus.setup + Monitoring::Prometheus.registry + } + + it 'creates a valid registry and allows metrics' do + gauge = registry.gauge(:foo, docstring: '...', labels: [:bar]) + gauge.set(21.534, labels: { bar: 'test' }) + + expect(gauge.get(labels: { bar: 'test' })).to eql(21.534) + end + + it 'can use Pub/Sub events to update metrics on the registry' do + gauge = registry.gauge(:foo, docstring: '...', labels: [:bar]) + + pub_sub = Monitoring::PubSub.instance + pub_sub.subscribe("foo_event_name") do |payload| + labels = { + bar: payload[:bar] + } + gauge.set(payload[:value], labels: labels) + end + + pub_sub.publish("foo_event_name", value: 100, bar: "omicron") + expect(gauge.get(labels: { bar: "omicron" })).to eql(100.0) + end + + context 'when given a list of metrics to setup' do + before do + @metric_obj = SampleMetric.new + @registry = ::Prometheus::Client::Registry.new + @mock_pubsub = double("Mock Monitoring::PubSub") + end + + def prometheus_setup + Monitoring::Prometheus.setup( + registry: @registry, + metrics: [ @metric_obj ], + pubsub: @mock_pubsub + ) + end + + it 'calls .setup for the metric class' do + expect(@metric_obj).to receive(:setup).with(@registry, @mock_pubsub) + prometheus_setup + end + + it 'adds custom metric definitions to the global registry and subscribes to related Pub/Sub events' do + expect(@mock_pubsub).to receive(:subscribe).with("sample_test_gauge") + prometheus_setup + + sample_metric = @registry.get(:test_gauge) + expect(sample_metric).not_to be_nil + end + end +end diff --git a/spec/lib/monitoring/pub_sub_spec.rb b/spec/lib/monitoring/pub_sub_spec.rb new file mode 100644 index 0000000000..2dd8fa7a9b --- /dev/null +++ b/spec/lib/monitoring/pub_sub_spec.rb @@ -0,0 +1,43 @@ +require 'monitoring/pub_sub' + +describe Monitoring::PubSub do + let(:pubsub) { Monitoring::PubSub.instance } + + it 'unsubscribes blocks from a named event' do + expect { |block| + # Assert that each #subscribe call produces a + # unique subscriber to event "A". + a_sub_1 = pubsub.subscribe("A", &block) + a_sub_2 = pubsub.subscribe("A", &block) + expect(a_sub_1).not_to equal(a_sub_2) + + pubsub.subscribe("B", &block) + + # Arg {e:1} will be yielded twice, once by each + # unique subscriber to event "A". + pubsub.publish("A", {e:1}) + pubsub.publish("B", {e:2}) + + pubsub.unsubscribe("A") + pubsub.publish("A", {e:3}) + pubsub.publish("B", {e:4}) + } + .to yield_successive_args({e:1}, {e:1}, {e:2}, {e:4}) + end + + it 'receives only subscribed events, in order published' do + expect { |block| + names = [ "A", "B", "C" ] + names.each { |name| + pubsub.subscribe(name, &block) + } + + pubsub.publish("B", {e:1}) + pubsub.publish("C", {e:2}) + pubsub.publish("D", {e:3}) + pubsub.publish("A", {e:4}) + } + .to yield_successive_args({e:1}, {e:2}, {e:4}) + end + +end diff --git a/spec/lib/monitoring/query_helper_spec.rb b/spec/lib/monitoring/query_helper_spec.rb new file mode 100644 index 0000000000..b63fecce7c --- /dev/null +++ b/spec/lib/monitoring/query_helper_spec.rb @@ -0,0 +1,46 @@ +require 'monitoring/query_helper' +require 'spec_helper' + +RSpec.describe Monitoring::QueryHelper do + describe '#policy_resource_counts' do + it 'returns the correct resource counts' do + allow(Resource).to receive(:group_and_count).and_return([ + { kind: 'resource1', count: 10 }, + { kind: 'resource2', count: 20 } + ]) + + counts = Monitoring::QueryHelper.instance.policy_resource_counts + + expect(counts).to eq({ 'resource1' => 10, 'resource2' => 20 }) + end + + it 'returns an empty hash if there are no resources' do + allow(Resource).to receive(:group_and_count).and_return([]) + + counts = Monitoring::QueryHelper.instance.policy_resource_counts + + expect(counts).to eq({}) + end + end + + describe '#policy_role_counts' do + it 'returns the correct role counts' do + allow(Role).to receive(:group_and_count).and_return([ + { kind: 'role1', count: 5 }, + { kind: 'role2', count: 15 } + ]) + + counts = Monitoring::QueryHelper.instance.policy_role_counts + + expect(counts).to eq({ 'role1' => 5, 'role2' => 15 }) + end + + it 'returns an empty hash if there are no roles' do + allow(Role).to receive(:group_and_count).and_return([]) + + counts = Monitoring::QueryHelper.instance.policy_role_counts + + expect(counts).to eq({}) + end + end +end