Skip to content

Commit

Permalink
Split the web and metrics service endpoints (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathanio authored Aug 14, 2024
2 parents ab67c09 + f9da282 commit 002491e
Show file tree
Hide file tree
Showing 22 changed files with 720 additions and 297 deletions.
3 changes: 2 additions & 1 deletion charts/dashboard/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ configure it through the `values.yaml` file.
| podDisruptionBudget.annotations | object | `{}` | Set any additional annotations which should be added to the PodDisruptionBudget resource |
| podDisruptionBudget.labels | object | `{}` | Set any additional labels which should be added to the PodDisruptionBudget resource |
| service.type | string | `"ClusterIP"` | Set whether the Service should be a ClusterIP or NodePort |
| service.port | int | `8080` | Set the TCP port the Service should be configured to listen on |
| service.webPort | int | `8080` | Set the TCP port the web service should be configured to listen on |
| service.metricsPort | int | `8888` | Set the TCP port the metrics service should be configured to listen on |
| service.annotations | object | `{}` | Set any additional annotations which should be added to the Service resource |
| service.labels | object | `{}` | Set any additional labels which should be added to the Service resource |
| ingress.create | bool | `false` | Set whether or not to create the Ingress for the dashboard Service |
Expand Down
14 changes: 9 additions & 5 deletions charts/dashboard/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,16 @@ spec:
- --log-level={{ .Values.pod.logging.level }}
- --log-json={{ if .Values.pod.logging.json }}true{{ else }}false{{ end }}
- --address=0.0.0.0
- --web-port={{ .Values.service.webPort }}
- --metrics-port={{ .Values.service.metricsPort }}
{{- range .Values.pod.extraArgs }}
- {{ . }}
{{- end }}
ports:
- containerPort: {{ .Values.service.port }}
name: http
- containerPort: {{ .Values.service.webPort }}
name: web
- containerPort: {{ .Values.service.metricsPort }}
name: metrics
securityContext:
runAsNonRoot: true
allowPrivilegeEscalation: false
Expand All @@ -81,7 +85,7 @@ spec:
startupProbe:
httpGet:
path: /alive
port: http
port: metrics
{{- with .Values.pod.probes.startup }}
periodSeconds: {{ .periodSeconds }}
initialDelaySeconds: {{ .initialDelaySeconds }}
Expand All @@ -94,7 +98,7 @@ spec:
livenessProbe:
httpGet:
path: /alive
port: http
port: metrics
{{- with .Values.pod.probes.liveness }}
periodSeconds: {{ .periodSeconds }}
initialDelaySeconds: {{ .initialDelaySeconds }}
Expand All @@ -107,7 +111,7 @@ spec:
readinessProbe:
httpGet:
path: /healthz
port: http
port: metrics
{{- with .Values.pod.probes.readiness }}
periodSeconds: {{ .periodSeconds }}
initialDelaySeconds: {{ .initialDelaySeconds }}
Expand Down
2 changes: 1 addition & 1 deletion charts/dashboard/templates/network-policy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ spec:
cidr: 0.0.0.0/0
ports:
- protocol: TCP
port: http
port: web
{{- end }}
2 changes: 1 addition & 1 deletion charts/dashboard/templates/service-monitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ metadata:
spec:
endpoints:
- path: /metrics
port: http
port: metrics
interval: {{ .Values.serviceMonitor.interval }}
namespaceSelector:
matchNames:
Expand Down
10 changes: 7 additions & 3 deletions charts/dashboard/templates/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ spec:
# of these ports are also used for join operations
publishNotReadyAddresses: true
ports:
- name: http
port: {{ .Values.service.port }}
- name: web
port: {{ .Values.service.webPort }}
protocol: TCP
targetPort: http
targetPort: web
- name: metrics
port: {{ .Values.service.metricsPort }}
protocol: TCP
targetPort: metrics
16 changes: 10 additions & 6 deletions charts/dashboard/templates/stateful-set.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,26 @@ spec:
fsGroup: 65534
containers:
- name: {{ .Chart.Name | quote }}
image: "{{ .Values.pod.image.repository }}:v{{ .Values.pod.image.tag | default .Chart.AppVersion }}"
image: "{{ .Values.pod.image.repository }}:{{ .Values.pod.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.pod.image.pullPolicy }}
args:
- serve
- --log-level={{ .Values.pod.logging.level }}
- --log-json={{ if .Values.pod.logging.json }}true{{ else }}false{{ end }}
- --address=0.0.0.0
- --web-port={{ .Values.service.webPort }}
- --metrics-port={{ .Values.service.metricsPort }}
{{- range .Values.pod.extraArgs }}
- {{ . }}
{{- end }}
volumeMounts:
- name: {{ include "dashboard.fullname" . }}-vault
mountPath: /data/vault
ports:
- containerPort: {{ .Values.service.port }}
name: http
- containerPort: {{ .Values.service.webPort }}
name: web
- containerPort: {{ .Values.service.metricsPort }}
name: metrics
securityContext:
runAsNonRoot: true
allowPrivilegeEscalation: false
Expand All @@ -94,7 +98,7 @@ spec:
startupProbe:
httpGet:
path: /alive
port: http
port: metrics
{{- with .Values.pod.probes.startup }}
periodSeconds: {{ .periodSeconds }}
initialDelaySeconds: {{ .initialDelaySeconds }}
Expand All @@ -107,7 +111,7 @@ spec:
livenessProbe:
httpGet:
path: /alive
port: http
port: metrics
{{- with .Values.pod.probes.liveness }}
periodSeconds: {{ .periodSeconds }}
initialDelaySeconds: {{ .initialDelaySeconds }}
Expand All @@ -120,7 +124,7 @@ spec:
readinessProbe:
httpGet:
path: /healthz
port: http
port: metrics
{{- with .Values.pod.probes.readiness }}
periodSeconds: {{ .periodSeconds }}
initialDelaySeconds: {{ .initialDelaySeconds }}
Expand Down
6 changes: 4 additions & 2 deletions charts/dashboard/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ podDisruptionBudget:
service:
# -- Set whether the Service should be a ClusterIP or NodePort
type: ClusterIP
# -- Set the TCP port the Service should be configured to listen on
port: 8080
# -- Set the TCP port the web service should be configured to listen on
webPort: 8080
# -- Set the TCP port the metrics service should be configured to listen on
metricsPort: 8888
# -- Set any additional annotations which should be added to the Service
# resource
annotations: {}
Expand Down
6 changes: 4 additions & 2 deletions config/serve.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
---
web:
endpoints:
bind:
address: 0.0.0.0
port: 8080
port:
web: 8080
metrics: 8888
proxies:
- '::1'
- '172.27.4.188'
Expand Down
148 changes: 114 additions & 34 deletions internal/cmd/serve.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
package cmd

import (
"context"
"fmt"
"log/slog"
"os/signal"
"strings"
"syscall"
"time"

"github.com/MakeNowJust/heredoc/v2"
"github.com/gin-gonic/gin"
"github.com/spf13/cobra"
"github.com/spf13/viper"

"github.com/n3tuk/dashboard/internal/config"
"github.com/n3tuk/dashboard/internal/logger"
"github.com/n3tuk/dashboard/internal/serve"
"github.com/n3tuk/dashboard/internal/serve/metrics"
"github.com/n3tuk/dashboard/internal/serve/web"
)

const (
Expand All @@ -23,8 +30,10 @@ var (
// host is the hostname or IPv4/IPv6 address to bind the service to on
// startup.
host = "localhost"
// port is the TCP port number to bind the web service to on startup.
port = 8080
// webPort is the TCP port number to bind the web service to on startup.
webPort = 8080
// metricsPort is the TCP port number to bind the metrics service to on startup.
metricsPort = 8888

// trustedProxies is a list of IPv4 and/or IPv6 CIDRs which should be trusted
// for providing the remote Client address.
Expand All @@ -42,14 +51,30 @@ var (
// idleTimeout is the maximum time to keep an idle connection open between
// requests from the client.
idleTimeout = 30
// shutdownTimeout is the maximum time to wait for the web service to finish
// handling all currently active requests before shutting down.
shutdownTimeout = 30
// shutdownMetrics is the maximum time to wait to shut down the metrics
// service and therefore shut down the application once the web service is
// closed.
shutdownMetrics = 5

// loggerConfig provides the application information which will be used for
// every log line to help provide context to all logs.
loggerConfig = &map[string]string{
"name": Name,
"version": Version,
"commit": Commit,
"arch": Architecture,
"build-date": BuildDate,
}

// serveCmd represents the serve command for the dashboard application, and will
// provide the setup and arguments needed for the application to start the web
// service and start processing events.
serveCmd = &cobra.Command{
Use: "serve [options]",
Aliases: []string{"web"},
Short: "Start the web server to serve dashboard web requests",
Use: "serve [options]",
Short: "Start the web server to serve dashboard web requests",
Long: heredoc.Doc(`
dashboard serve provides the web service which runs the processing of
events submitted to the dashboard, to be saved and pushed out to the
Expand All @@ -59,9 +84,7 @@ var (
// Add blank line at the top for enforced extra spacing in the output
Example: strings.TrimRight(heredoc.Doc(`
$ dashboard serve \
--address 0.0.0.0 \
--port 8080
$ dashboard serve --address 0.0.0.0 --web-port 8080 --metrics-port 8081
`), "\n"),

RunE: runServe,
Expand All @@ -74,34 +97,42 @@ func init() {
flags := serveCmd.Flags()

// Flags and default configuration for binding the web service
viper.SetDefault("web.bind.address", host)
viper.SetDefault("endpoints.bind.address", host)
flags.StringP("address", "a", host, "Address to bind the server to")
_ = viper.BindPFlag("web.bind.address", flags.Lookup("address"))
_ = viper.BindPFlag("endpoints.bind.address", flags.Lookup("address"))

viper.SetDefault("endpoints.bind.port.web", webPort)
flags.IntP("web-port", "p", webPort, "The port to bind the web service to")
_ = viper.BindPFlag("endpoints.bind.port.web", flags.Lookup("web-port"))

viper.SetDefault("web.bind.port", port)
flags.IntP("port", "p", port, "The port to bind the server to")
_ = viper.BindPFlag("web.bind.port", flags.Lookup("port"))
viper.SetDefault("endpoints.bind.port.metrics", metricsPort)
flags.IntP("metrics-port", "m", metricsPort, "The port to bind the metrics service to")
_ = viper.BindPFlag("endpoints.bind.port.metrics", flags.Lookup("metrics-port"))

viper.SetDefault("web.proxies", trustedProxies)
viper.SetDefault("endpoints.proxies", trustedProxies)
flags.StringSlice("proxies", trustedProxies, "A comma-separated list of CIDRs where trusted proxies are used")
_ = viper.BindPFlag("web.proxies", flags.Lookup("proxies"))
_ = viper.BindPFlag("endpoints.proxies", flags.Lookup("proxies"))

// Flags and default configurations for the web service timeouts
viper.SetDefault("web.timeouts.headers", headersTimeout)
viper.SetDefault("endpoints.timeouts.headers", headersTimeout)
flags.Int("headers-timeout", headersTimeout, "Timeout (in seconds) to read the headers for the request")
_ = viper.BindPFlag("web.timeouts.headers", flags.Lookup("headers-timeout"))
_ = viper.BindPFlag("endpoints.timeouts.headers", flags.Lookup("headers-timeout"))

viper.SetDefault("web.timeouts.read", readTimeout)
viper.SetDefault("endpoints.timeouts.read", readTimeout)
flags.Int("read-timeout", readTimeout, "Timeout (in seconds) to read the full request, after the headers")
_ = viper.BindPFlag("web.timeouts.read", flags.Lookup("read-timeout"))
_ = viper.BindPFlag("endpoints.timeouts.read", flags.Lookup("read-timeout"))

viper.SetDefault("web.timeouts.write", writeTimeout)
viper.SetDefault("endpoints.timeouts.write", writeTimeout)
flags.Int("write-timeout", writeTimeout, "Timeout (in seconds) to write the full response, including the body")
_ = viper.BindPFlag("web.timeouts.write", flags.Lookup("write-timeout"))
_ = viper.BindPFlag("endpoints.timeouts.write", flags.Lookup("write-timeout"))

viper.SetDefault("web.timeouts.idle", idleTimeout)
viper.SetDefault("endpoints.timeouts.idle", idleTimeout)
flags.Int("idle-timeout", idleTimeout, "Timeout (in seconds) to keep a connection open between requests")
_ = viper.BindPFlag("web.timeouts.idle", flags.Lookup("idle-timeout"))
_ = viper.BindPFlag("endpoints.timeouts.idle", flags.Lookup("idle-timeout"))

viper.SetDefault("endpoints.timeouts.shutdown", shutdownTimeout)
flags.Int("shutdown-timeout", shutdownTimeout, "Timeout (in seconds) to wait for requests to finish")
_ = viper.BindPFlag("endpoints.timeouts.shutdown", flags.Lookup("shutdown-timeout"))

rootCmd.AddCommand(serveCmd)
}
Expand All @@ -111,24 +142,73 @@ func init() {
// services, and waiting for events to be sent to it for processing. If there
// was an error processing the configuration or initialising the web service or
// its connections, an `error` will be returned.
//
//nolint:funlen // ignore
func runServe(_ *cobra.Command, _ []string) error {
// Load the configuration for the serve command. This is the only path on
// which runServe returns a non-nil error; every later failure is logged.
err := config.Load(serveConfigName, configFile)
if err != nil {
//nolint:revive,stylecheck // new-line is required to break error and usage
return fmt.Errorf("\n %w\n", err)
}

// As this is a web service, include more information about the release and
// build environment to make it easier to track and debug changes from logs
// NOTE(review): this inline logger.Start call carries the same fields as
// loggerConfig and is followed by logger.Start(loggerConfig) below; it looks
// like the pre-change side of the diff rendered without -/+ markers. Confirm
// against the real file that only one call remains.
logger.Start(&map[string]string{
"name": Name,
"version": Version,
"commit": Commit,
"arch": Architecture,
"build-date": BuildDate,
})
// Switch gin to release mode before any service is created.
gin.SetMode(gin.ReleaseMode)
logger.Start(loggerConfig)

// Create a context that listens for the interrupt signal from the Operating
// System so we can capture it and then trigger a graceful shutdown
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer stop()

m := metrics.NewService()
w := web.NewService()

// e is shared by both services to report errors back to this function.
// NOTE(review): e is unbuffered and is received from at most once (in the
// select below). If either Start sends on e after that receive, or after
// ctx is cancelled, that goroutine would block forever. Confirm against the
// Start implementations.
e := make(chan error)

// Start the web service first as the metrics service will report the health
// of the service, so we should be ready to receive requests before the
// service is reporting as healthy
// NOTE(review): both calls are launched as goroutines, so the ordering above
// is only the order in which they are spawned, not a readiness guarantee.
go w.Start(e)
go m.Start(e)

// Block until either a termination signal cancels ctx or one of the services
// reports an error on e, and log which of the two triggered the shutdown.
select {
case <-ctx.Done():
slog.Info("Shutting down dashboard gracefully")
case err := <-e:
slog.Error(
"Shutting down dashboard due to startup failure",
slog.Group("error",
slog.String("message", err.Error()),
),
)
}

// NOTE(review): serve.Run() looks like the pre-change entry point left over
// from the diff rendering (the new flow uses web/metrics services instead).
// Confirm it is not present in the real file.
serve.Run()
// Tell the metrics service that shutdown is starting before the web service
// is stopped (presumably so health reporting changes first; confirm in the
// metrics package).
m.PrepareShutdown()

// Give the web service the configured number of seconds to shut down; an
// error here is logged only and does not change the return value.
shutdown := time.Duration(viper.GetInt("endpoints.timeouts.shutdown")) * time.Second
if err := w.Shutdown(shutdown); err != nil {
slog.Error(
"Forced to shut down web service ungracefully",
slog.Group("error",
slog.String("message", err.Error()),
),
)
}

// Only once all the above steps are processed, allow the signals to be
// processed again, allowing the application to be forcefully terminated, but
// the client connections have been cleanly closed, so this is acceptable now
stop()

// The metrics service is shut down last, using the fixed shutdownMetrics
// timeout rather than a configurable value; failures are again only logged.
shutdown = time.Duration(shutdownMetrics) * time.Second
if err := m.Shutdown(shutdown); err != nil {
slog.Error(
"Forced to shut down metrics service ungracefully",
slog.Group("error",
slog.String("message", err.Error()),
),
)
}

// nil is returned even when the shutdown was triggered by a service error
// received on e.
return nil
}
Loading

0 comments on commit 002491e

Please sign in to comment.