diff --git a/.golangci.yml b/.golangci.yml index f4fbed2f51..88f10bb240 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -59,4 +59,4 @@ linters: - nonamedreturns issues: - new-from-rev: 3221f76 \ No newline at end of file + new-from-rev: 9af4477 \ No newline at end of file diff --git a/extension/agenthealth/config.go b/extension/agenthealth/config.go index dd1f94c06c..53c6901fff 100644 --- a/extension/agenthealth/config.go +++ b/extension/agenthealth/config.go @@ -10,8 +10,9 @@ import ( ) type Config struct { - IsUsageDataEnabled bool `mapstructure:"is_usage_data_enabled"` - Stats agent.StatsConfig `mapstructure:"stats"` + IsUsageDataEnabled bool `mapstructure:"is_usage_data_enabled"` + Stats *agent.StatsConfig `mapstructure:"stats,omitempty"` + IsStatusCodeEnabled bool `mapstructure:"is_status_code_enabled,omitempty"` } var _ component.Config = (*Config)(nil) diff --git a/extension/agenthealth/config_test.go b/extension/agenthealth/config_test.go index ee0ef301f6..0e97928ae7 100644 --- a/extension/agenthealth/config_test.go +++ b/extension/agenthealth/config_test.go @@ -26,11 +26,11 @@ func TestLoadConfig(t *testing.T) { }, { id: component.NewIDWithName(TypeStr, "1"), - want: &Config{IsUsageDataEnabled: false, Stats: agent.StatsConfig{Operations: []string{agent.AllowAllOperations}}}, + want: &Config{IsUsageDataEnabled: false, Stats: &agent.StatsConfig{Operations: []string{agent.AllowAllOperations}}}, }, { id: component.NewIDWithName(TypeStr, "2"), - want: &Config{IsUsageDataEnabled: true, Stats: agent.StatsConfig{Operations: []string{"ListBuckets"}}}, + want: &Config{IsUsageDataEnabled: true, Stats: &agent.StatsConfig{Operations: []string{"ListBuckets"}}}, }, } for _, testCase := range testCases { diff --git a/extension/agenthealth/extension.go b/extension/agenthealth/extension.go index 14ab08eb57..2f1ca916b1 100644 --- a/extension/agenthealth/extension.go +++ b/extension/agenthealth/extension.go @@ -9,6 +9,7 @@ import ( "go.uber.org/zap" "github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats" + "github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats/agent" "github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/useragent" ) @@ -24,11 +25,29 @@ var _ awsmiddleware.Extension = (*agentHealth)(nil) func (ah *agentHealth) Handlers() ([]awsmiddleware.RequestHandler, []awsmiddleware.ResponseHandler) { var responseHandlers []awsmiddleware.ResponseHandler requestHandlers := []awsmiddleware.RequestHandler{useragent.NewHandler(ah.cfg.IsUsageDataEnabled)} - if ah.cfg.IsUsageDataEnabled { - req, res := stats.NewHandlers(ah.logger, ah.cfg.Stats) - requestHandlers = append(requestHandlers, req...) - responseHandlers = append(responseHandlers, res...) + + if !ah.cfg.IsUsageDataEnabled { + ah.logger.Debug("Usage data is disabled, skipping stats handlers") + return requestHandlers, responseHandlers + } + + if ah.cfg == nil { + return nil, nil + } + statusCodeEnabled := ah.cfg.IsStatusCodeEnabled + + var statsResponseHandlers []awsmiddleware.ResponseHandler + var statsRequestHandlers []awsmiddleware.RequestHandler + + if ah.cfg.Stats != nil { + statsRequestHandlers, statsResponseHandlers = stats.NewHandlers(ah.logger, *ah.cfg.Stats, statusCodeEnabled, true) + } else { + statsRequestHandlers, statsResponseHandlers = stats.NewHandlers(ah.logger, agent.StatsConfig{}, statusCodeEnabled, false) } + + requestHandlers = append(requestHandlers, statsRequestHandlers...) + responseHandlers = append(responseHandlers, statsResponseHandlers...) + return requestHandlers, responseHandlers } diff --git a/extension/agenthealth/extension_test.go b/extension/agenthealth/extension_test.go index 504dc8c50e..c264e86440 100644 --- a/extension/agenthealth/extension_test.go +++ b/extension/agenthealth/extension_test.go @@ -14,13 +14,13 @@ import ( func TestExtension(t *testing.T) { ctx := context.Background() - cfg := &Config{IsUsageDataEnabled: true} + cfg := &Config{IsUsageDataEnabled: true, IsStatusCodeEnabled: true} extension := NewAgentHealth(zap.NewNop(), cfg) assert.NotNil(t, extension) assert.NoError(t, extension.Start(ctx, componenttest.NewNopHost())) requestHandlers, responseHandlers := extension.Handlers() // user agent, client stats, stats - assert.Len(t, requestHandlers, 3) + assert.Len(t, requestHandlers, 2) // client stats assert.Len(t, responseHandlers, 1) cfg.IsUsageDataEnabled = false diff --git a/extension/agenthealth/factory.go b/extension/agenthealth/factory.go index e8e97587a5..0059eb3c67 100644 --- a/extension/agenthealth/factory.go +++ b/extension/agenthealth/factory.go @@ -28,7 +28,7 @@ func NewFactory() extension.Factory { func createDefaultConfig() component.Config { return &Config{ IsUsageDataEnabled: true, - Stats: agent.StatsConfig{ + Stats: &agent.StatsConfig{ Operations: []string{agent.AllowAllOperations}, }, } diff --git a/extension/agenthealth/factory_test.go b/extension/agenthealth/factory_test.go index 4899dfb425..8081a0f06e 100644 --- a/extension/agenthealth/factory_test.go +++ b/extension/agenthealth/factory_test.go @@ -16,7 +16,7 @@ import ( func TestCreateDefaultConfig(t *testing.T) { cfg := NewFactory().CreateDefaultConfig() - assert.Equal(t, &Config{IsUsageDataEnabled: true, Stats: agent.StatsConfig{Operations: []string{agent.AllowAllOperations}}}, cfg) + assert.Equal(t, &Config{IsUsageDataEnabled: true, Stats: &agent.StatsConfig{Operations: []string{agent.AllowAllOperations}}}, cfg) assert.NoError(t, componenttest.CheckConfigStruct(cfg)) } diff --git a/extension/agenthealth/handler/stats/agent/agent.go b/extension/agenthealth/handler/stats/agent/agent.go index 83237d54e6..e2e9bc50d4 100644 --- a/extension/agenthealth/handler/stats/agent/agent.go +++ b/extension/agenthealth/handler/stats/agent/agent.go @@ -15,28 +15,29 @@ const ( ) type Stats struct { - CpuPercent *float64 `json:"cpu,omitempty"` - MemoryBytes *uint64 `json:"mem,omitempty"` - FileDescriptorCount *int32 `json:"fd,omitempty"` - ThreadCount *int32 `json:"th,omitempty"` - LatencyMillis *int64 `json:"lat,omitempty"` - PayloadBytes *int `json:"load,omitempty"` - StatusCode *int `json:"code,omitempty"` - SharedConfigFallback *int `json:"scfb,omitempty"` - ImdsFallbackSucceed *int `json:"ifs,omitempty"` - AppSignals *int `json:"as,omitempty"` - EnhancedContainerInsights *int `json:"eci,omitempty"` - RunningInContainer *int `json:"ric,omitempty"` - RegionType *string `json:"rt,omitempty"` - Mode *string `json:"m,omitempty"` - EntityRejected *int `json:"ent,omitempty"` + CPUPercent *float64 `json:"cpu,omitempty"` + MemoryBytes *uint64 `json:"mem,omitempty"` + FileDescriptorCount *int32 `json:"fd,omitempty"` + ThreadCount *int32 `json:"th,omitempty"` + LatencyMillis *int64 `json:"lat,omitempty"` + PayloadBytes *int `json:"load,omitempty"` + StatusCode *int `json:"code,omitempty"` + SharedConfigFallback *int `json:"scfb,omitempty"` + ImdsFallbackSucceed *int `json:"ifs,omitempty"` + AppSignals *int `json:"as,omitempty"` + EnhancedContainerInsights *int `json:"eci,omitempty"` + RunningInContainer *int `json:"ric,omitempty"` + RegionType *string `json:"rt,omitempty"` + Mode *string `json:"m,omitempty"` + EntityRejected *int `json:"ent,omitempty"` + StatusCodes map[string][5]int `json:"codes,omitempty"` //represents status codes 200,400,408,413,429, } // Merge the other Stats into the current. If the field is not nil, // then it'll overwrite the existing one. func (s *Stats) Merge(other Stats) { - if other.CpuPercent != nil { - s.CpuPercent = other.CpuPercent + if other.CPUPercent != nil { + s.CPUPercent = other.CPUPercent } if other.MemoryBytes != nil { s.MemoryBytes = other.MemoryBytes @@ -80,6 +81,28 @@ func (s *Stats) Merge(other Stats) { if other.EntityRejected != nil { s.EntityRejected = other.EntityRejected } + if other.StatusCodes == nil { + return + } + + if s.StatusCodes == nil { + s.StatusCodes = make(map[string][5]int) + } + + for key, value := range other.StatusCodes { + if existing, ok := s.StatusCodes[key]; ok { + s.StatusCodes[key] = [5]int{ + existing[0] + value[0], // 200 + existing[1] + value[1], // 400 + existing[2] + value[2], // 408 + existing[3] + value[3], // 413 + existing[4] + value[4], // 429 + } + } else { + s.StatusCodes[key] = value + } + } + } func (s *Stats) Marshal() (string, error) { @@ -104,6 +127,27 @@ func (of OperationsFilter) IsAllowed(operationName string) bool { return of.allowAll || of.operations.Contains(operationName) } +var StatusCodeOperations = []string{ // all the operations that are allowed + "DescribeInstances", + "DescribeTags", + "DescribeTasks", + "DescribeVolumes", + "DescribeContainerInstances", + "DescribeServices", + "DescribeTaskDefinition", + "ListServices", + "ListTasks", + "CreateLogGroup", + "CreateLogStream", +} + +type StatsConfig struct { + // Operations are the allowed operation names to gather stats for. + Operations []string `mapstructure:"operations,omitempty"` + // UsageFlags are the usage flags to set on start up. + UsageFlags map[Flag]any `mapstructure:"usage_flags,omitempty"` +} + func NewOperationsFilter(operations ...string) OperationsFilter { allowed := collections.NewSet[string](operations...) return OperationsFilter{ @@ -112,9 +156,12 @@ func NewOperationsFilter(operations ...string) OperationsFilter { } } -type StatsConfig struct { - // Operations are the allowed operation names to gather stats for. - Operations []string `mapstructure:"operations,omitempty"` - // UsageFlags are the usage flags to set on start up. - UsageFlags map[Flag]any `mapstructure:"usage_flags,omitempty"` +// NewStatusCodeOperationsFilter creates a new filter for allowed operations and status codes. +func NewStatusCodeOperationsFilter() OperationsFilter { + allowed := make(map[string]struct{}, len(StatusCodeOperations)) + + return OperationsFilter{ + operations: allowed, + allowAll: false, + } } diff --git a/extension/agenthealth/handler/stats/agent/agent_test.go b/extension/agenthealth/handler/stats/agent/agent_test.go index c379facc19..b53f8787d8 100644 --- a/extension/agenthealth/handler/stats/agent/agent_test.go +++ b/extension/agenthealth/handler/stats/agent/agent_test.go @@ -11,17 +11,17 @@ import ( ) func TestMerge(t *testing.T) { - stats := &Stats{CpuPercent: aws.Float64(1.2)} - assert.EqualValues(t, 1.2, *stats.CpuPercent) + stats := &Stats{CPUPercent: aws.Float64(1.2)} + assert.EqualValues(t, 1.2, *stats.CPUPercent) assert.Nil(t, stats.MemoryBytes) stats.Merge(Stats{ - CpuPercent: aws.Float64(1.3), + CPUPercent: aws.Float64(1.3), MemoryBytes: aws.Uint64(123), }) - assert.EqualValues(t, 1.3, *stats.CpuPercent) + assert.EqualValues(t, 1.3, *stats.CPUPercent) assert.EqualValues(t, 123, *stats.MemoryBytes) stats.Merge(Stats{ - CpuPercent: aws.Float64(1.5), + CPUPercent: aws.Float64(1.5), MemoryBytes: aws.Uint64(133), FileDescriptorCount: aws.Int32(456), ThreadCount: aws.Int32(789), @@ -36,7 +36,7 @@ func TestMerge(t *testing.T) { RegionType: aws.String("RegionType"), Mode: aws.String("Mode"), }) - assert.EqualValues(t, 1.5, *stats.CpuPercent) + assert.EqualValues(t, 1.5, *stats.CPUPercent) assert.EqualValues(t, 133, *stats.MemoryBytes) assert.EqualValues(t, 456, *stats.FileDescriptorCount) assert.EqualValues(t, 789, *stats.ThreadCount) @@ -52,6 +52,76 @@ func TestMerge(t *testing.T) { assert.EqualValues(t, "Mode", *stats.Mode) } +func TestMergeWithStatusCodes(t *testing.T) { + stats := &Stats{ + StatusCodes: map[string][5]int{ + "operation1": {1, 2, 3, 4, 5}, + }, + } + + stats.Merge(Stats{ + StatusCodes: map[string][5]int{ + "operation1": {2, 3, 4, 5, 6}, // Existing operation with new values + "operation2": {0, 1, 2, 3, 4}, // New operation + }, + }) + + assert.Equal(t, [5]int{3, 5, 7, 9, 11}, stats.StatusCodes["operation1"]) // Values should sum + assert.Equal(t, [5]int{0, 1, 2, 3, 4}, stats.StatusCodes["operation2"]) // New operation added + + stats.Merge(Stats{ + StatusCodes: nil, + }) + + assert.Equal(t, [5]int{3, 5, 7, 9, 11}, stats.StatusCodes["operation1"]) + assert.Equal(t, [5]int{0, 1, 2, 3, 4}, stats.StatusCodes["operation2"]) +} + +func TestMarshalWithStatusCodes(t *testing.T) { + testCases := map[string]struct { + stats *Stats + want string + }{ + "WithEmptyStatusCodes": { + stats: &Stats{ + StatusCodes: map[string][5]int{}, + }, + want: "", + }, + "WithStatusCodes": { + stats: &Stats{ + StatusCodes: map[string][5]int{ + "operation1": {1, 2, 3, 4, 5}, + "operation2": {0, 1, 2, 3, 4}, + }, + }, + want: `"codes":{"operation1":[1,2,3,4,5],"operation2":[0,1,2,3,4]}`, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + got, err := testCase.stats.Marshal() + assert.NoError(t, err) + assert.Contains(t, got, testCase.want) + }) + } +} + +func TestMergeFullWithStatusCodes(t *testing.T) { + stats := &Stats{ + CPUPercent: aws.Float64(1.0), + StatusCodes: map[string][5]int{"operation1": {1, 0, 0, 0, 0}}, + } + stats.Merge(Stats{ + CPUPercent: aws.Float64(2.0), + StatusCodes: map[string][5]int{"operation1": {0, 1, 0, 0, 0}, "operation2": {1, 1, 1, 1, 1}}, + }) + + assert.Equal(t, 2.0, *stats.CPUPercent) + assert.Equal(t, [5]int{1, 1, 0, 0, 0}, stats.StatusCodes["operation1"]) + assert.Equal(t, [5]int{1, 1, 1, 1, 1}, stats.StatusCodes["operation2"]) +} + func TestMarshal(t *testing.T) { testCases := map[string]struct { stats *Stats @@ -63,7 +133,7 @@ func TestMarshal(t *testing.T) { }, "WithPartial": { stats: &Stats{ - CpuPercent: aws.Float64(1.2), + CPUPercent: aws.Float64(1.2), MemoryBytes: aws.Uint64(123), ThreadCount: aws.Int32(789), PayloadBytes: aws.Int(5678), @@ -72,7 +142,7 @@ func TestMarshal(t *testing.T) { }, "WithFull": { stats: &Stats{ - CpuPercent: aws.Float64(1.2), + CPUPercent: aws.Float64(1.2), MemoryBytes: aws.Uint64(123), FileDescriptorCount: aws.Int32(456), ThreadCount: aws.Int32(789), diff --git a/extension/agenthealth/handler/stats/handler.go b/extension/agenthealth/handler/stats/handler.go index 7e12f12b5c..50ff9f3858 100644 --- a/extension/agenthealth/handler/stats/handler.go +++ b/extension/agenthealth/handler/stats/handler.go @@ -21,12 +21,36 @@ const ( headerKeyAgentStats = "X-Amz-Agent-Stats" ) -func NewHandlers(logger *zap.Logger, cfg agent.StatsConfig) ([]awsmiddleware.RequestHandler, []awsmiddleware.ResponseHandler) { +func NewHandlers(logger *zap.Logger, cfg agent.StatsConfig, statusCodeEnabled bool, agentStatsEnabled bool) ([]awsmiddleware.RequestHandler, []awsmiddleware.ResponseHandler) { + var requestHandlers []awsmiddleware.RequestHandler + var responseHandlers []awsmiddleware.ResponseHandler + var statsProviders []agent.StatsProvider + + if !statusCodeEnabled && !agentStatsEnabled { + return nil, nil + } + + statusCodeFilter := agent.NewStatusCodeOperationsFilter() + statusCodeHandler := provider.NewStatusCodeHandler(statusCodeFilter) + + if statusCodeEnabled { + responseHandlers = append(responseHandlers, statusCodeHandler) + statsProviders = append(statsProviders, provider.GetStatsProvider()) + } + + if agentStatsEnabled { + clientStats := client.NewHandler(agent.NewOperationsFilter()) + statsProviders = append(statsProviders, clientStats, provider.GetProcessStats(), provider.GetFlagsStats()) + responseHandlers = append(responseHandlers, clientStats) + requestHandlers = append(requestHandlers, clientStats) + + } filter := agent.NewOperationsFilter(cfg.Operations...) - clientStats := client.NewHandler(filter) - stats := newStatsHandler(logger, filter, []agent.StatsProvider{clientStats, provider.GetProcessStats(), provider.GetFlagsStats()}) + stats := newStatsHandler(logger, filter, statsProviders) + requestHandlers = append(requestHandlers, stats) + agent.UsageFlags().SetValues(cfg.UsageFlags) - return []awsmiddleware.RequestHandler{stats, clientStats}, []awsmiddleware.ResponseHandler{clientStats} + return requestHandlers, responseHandlers } type statsHandler struct { diff --git a/extension/agenthealth/handler/stats/handler_test.go b/extension/agenthealth/handler/stats/handler_test.go index f40bebd481..f96604bded 100644 --- a/extension/agenthealth/handler/stats/handler_test.go +++ b/extension/agenthealth/handler/stats/handler_test.go @@ -40,12 +40,16 @@ func TestStatsHandler(t *testing.T) { StatusCode: aws.Int(200), ImdsFallbackSucceed: aws.Int(1), SharedConfigFallback: aws.Int(1), + StatusCodes: map[string][5]int{ + "pmd": {1, 0, 0, 0, 0}, + "di": {0, 1, 0, 0, 0}, + }, } handler := newStatsHandler( zap.NewNop(), agent.NewOperationsFilter(), []agent.StatsProvider{ - newMockStatsProvider(&agent.Stats{CpuPercent: aws.Float64(1.2)}), + newMockStatsProvider(&agent.Stats{CPUPercent: aws.Float64(1.2)}), newMockStatsProvider(&agent.Stats{MemoryBytes: aws.Uint64(123)}), newMockStatsProvider(stats), }, @@ -59,15 +63,31 @@ func TestStatsHandler(t *testing.T) { assert.Equal(t, "", req.Header.Get(headerKeyAgentStats)) handler.filter = agent.NewOperationsFilter(agent.AllowAllOperations) handler.HandleRequest(ctx, req) - assert.Equal(t, `"cpu":1.2,"mem":123,"fd":456,"th":789,"lat":1234,"load":5678,"code":200,"scfb":1,"ifs":1`, req.Header.Get(headerKeyAgentStats)) + assert.Equal(t, `"cpu":1.2,"mem":123,"fd":456,"th":789,"lat":1234,"load":5678,"code":200,"scfb":1,"ifs":1,"codes":{"di":[0,1,0,0,0],"pmd":[1,0,0,0,0]}`, req.Header.Get(headerKeyAgentStats)) stats.StatusCode = aws.Int(404) stats.LatencyMillis = nil handler.HandleRequest(ctx, req) - assert.Equal(t, `"cpu":1.2,"mem":123,"fd":456,"th":789,"load":5678,"code":404,"scfb":1,"ifs":1`, req.Header.Get(headerKeyAgentStats)) + assert.Equal(t, `"cpu":1.2,"mem":123,"fd":456,"th":789,"load":5678,"code":404,"scfb":1,"ifs":1,"codes":{"di":[0,1,0,0,0],"pmd":[1,0,0,0,0]}`, req.Header.Get(headerKeyAgentStats)) } -func TestNewHandlers(t *testing.T) { - requestHandlers, responseHandlers := NewHandlers(zap.NewNop(), agent.StatsConfig{}) +func TestNewHandlersWithStatusCodeOnly(t *testing.T) { + requestHandlers, responseHandlers := NewHandlers(zap.NewNop(), agent.StatsConfig{}, true, false) + assert.Len(t, requestHandlers, 1) + assert.Len(t, responseHandlers, 1) +} +func TestNewHandlersWithAgentStatsOnly(t *testing.T) { + requestHandlers, responseHandlers := NewHandlers(zap.NewNop(), agent.StatsConfig{}, false, true) assert.Len(t, requestHandlers, 2) assert.Len(t, responseHandlers, 1) } + +func TestNewHandlersWithStatusCodeAndAgenthStats(t *testing.T) { + requestHandlers, responseHandlers := NewHandlers(zap.NewNop(), agent.StatsConfig{}, true, true) + assert.Len(t, requestHandlers, 2) + assert.Len(t, responseHandlers, 2) +} +func TestNewHandlersWithoutStatusCodeAndAgenthStats(t *testing.T) { + requestHandlers, responseHandlers := NewHandlers(zap.NewNop(), agent.StatsConfig{}, false, false) + assert.Len(t, requestHandlers, 0) + assert.Len(t, responseHandlers, 0) +} diff --git a/extension/agenthealth/handler/stats/provider/process.go b/extension/agenthealth/handler/stats/provider/process.go index 4e88ebbee5..e6687c9188 100644 --- a/extension/agenthealth/handler/stats/provider/process.go +++ b/extension/agenthealth/handler/stats/provider/process.go @@ -75,7 +75,7 @@ func (p *processStats) updateLoop() { func (p *processStats) refresh() { p.stats.Store(agent.Stats{ - CpuPercent: p.cpuPercent(), + CPUPercent: p.cpuPercent(), MemoryBytes: p.memoryBytes(), FileDescriptorCount: p.fileDescriptorCount(), ThreadCount: p.threadCount(), diff --git a/extension/agenthealth/handler/stats/provider/process_test.go b/extension/agenthealth/handler/stats/provider/process_test.go index 19fac625fb..7a350265eb 100644 --- a/extension/agenthealth/handler/stats/provider/process_test.go +++ b/extension/agenthealth/handler/stats/provider/process_test.go @@ -64,11 +64,11 @@ func TestProcessStats(t *testing.T) { mock := &mockProcessMetrics{} provider := newProcessStats(mock, time.Millisecond) got := provider.getStats() - assert.NotNil(t, got.CpuPercent) + assert.NotNil(t, got.CPUPercent) assert.NotNil(t, got.MemoryBytes) assert.NotNil(t, got.FileDescriptorCount) assert.NotNil(t, got.ThreadCount) - assert.EqualValues(t, 1, *got.CpuPercent) + assert.EqualValues(t, 1, *got.CPUPercent) assert.EqualValues(t, 2, *got.MemoryBytes) assert.EqualValues(t, 3, *got.FileDescriptorCount) assert.EqualValues(t, 4, *got.ThreadCount) @@ -77,6 +77,25 @@ func TestProcessStats(t *testing.T) { mock.mu.Unlock() provider.refresh() assert.Eventually(t, func() bool { - return provider.getStats() == agent.Stats{} + return isAgentStatsReset(provider.getStats()) }, 5*time.Millisecond, time.Millisecond) } + +func isAgentStatsReset(stats agent.Stats) bool { + return stats.CPUPercent == nil && + stats.MemoryBytes == nil && + stats.FileDescriptorCount == nil && + stats.ThreadCount == nil && + stats.LatencyMillis == nil && + stats.PayloadBytes == nil && + stats.StatusCode == nil && + stats.SharedConfigFallback == nil && + stats.ImdsFallbackSucceed == nil && + stats.AppSignals == nil && + stats.EnhancedContainerInsights == nil && + stats.RunningInContainer == nil && + stats.RegionType == nil && + stats.Mode == nil && + stats.EntityRejected == nil && + len(stats.StatusCodes) == 0 +} diff --git a/extension/agenthealth/handler/stats/provider/statuscode.go b/extension/agenthealth/handler/stats/provider/statuscode.go new file mode 100644 index 0000000000..7c45ea4c99 --- /dev/null +++ b/extension/agenthealth/handler/stats/provider/statuscode.go @@ -0,0 +1,180 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package provider + +import ( + "context" + "net/http" + "sync" + "time" + + "github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware" + + "github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats/agent" +) + +const ( + statusResetInterval = 5 * time.Minute + statusHandlerID = "cloudwatchagent.StatusCodeHandler" +) + +var ( + statsProviderSingleton agent.StatsProvider + statsProviderOnce sync.Once +) + +// SingletonStatsProvider manages a collection of statistics. +type SingletonStatsProvider struct { + mu sync.Mutex + statusCodeStats map[string][5]int +} + +// StatusCodeHandler provides monitoring for status codes per operation. +type StatusCodeHandler struct { + statsProvider *SingletonStatsProvider + filter agent.OperationsFilter + resetTimer *time.Timer + mu sync.Mutex +} + +// NewStatusCodeHandler creates a new instance of StatusCodeHandler. +func NewStatusCodeHandler(filter agent.OperationsFilter) *StatusCodeHandler { + provider := GetStatsProvider().(*SingletonStatsProvider) // Get the singleton provider. + handler := &StatusCodeHandler{ + statsProvider: provider, + filter: filter, + } + handler.startResetTimer() + return handler +} + +// HandleResponse processes the HTTP response to update status code stats. +func (h *StatusCodeHandler) HandleResponse(ctx context.Context, r *http.Response) { + operation := awsmiddleware.GetOperationName(ctx) + if operation == "" { + return + } else if !h.filter.IsAllowed(operation) { + return + } + + operation = GetShortOperationName(operation) + statusCode := r.StatusCode + + h.mu.Lock() + defer h.mu.Unlock() + + // Get or initialize stats + h.statsProvider.mu.Lock() + stats := h.statsProvider.statusCodeStats[operation] + h.statsProvider.mu.Unlock() + + h.updateStatusCodeCount(&stats, statusCode) + + // Update the singleton stats provider + h.statsProvider.UpdateStats(operation, stats) +} + +// ID returns the unique identifier for the handler. +func (h *StatusCodeHandler) ID() string { + return statusHandlerID +} + +// Position specifies the handler's position in the middleware chain. +func (h *StatusCodeHandler) Position() awsmiddleware.HandlerPosition { + return awsmiddleware.After +} + +// GetStatsProvider retrieves the singleton instance of the `agent.StatsProvider`. +func GetStatsProvider() agent.StatsProvider { + statsProviderOnce.Do(func() { + statsProviderSingleton = &SingletonStatsProvider{ + statusCodeStats: make(map[string][5]int), + } + }) + return statsProviderSingleton +} + +// Stats returns the current statistics for a given operation. +func (p *SingletonStatsProvider) Stats(_ string) agent.Stats { + p.mu.Lock() + defer p.mu.Unlock() + + statusCodeMap := make(map[string][5]int, len(p.statusCodeStats)) + for op, stats := range p.statusCodeStats { + statusCodeMap[op] = stats + } + + return agent.Stats{ + StatusCodes: statusCodeMap, + } +} + +// UpdateStats updates the statistics for a given operation. +func (p *SingletonStatsProvider) UpdateStats(operation string, stats [5]int) { + p.mu.Lock() + defer p.mu.Unlock() + + p.statusCodeStats[operation] = stats +} + +// startResetTimer initializes a reset timer to clear stats every 5 minutes. +func (h *StatusCodeHandler) startResetTimer() { + h.resetTimer = time.AfterFunc(statusResetInterval, func() { + h.mu.Lock() + defer h.mu.Unlock() + + h.statsProvider.mu.Lock() + h.statsProvider.statusCodeStats = make(map[string][5]int) + h.statsProvider.mu.Unlock() + + h.startResetTimer() + }) +} + +// updateStatusCodeCount updates the count for a given status code. +func (h *StatusCodeHandler) updateStatusCodeCount(stats *[5]int, statusCode int) { + switch statusCode { + case 200: + stats[0]++ + case 400: + stats[1]++ + case 408: + stats[2]++ + case 413: + stats[3]++ + case 429: + stats[4]++ + } +} + +func GetShortOperationName(operation string) string { + switch operation { + case "PutRetentionPolicy": + return "prp" + case "DescribeInstances": + return "di" + case "DescribeTasks": + return "dts" + case "DescribeTags": + return "dt" + case "DescribeVolumes": + return "dv" + case "DescribeContainerInstances": + return "dci" + case "DescribeServices": + return "ds" + case "DescribeTaskDefinition": + return "dtd" + case "ListServices": + return "ls" + case "ListTasks": + return "lt" + case "CreateLogGroup": + return "clg" + case "CreateLogStream": + return "cls" + default: + return "" + } +} diff --git a/extension/agenthealth/handler/stats/provider/statuscode_test.go b/extension/agenthealth/handler/stats/provider/statuscode_test.go new file mode 100644 index 0000000000..b2672bab40 --- /dev/null +++ b/extension/agenthealth/handler/stats/provider/statuscode_test.go @@ -0,0 +1,65 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package provider + +import ( + "reflect" + "testing" + + "github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats/agent" +) + +func TestSingletonStatsProvider_Stats(t *testing.T) { + provider := &SingletonStatsProvider{ + statusCodeStats: map[string][5]int{ + "operation1": {1, 2, 3, 4, 5}, + }, + } + + stats := provider.Stats("operation1") + + expected := agent.Stats{ + StatusCodes: map[string][5]int{ + "operation1": {1, 2, 3, 4, 5}, + }, + } + + if !reflect.DeepEqual(stats, expected) { + t.Errorf("Stats() failed. Got %+v, expected %+v", stats, expected) + } +} + +func TestSingletonStatsProvider_UpdateStats(t *testing.T) { + provider := &SingletonStatsProvider{ + statusCodeStats: make(map[string][5]int), + } + + provider.UpdateStats("operation1", [5]int{1, 0, 0, 0, 0}) + + expected := map[string][5]int{ + "operation1": {1, 0, 0, 0, 0}, + } + + if !reflect.DeepEqual(provider.statusCodeStats, expected) { + t.Errorf("UpdateStats() failed. Got %+v, expected %+v", provider.statusCodeStats, expected) + } +} + +func TestGetShortOperationName(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"PutRetentionPolicy", "prp"}, + {"DescribeInstances", "di"}, + {"UnknownOperation", ""}, + } + + for _, test := range tests { + result := GetShortOperationName(test.input) + if result != test.expected { + t.Errorf("GetShortOperationName(%q) = %q; want %q", test.input, result, test.expected) + } + } +} diff --git a/plugins/outputs/cloudwatchlogs/cloudwatchlogs.go b/plugins/outputs/cloudwatchlogs/cloudwatchlogs.go index e1b0f4f457..bbec34db90 100644 --- a/plugins/outputs/cloudwatchlogs/cloudwatchlogs.go +++ b/plugins/outputs/cloudwatchlogs/cloudwatchlogs.go @@ -403,7 +403,7 @@ func init() { zap.NewNop(), &agenthealth.Config{ IsUsageDataEnabled: envconfig.IsUsageDataEnabled(), - Stats: agent.StatsConfig{Operations: []string{"PutLogEvents"}}, + Stats: &agent.StatsConfig{Operations: []string{"PutLogEvents"}}, }, ), } diff --git a/translator/translate/otel/extension/agenthealth/translator.go b/translator/translate/otel/extension/agenthealth/translator.go index ef39f9390c..6ca755341f 100644 --- a/translator/translate/otel/extension/agenthealth/translator.go +++ b/translator/translate/otel/extension/agenthealth/translator.go @@ -31,14 +31,25 @@ var ( ) type translator struct { - name string - operations []string - isUsageDataEnabled bool - factory extension.Factory + name string + operations []string + isUsageDataEnabled bool + factory extension.Factory + isStatusCodeEnabled bool } var _ common.Translator[component.Config] = (*translator)(nil) +func NewTranslatorWithStatusCode(name component.DataType, operations []string, statuscodeonly bool) common.Translator[component.Config] { + return &translator{ + name: name.String(), + operations: operations, + factory: agenthealth.NewFactory(), + isUsageDataEnabled: envconfig.IsUsageDataEnabled(), + isStatusCodeEnabled: statuscodeonly, + } +} + func NewTranslator(name component.DataType, operations []string) common.Translator[component.Config] { return &translator{ name: name.String(), @@ -59,7 +70,8 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { if usageData, ok := common.GetBool(conf, common.ConfigKey(common.AgentKey, usageDataKey)); ok { cfg.IsUsageDataEnabled = cfg.IsUsageDataEnabled && usageData } - cfg.Stats = agent.StatsConfig{ + cfg.IsStatusCodeEnabled = t.isStatusCodeEnabled + cfg.Stats = &agent.StatsConfig{ Operations: t.operations, UsageFlags: map[agent.Flag]any{ agent.FlagMode: context.CurrentContext().ShortMode(), diff --git a/translator/translate/otel/extension/agenthealth/translator_test.go b/translator/translate/otel/extension/agenthealth/translator_test.go index 989372e04a..41501ab0bf 100644 --- a/translator/translate/otel/extension/agenthealth/translator_test.go +++ b/translator/translate/otel/extension/agenthealth/translator_test.go @@ -35,7 +35,7 @@ func TestTranslate(t *testing.T) { isEnvUsageData: true, want: &agenthealth.Config{ IsUsageDataEnabled: true, - Stats: agent.StatsConfig{ + Stats: &agent.StatsConfig{ Operations: operations, UsageFlags: usageFlags, }, @@ -46,7 +46,7 @@ func TestTranslate(t *testing.T) { isEnvUsageData: true, want: &agenthealth.Config{ IsUsageDataEnabled: false, - Stats: agent.StatsConfig{ + Stats: &agent.StatsConfig{ Operations: operations, UsageFlags: usageFlags, }, @@ -57,7 +57,7 @@ func TestTranslate(t *testing.T) { isEnvUsageData: false, want: &agenthealth.Config{ IsUsageDataEnabled: false, - Stats: agent.StatsConfig{ + Stats: &agent.StatsConfig{ Operations: operations, UsageFlags: usageFlags, }, @@ -68,7 +68,7 @@ func TestTranslate(t *testing.T) { isEnvUsageData: true, want: &agenthealth.Config{ IsUsageDataEnabled: true, - Stats: agent.StatsConfig{ + Stats: &agent.StatsConfig{ Operations: operations, UsageFlags: usageFlags, },