diff --git a/definitions/infra-awsalb/definition.yml b/definitions/infra-awsalb/definition.yml index 36e6f51a1..0fd84d2be 100644 --- a/definitions/infra-awsalb/definition.yml +++ b/definitions/infra-awsalb/definition.yml @@ -1,8 +1,18 @@ domain: INFRA type: AWSALB -goldenTags: -- aws.availabilityZone -- aws.accountId +goldenTags: + - aws.awsRegion + - aws.state + - aws.type + - aws.ipAddressType + - aws.dnsName + - aws.scheme + - aws.accountId + - account + - label.Team + - label.team + - label.env + - label.environment compositeMetrics: goldenMetrics: - golden_metrics.yml diff --git a/definitions/infra-awsalb/golden_metrics.yml b/definitions/infra-awsalb/golden_metrics.yml index 0d85a22e9..40cc1e732 100644 --- a/definitions/infra-awsalb/golden_metrics.yml +++ b/definitions/infra-awsalb/golden_metrics.yml @@ -1,16 +1,15 @@ -requests: - title: Requests +serverErrors4XxAnd5Xx: + title: Server errors (4xx and 5xx) query: - select: sum(provider.requestCount.Sum) + select: rate(sum((provider.httpCodeElb4XXCount.Sum OR 0) + (provider.httpCodeElb5XXCount.Sum OR 0)),1 minute) from: LoadBalancerSample where: provider='Alb' facet: entityName eventId: entityGuid -serverErrors4XxAnd5Xx: - title: Server errors (4xx and 5xx) +activeConnections: + title: Active Connections query: - select: sum((provider.httpCodeElb4XXCount.Sum OR 0) + (provider.httpCodeElb5XXCount.Sum - OR 0)) + select: rate(sum(provider.activeConnectionCount.Sum),1 minute) from: LoadBalancerSample where: provider='Alb' facet: entityName @@ -18,7 +17,7 @@ serverErrors4XxAnd5Xx: rejectedConnections: title: Rejected connections query: - select: sum(provider.rejectedConnectionCount.Sum) + select: rate(sum(provider.rejectedConnectionCount.Sum),1 minute) from: LoadBalancerSample where: provider='Alb' facet: entityName diff --git a/definitions/infra-awsalbtargetgroup/definition.yml b/definitions/infra-awsalbtargetgroup/definition.yml index f82903e93..182dfdfb9 100644 --- 
a/definitions/infra-awsalbtargetgroup/definition.yml +++ b/definitions/infra-awsalbtargetgroup/definition.yml @@ -1,8 +1,17 @@ domain: INFRA type: AWSALBTARGETGROUP -goldenTags: -- aws.availabilityZone -- aws.accountId +goldenTags: + - aws.awsRegion + - aws.matcher + - aws.port + - aws.protocol + - aws.targetGroupName + - aws.accountId + - account + - label.Team + - label.team + - label.env + - label.environment compositeMetrics: goldenMetrics: - golden_metrics.yml diff --git a/definitions/infra-awsalbtargetgroup/golden_metrics.yml b/definitions/infra-awsalbtargetgroup/golden_metrics.yml index a02747b3a..ac799fae2 100644 --- a/definitions/infra-awsalbtargetgroup/golden_metrics.yml +++ b/definitions/infra-awsalbtargetgroup/golden_metrics.yml @@ -1,7 +1,31 @@ +serverErrors4XxAnd5Xx: + title: Server errors (4xx and 5xx) + query: + select: rate(sum((`provider.httpCodeTarget4XXCount.Sum` OR 0) + (`provider.httpCodeTarget5XXCount.Sum` OR 0)),1 minute) + from: LoadBalancerSample + where: provider='AlbTargetGroup' + facet: entityName + eventId: entityGuid +unhealthyHosts: + title: Unhealthy host count + query: + select: max(`provider.unHealthyHostCount.Maximum`) + from: LoadBalancerSample + where: provider='AlbTargetGroup' + facet: entityName + eventId: entityGuid +responseTime: + title: Average response time + query: + select: average(`provider.targetResponseTime.Average`) + from: LoadBalancerSample + where: provider='AlbTargetGroup' + facet: entityName + eventId: entityGuid requests: title: Requests query: - select: sum(provider.requestCountPerTarget.Sum) + select: rate(sum(provider.requestCountPerTarget.Sum),1 minute) from: LoadBalancerSample where: provider='AlbTargetGroup' facet: entityName diff --git a/definitions/infra-awsecscluster/definition.yml b/definitions/infra-awsecscluster/definition.yml index da9104624..a2745c3a6 100644 --- a/definitions/infra-awsecscluster/definition.yml +++ b/definitions/infra-awsecscluster/definition.yml @@ -1,7 +1,8 @@ domain: INFRA type: 
AWSECSCLUSTER goldenTags: -- aws.availabilityZone +- aws.awsRegion +- aws.clusterStatus - aws.accountId compositeMetrics: goldenMetrics: diff --git a/definitions/infra-awsecsservice/definition.yml b/definitions/infra-awsecsservice/definition.yml index 60329fc6f..b739cc40c 100644 --- a/definitions/infra-awsecsservice/definition.yml +++ b/definitions/infra-awsecsservice/definition.yml @@ -1,7 +1,10 @@ domain: INFRA type: AWSECSSERVICE goldenTags: -- aws.availabilityZone +- aws.clusterName +- aws.awsRegion +- aws.launchType +- aws.serviceStatus - aws.accountId compositeMetrics: goldenMetrics: diff --git a/definitions/infra-awslambdafunction/definition.yml b/definitions/infra-awslambdafunction/definition.yml index 53d5b2c00..03bd05b67 100644 --- a/definitions/infra-awslambdafunction/definition.yml +++ b/definitions/infra-awslambdafunction/definition.yml @@ -1,8 +1,17 @@ domain: INFRA type: AWSLAMBDAFUNCTION goldenTags: -- aws.availabilityZone -- aws.accountId + - aws.awsRegion + - aws.accountId + - aws.memorySize + - aws.runtime + - aws.timeout + - aws.handler + - account + - label.Team + - label.team + - label.env + - label.environment compositeMetrics: goldenMetrics: - golden_metrics.yml diff --git a/definitions/infra-awslambdafunction/golden_metrics.yml b/definitions/infra-awslambdafunction/golden_metrics.yml index 6ef08024a..dfe195ca8 100644 --- a/definitions/infra-awslambdafunction/golden_metrics.yml +++ b/definitions/infra-awslambdafunction/golden_metrics.yml @@ -1,21 +1,32 @@ +errorRate: + title: Error rate % + query: + select: sum(provider.errors.Sum) * 100 / sum(provider.invocations.Sum) + from: ServerlessSample + where: provider='LambdaFunction' + eventId: entityGuid + facet: entityName totalInvocations: title: Total Invocations query: - select: sum(provider.invocations.Sum) + select: rate(sum(provider.invocations.Sum),1 minute) from: ServerlessSample - facet: entityName + where: provider='LambdaFunction' eventId: entityGuid + facet: 
entityName duration99PercentileS: title: Duration (99 percentile) (s) query: - select: average(provider.duration.Maximum) / 1000 + select: max(provider.duration.Maximum) / 1000 from: ServerlessSample - facet: entityName + where: provider='LambdaFunction' eventId: entityGuid -errorRate: - title: Error rate + facet: entityName +throttles: + title: Throttled invocations query: - select: sum(provider.errors.Sum) * 100 / sum(provider.invocations.Sum) + select: rate(sum(provider.throttles.Sum), 1 minute) from: ServerlessSample - facet: entityName + where: provider='LambdaFunction' eventId: entityGuid + facet: entityName \ No newline at end of file diff --git a/definitions/infra-awslambdafunctionalias/definition.yml b/definitions/infra-awslambdafunctionalias/definition.yml index cff318939..5567c1057 100644 --- a/definitions/infra-awslambdafunctionalias/definition.yml +++ b/definitions/infra-awslambdafunctionalias/definition.yml @@ -1,2 +1,17 @@ domain: INFRA type: AWSLAMBDAFUNCTIONALIAS +goldenTags: + - aws.region + - aws.accountId + - aws.aliasName + - aws.functionName + - aws.functionVersion + - aws.resource + - account + - label.Team + - label.team + - label.env + - label.environment +compositeMetrics: + goldenMetrics: + - golden_metrics.yml \ No newline at end of file diff --git a/definitions/infra-awslambdafunctionalias/golden_metrics.yml b/definitions/infra-awslambdafunctionalias/golden_metrics.yml new file mode 100644 index 000000000..dfe195ca8 --- /dev/null +++ b/definitions/infra-awslambdafunctionalias/golden_metrics.yml @@ -0,0 +1,32 @@ +errorRate: + title: Error rate % + query: + select: sum(provider.errors.Sum) * 100 / sum(provider.invocations.Sum) + from: ServerlessSample + where: provider='LambdaFunctionAlias' + eventId: entityGuid + facet: entityName +totalInvocations: + title: Total Invocations + query: + select: rate(sum(provider.invocations.Sum),1 minute) + from: ServerlessSample + where: provider='LambdaFunctionAlias' + eventId: entityGuid + facet: entityName 
+duration99PercentileS: + title: Duration (99 percentile) (s) + query: + select: max(provider.duration.Maximum) / 1000 + from: ServerlessSample + where: provider='LambdaFunctionAlias' + eventId: entityGuid + facet: entityName +throttles: + title: Throttled invocations + query: + select: rate(sum(provider.throttles.Sum), 1 minute) + from: ServerlessSample + where: provider='LambdaFunctionAlias' + eventId: entityGuid + facet: entityName \ No newline at end of file diff --git a/definitions/infra-awslambdaregion/definition.yml b/definitions/infra-awslambdaregion/definition.yml index b55114ac0..f4ea7f674 100644 --- a/definitions/infra-awslambdaregion/definition.yml +++ b/definitions/infra-awslambdaregion/definition.yml @@ -1,2 +1,14 @@ domain: INFRA type: AWSLAMBDAREGION +goldenTags: + - aws.awsRegion + - aws.accountId + - aws.concurrentExecutions + - account + - label.Team + - label.team + - label.env + - label.environment +compositeMetrics: + goldenMetrics: + - golden_metrics.yml \ No newline at end of file diff --git a/definitions/infra-awslambdaregion/golden_metrics.yml b/definitions/infra-awslambdaregion/golden_metrics.yml new file mode 100644 index 000000000..bdd745c6b --- /dev/null +++ b/definitions/infra-awslambdaregion/golden_metrics.yml @@ -0,0 +1,16 @@ +ConcurrentExecutions: + title: Max Concurrent Executions + query: + select: max(`provider.concurrentExecutions.Maximum`) + from: ServerlessSample + where: provider='LambdaRegion' + eventId: entityGuid + facet: entityName +UnreservedConcurrentExecutions: + title: Max Unreserved Concurrent Executions + query: + select: max(`provider.unreservedConcurrentExecutions.Maximum`) + from: ServerlessSample + where: provider='LambdaRegion' + eventId: entityGuid + facet: entityName \ No newline at end of file diff --git a/definitions/infra-awsnlb/definition.yml b/definitions/infra-awsnlb/definition.yml index 4f2161731..0f1aa054b 100644 --- a/definitions/infra-awsnlb/definition.yml +++ b/definitions/infra-awsnlb/definition.yml @@ -1,10 
+1,19 @@ domain: INFRA type: AWSNLB goldenTags: -- aws.availabilityZone -- aws.accountId + - aws.awsRegion + - aws.state + - aws.type + - aws.ipAddressType + - aws.dnsName + - aws.accountId + - account + - label.Team + - label.team + - label.env + - label.environment compositeMetrics: goldenMetrics: - golden_metrics.yml summaryMetrics: - - summary_metrics.yml + - summary_metrics.yml \ No newline at end of file diff --git a/definitions/infra-awsnlb/golden_metrics.yml b/definitions/infra-awsnlb/golden_metrics.yml index b34347abf..a22853bd5 100644 --- a/definitions/infra-awsnlb/golden_metrics.yml +++ b/definitions/infra-awsnlb/golden_metrics.yml @@ -1,32 +1,33 @@ -concurrentFlows: - title: Concurrent flows +failedClientNlbTlsHandshakes: + title: Failed client-NLB TLS handshakes query: - select: average(provider.activeFlowCount.Average) + select: rate(sum(provider.clientTlsNegotiationErrorCount.Sum),1 minute) from: LoadBalancerSample where: provider='Nlb' - facet: entityName eventId: entityGuid -concurrentTlsFlows: - title: Concurrent TLS flows + facet: entityName +failedNlbTargetTlsHandshakes: + title: Failed NLB-target TLS handshakes query: - select: average(provider.activeFlowCountTls.Average) + select: rate(sum(provider.targetTlsNegotiationErrorCount.Sum),1 minute) from: LoadBalancerSample where: provider='Nlb' - facet: entityName eventId: entityGuid -failedClientNlbTlsHandshakes: - title: Failed client-NLB TLS handshakes + facet: entityName +concurrentFlows: + title: Avg Concurrent flows query: - select: sum(provider.clientTlsNegotiationErrorCount.Sum) + select: average(provider.activeFlowCount.Average) from: LoadBalancerSample where: provider='Nlb' - facet: entityName eventId: entityGuid -failedNlbTargetTlsHandshakes: - title: Failed NLB-target TLS handshakes + facet: entityName +concurrentTlsFlows: + title: Avg Concurrent TLS flows query: - select: sum(provider.targetTlsNegotiationErrorCount.Sum) + select: average(provider.activeFlowCountTls.Average) from: 
LoadBalancerSample where: provider='Nlb' - facet: entityName eventId: entityGuid + facet: entityName + \ No newline at end of file diff --git a/definitions/infra-awsnlbtargetgroup/definition.yml b/definitions/infra-awsnlbtargetgroup/definition.yml index 97992804d..0d177aa15 100644 --- a/definitions/infra-awsnlbtargetgroup/definition.yml +++ b/definitions/infra-awsnlbtargetgroup/definition.yml @@ -1,10 +1,19 @@ domain: INFRA type: AWSNLBTARGETGROUP goldenTags: -- aws.availabilityZone -- aws.accountId + - aws.awsRegion + - aws.state + - aws.type + - aws.ipAddressType + - aws.dnsName + - aws.accountId + - account + - label.Team + - label.team + - label.env + - label.environment compositeMetrics: goldenMetrics: - golden_metrics.yml summaryMetrics: - - summary_metrics.yml + - summary_metrics.yml \ No newline at end of file diff --git a/definitions/infra-awsnlbtargetgroup/golden_metrics.yml b/definitions/infra-awsnlbtargetgroup/golden_metrics.yml index 89336fb58..a22853bd5 100644 --- a/definitions/infra-awsnlbtargetgroup/golden_metrics.yml +++ b/definitions/infra-awsnlbtargetgroup/golden_metrics.yml @@ -1,16 +1,33 @@ -healthyHosts: - title: Healthy hosts +failedClientNlbTlsHandshakes: + title: Failed client-NLB TLS handshakes query: - select: min(provider.healthyHostCount.Minimum) + select: rate(sum(provider.clientTlsNegotiationErrorCount.Sum),1 minute) from: LoadBalancerSample - where: provider='NlbTargetGroup' + where: provider='Nlb' + eventId: entityGuid facet: entityName +failedNlbTargetTlsHandshakes: + title: Failed NLB-target TLS handshakes + query: + select: rate(sum(provider.targetTlsNegotiationErrorCount.Sum),1 minute) + from: LoadBalancerSample + where: provider='Nlb' eventId: entityGuid -unhealthyHosts: - title: Unhealthy hosts + facet: entityName +concurrentFlows: + title: Avg Concurrent flows query: - select: max(provider.unHealthyHostCount.Maximum) + select: average(provider.activeFlowCount.Average) from: LoadBalancerSample - where: 
provider='NlbTargetGroup' + where: provider='Nlb' + eventId: entityGuid facet: entityName +concurrentTlsFlows: + title: Avg Concurrent TLS flows + query: + select: average(provider.activeFlowCountTls.Average) + from: LoadBalancerSample + where: provider='Nlb' eventId: entityGuid + facet: entityName + \ No newline at end of file diff --git a/definitions/infra-awsredshiftcluster/definition.yml b/definitions/infra-awsredshiftcluster/definition.yml index 088aa039b..9d35f7460 100644 --- a/definitions/infra-awsredshiftcluster/definition.yml +++ b/definitions/infra-awsredshiftcluster/definition.yml @@ -1,5 +1,18 @@ domain: INFRA type: AWSREDSHIFTCLUSTER +goldenTags: + - aws.awsRegion + - aws.clusterId + - aws.accountId + - aws.dbName + - aws.nodeType + - account + - label.Team + - label.team + - label.env + - label.environment compositeMetrics: + goldenMetrics: + - golden_metrics.yml summaryMetrics: - - summary_metrics.yml + - summary_metrics.yml \ No newline at end of file diff --git a/definitions/infra-awsredshiftcluster/golden_metrics.yml b/definitions/infra-awsredshiftcluster/golden_metrics.yml new file mode 100644 index 000000000..c965c1881 --- /dev/null +++ b/definitions/infra-awsredshiftcluster/golden_metrics.yml @@ -0,0 +1,32 @@ +CPUUtilization: + title: Max CPU Utilization + query: + select: max(`provider.cpuUtilization.Maximum`) + from: DatastoreSample + where: provider='RedshiftCluster' + facet: entityName + eventId: entityGuid +QueryDuration: + title: Max Query Duration + query: + select: max(`provider.queryDuration.Maximum`) + from: DatastoreSample + where: provider='RedshiftCluster' + facet: entityName + eventId: entityGuid +HealthStatus: + title: Cluster Health Status + query: + select: min(`provider.healthStatus.Minimum`) + from: DatastoreSample + where: provider='RedshiftCluster' + facet: entityName + eventId: entityGuid +DatabaseConnections: + title: Database Connections + query: + select: max(`provider.databaseConnections.Maximum`) + from: 
DatastoreSample + where: provider='RedshiftCluster' + facet: entityName + eventId: entityGuid \ No newline at end of file diff --git a/definitions/infra-awsredshiftnode/definition.yml b/definitions/infra-awsredshiftnode/definition.yml index 9767cd3b8..5e74fa625 100644 --- a/definitions/infra-awsredshiftnode/definition.yml +++ b/definitions/infra-awsredshiftnode/definition.yml @@ -1,5 +1,17 @@ domain: INFRA type: AWSREDSHIFTNODE +goldenTags: + - aws.awsRegion + - aws.clusterId + - aws.accountId + - aws.nodeId + - account + - label.Team + - label.team + - label.env + - label.environment compositeMetrics: + goldenMetrics: + - golden_metrics.yml summaryMetrics: - - summary_metrics.yml + - summary_metrics.yml \ No newline at end of file diff --git a/definitions/infra-awsredshiftnode/golden_metrics.yml b/definitions/infra-awsredshiftnode/golden_metrics.yml new file mode 100644 index 000000000..0ec2996f2 --- /dev/null +++ b/definitions/infra-awsredshiftnode/golden_metrics.yml @@ -0,0 +1,48 @@ +CPUUtilization: + title: Max CPU Utilization + query: + select: max(`provider.cpuUtilization.Maximum`) + from: DatastoreSample + where: provider='RedshiftNode' + facet: entityName + eventId: entityGuid +DatabaseConnections: + title: Database Connections + query: + select: max(`provider.databaseConnections.Maximum`) + from: DatastoreSample + where: provider='RedshiftNode' + facet: entityName + eventId: entityGuid +ReadLatency: + title: Max Read Latency + query: + select: max(`provider.readLatency.Maximum`) + from: DatastoreSample + where: provider='RedshiftNode' + facet: entityName + eventId: entityGuid +WriteLatency: + title: Max Write Latency + query: + select: max(`provider.writeLatency.Maximum`) + from: DatastoreSample + where: provider='RedshiftNode' + facet: entityName + eventId: entityGuid +PercentageDiskSpace: + title: Max Percentage disk space used + query: + select: max(`provider.percentageDiskSpaceUsed.Maximum`) + from: DatastoreSample + where: provider='RedshiftNode' + 
facet: entityName + eventId: entityGuid +HealthStatus: + title: Cluster Health Status + query: + select: min(`provider.healthStatus.Minimum`) + from: DatastoreSample + where: provider='RedshiftNode' + facet: entityName + eventId: entityGuid \ No newline at end of file