Skip to content

Commit

Permalink
Adding definitions of Golden tags and golden metrics (#71)
Browse files Browse the repository at this point in the history
Add golden tags and golden metrics for AWS related resources.
  • Loading branch information
AlbertoGoYu committed Feb 18, 2021
1 parent 4d2e243 commit 94a3305
Show file tree
Hide file tree
Showing 20 changed files with 342 additions and 59 deletions.
16 changes: 13 additions & 3 deletions definitions/infra-awsalb/definition.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
domain: INFRA
type: AWSALB
goldenTags:
- aws.availabilityZone
- aws.accountId
goldenTags:
- aws.awsRegion
- aws.state
- aws.type
- aws.ipAddressType
- aws.dnsName
- aws.scheme
- aws.accountId
- account
- label.Team
- label.team
- label.env
- label.environment
compositeMetrics:
goldenMetrics:
- golden_metrics.yml
Expand Down
15 changes: 7 additions & 8 deletions definitions/infra-awsalb/golden_metrics.yml
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
requests:
title: Requests
serverErrors4XxAnd5Xx:
title: Server errors (4xx and 5xx)
query:
select: sum(provider.requestCount.Sum)
select: rate(sum((provider.httpCodeElb4XXCount.Sum OR 0) + (provider.httpCodeElb5XXCount.Sum OR 0)),1 minute)
from: LoadBalancerSample
where: provider='Alb'
facet: entityName
eventId: entityGuid
serverErrors4XxAnd5Xx:
title: Server errors (4xx and 5xx)
activeConnections:
title: Active Connections
query:
select: sum((provider.httpCodeElb4XXCount.Sum OR 0) + (provider.httpCodeElb5XXCount.Sum
OR 0))
select: rate(sum(provider.activeConnectionCount.Sum),1 minute)
from: LoadBalancerSample
where: provider='Alb'
facet: entityName
eventId: entityGuid
rejectedConnections:
title: Rejected connections
query:
select: sum(provider.rejectedConnectionCount.Sum)
select: rate(sum(provider.rejectedConnectionCount.Sum),1 minute)
from: LoadBalancerSample
where: provider='Alb'
facet: entityName
Expand Down
15 changes: 12 additions & 3 deletions definitions/infra-awsalbtargetgroup/definition.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
domain: INFRA
type: AWSALBTARGETGROUP
goldenTags:
- aws.availabilityZone
- aws.accountId
goldenTags:
- aws.awsRegion
- aws.matcher
- aws.port
- aws.protocol
- aws.targetGroupName
- aws.accountId
- account
- label.Team
- label.team
- label.env
- label.environment
compositeMetrics:
goldenMetrics:
- golden_metrics.yml
Expand Down
26 changes: 25 additions & 1 deletion definitions/infra-awsalbtargetgroup/golden_metrics.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,31 @@
# Golden metric: per-minute rate of target 4xx plus 5xx response counts,
# filtered to the AlbTargetGroup provider and faceted by entity name.
serverErrors4XxAnd5Xx:
  title: 'Server errors (4xx and 5xx)'
  query:
    # Folded scalar: the single line break folds to one space, so the
    # NRQL expression is the same string as the one-line form.
    select: >-
      rate(sum(`provider.httpCodeTarget4XXCount.Sum`) +
      sum(`provider.httpCodeTarget5XXCount.Sum`),1 minute)
    from: 'LoadBalancerSample'
    where: "provider='AlbTargetGroup'"
    eventId: 'entityGuid'
    facet: 'entityName'
# Golden metric: maximum of the reported unhealthy-host count maximums,
# filtered to the AlbTargetGroup provider and faceted by entity name.
unhealthyHosts:
  title: 'Unhealthy host count'
  query:
    select: 'max(`provider.unHealthyHostCount.Maximum`)'
    from: 'LoadBalancerSample'
    where: "provider='AlbTargetGroup'"
    eventId: 'entityGuid'
    facet: 'entityName'
# Golden metric: average of the reported target response-time averages,
# filtered to the AlbTargetGroup provider and faceted by entity name.
responseTime:
  title: 'Average response time'
  query:
    select: 'average(`provider.targetResponseTime.Average`)'
    from: 'LoadBalancerSample'
    where: "provider='AlbTargetGroup'"
    eventId: 'entityGuid'
    facet: 'entityName'
requests:
title: Requests
query:
select: sum(provider.requestCountPerTarget.Sum)
select: rate(sum(provider.requestCountPerTarget.Sum),1 minute)
from: LoadBalancerSample
where: provider='AlbTargetGroup'
facet: entityName
Expand Down
3 changes: 2 additions & 1 deletion definitions/infra-awsecscluster/definition.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
domain: INFRA
type: AWSECSCLUSTER
goldenTags:
- aws.availabilityZone
- aws.awsRegion
- aws.clusterStatus
- aws.accountId
compositeMetrics:
goldenMetrics:
Expand Down
6 changes: 5 additions & 1 deletion definitions/infra-awsecsservice/definition.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
domain: INFRA
type: AWSECSSERVICE
goldenTags:
- aws.availabilityZone
- aws.clusterName
- aws.awsRegion
- aws.clusterName
- aws.launchType
- aws.serviceStatus
- aws.accountId
compositeMetrics:
goldenMetrics:
Expand Down
13 changes: 11 additions & 2 deletions definitions/infra-awslambdafunction/definition.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
domain: INFRA
type: AWSLAMBDAFUNCTION
goldenTags:
- aws.availabilityZone
- aws.accountId
- aws.awsRegion
- aws.accountId
- aws.memorySize
- aws.runtime
- aws.timeout
- aws.handler
- account
- label.Team
- label.team
- label.env
- label.environment
compositeMetrics:
goldenMetrics:
- golden_metrics.yml
27 changes: 19 additions & 8 deletions definitions/infra-awslambdafunction/golden_metrics.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,32 @@
errorRate:
title: Error rate %
query:
select: sum(provider.errors.Sum) * 100 / sum(provider.invocations.Sum)
from: ServerlessSample
where: provider='LambdaFunction'
eventId: entityGuid
facet: entityName
totalInvocations:
title: Total Invocations
query:
select: sum(provider.invocations.Sum)
select: rate(sum(provider.invocations.Sum),1 minute)
from: ServerlessSample
facet: entityName
where: provider='LambdaFunction'
eventId: entityGuid
facet: entityName
duration99PercentileS:
title: Duration (99 percentile) (s)
query:
select: average(provider.duration.Maximum) / 1000
select: max(provider.duration.Maximum) / 1000
from: ServerlessSample
facet: entityName
where: provider='LambdaFunction'
eventId: entityGuid
errorRate:
title: Error rate
facet: entityName
throttles:
title: Throttled invocations
query:
select: sum(provider.errors.Sum) * 100 / sum(provider.invocations.Sum)
select: rate(sum(provider.throttles.Sum), 1 minute)
from: ServerlessSample
facet: entityName
where: provider='LambdaFunction'
eventId: entityGuid
facet: entityName
15 changes: 15 additions & 0 deletions definitions/infra-awslambdafunctionalias/definition.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,17 @@
# Entity definition for domain INFRA, type AWSLAMBDAFUNCTIONALIAS.
domain: INFRA
type: AWSLAMBDAFUNCTIONALIAS
# Tags listed as golden tags for this entity type.
goldenTags:
# NOTE(review): the other AWS definitions in this commit list aws.awsRegion;
# confirm that aws.region is the tag name actually reported for aliases.
- aws.region
- aws.accountId
- aws.aliasName
- aws.functionName
- aws.functionVersion
- aws.resource
- account
# Both capitalizations of the team label are listed.
- label.Team
- label.team
# Both the short and long environment label names are listed.
- label.env
- label.environment
compositeMetrics:
# Golden metrics are loaded from the sibling golden_metrics.yml file.
goldenMetrics:
- golden_metrics.yml
32 changes: 32 additions & 0 deletions definitions/infra-awslambdafunctionalias/golden_metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Golden metric for Lambda function aliases: errors as a percentage of
# invocations, faceted by entity name.
errorRate:
  title: Error rate %
  query:
    select: sum(provider.errors.Sum) * 100 / sum(provider.invocations.Sum)
    from: ServerlessSample
    # Alias entities report under their own provider value; filtering on
    # 'LambdaFunction' would select the function's samples, not the alias's.
    where: provider='LambdaFunctionAlias'
    eventId: entityGuid
    facet: entityName
# Golden metric for Lambda function aliases: per-minute rate of the summed
# invocation count, faceted by entity name.
totalInvocations:
  title: Total Invocations
  query:
    select: rate(sum(provider.invocations.Sum),1 minute)
    from: ServerlessSample
    # Alias entities report under their own provider value; filtering on
    # 'LambdaFunction' would select the function's samples, not the alias's.
    where: provider='LambdaFunctionAlias'
    eventId: entityGuid
    facet: entityName
# Golden metric for Lambda function aliases: maximum reported duration
# divided by 1000, faceted by entity name.
# NOTE(review): the key and title say "99 percentile" but the query takes the
# max of duration.Maximum; confirm which label is intended.
duration99PercentileS:
  title: Duration (99 percentile) (s)
  query:
    # Presumably converts milliseconds to seconds (title unit is "s");
    # TODO confirm the source unit.
    select: max(provider.duration.Maximum) / 1000
    from: ServerlessSample
    # Alias entities report under their own provider value; filtering on
    # 'LambdaFunction' would select the function's samples, not the alias's.
    where: provider='LambdaFunctionAlias'
    eventId: entityGuid
    facet: entityName
# Golden metric for Lambda function aliases: per-minute rate of the summed
# throttle count, faceted by entity name.
throttles:
  title: Throttled invocations
  query:
    select: rate(sum(provider.throttles.Sum), 1 minute)
    from: ServerlessSample
    # Alias entities report under their own provider value; filtering on
    # 'LambdaFunction' would select the function's samples, not the alias's.
    where: provider='LambdaFunctionAlias'
    eventId: entityGuid
    facet: entityName
12 changes: 12 additions & 0 deletions definitions/infra-awslambdaregion/definition.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
# Entity definition for domain INFRA, type AWSLAMBDAREGION.
domain: INFRA
type: AWSLAMBDAREGION
# Tags listed as golden tags for this entity type.
goldenTags:
- aws.awsRegion
- aws.accountId
- aws.concurrentExecutions
- account
# Both capitalizations of the team label are listed.
- label.Team
- label.team
# Both the short and long environment label names are listed.
- label.env
- label.environment
compositeMetrics:
# Golden metrics are loaded from the sibling golden_metrics.yml file.
goldenMetrics:
- golden_metrics.yml
16 changes: 16 additions & 0 deletions definitions/infra-awslambdaregion/golden_metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Golden metric for the LambdaRegion provider: maximum of the reported
# concurrent-executions maximums, faceted by entity name.
ConcurrentExecutions:
  # Title corrected from "Current" to "Concurrent" to match the metric.
  title: Max Concurrent Executions
  query:
    select: max(`provider.concurrentExecutions.Maximum`)
    from: ServerlessSample
    where: provider='LambdaRegion'
    eventId: entityGuid
    facet: entityName
# Golden metric for the LambdaRegion provider: maximum of the reported
# unreserved-concurrent-executions maximums, faceted by entity name.
UnreservedConcurrentExecutions:
  # Title corrected from "Current" to "Concurrent" to match the metric.
  title: Max Unreserved Concurrent Executions
  query:
    select: max(`provider.unreservedConcurrentExecutions.Maximum`)
    from: ServerlessSample
    where: provider='LambdaRegion'
    eventId: entityGuid
    facet: entityName
15 changes: 12 additions & 3 deletions definitions/infra-awsnlb/definition.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
domain: INFRA
type: AWSNLB
goldenTags:
- aws.availabilityZone
- aws.accountId
- aws.awsRegion
- aws.state
- aws.type
- aws.ipAddressType
- aws.dnsName
- aws.accountId
- account
- label.Team
- label.team
- label.env
- label.environment
compositeMetrics:
goldenMetrics:
- golden_metrics.yml
summaryMetrics:
- summary_metrics.yml
- summary_metrics.yml
33 changes: 17 additions & 16 deletions definitions/infra-awsnlb/golden_metrics.yml
Original file line number Diff line number Diff line change
@@ -1,32 +1,33 @@
concurrentFlows:
title: Concurrent flows
failedClientNlbTlsHandshakes:
title: Failed client-NLB TLS handshakes
query:
select: average(provider.activeFlowCount.Average)
select: rate(sum(provider.clientTlsNegotiationErrorCount.Sum),1 minute)
from: LoadBalancerSample
where: provider='Nlb'
facet: entityName
eventId: entityGuid
concurrentTlsFlows:
title: Concurrent TLS flows
facet: entityName
failedNlbTargetTlsHandshakes:
title: Failed NLB-target TLS handshakes
query:
select: average(provider.activeFlowCountTls.Average)
select: rate(sum(provider.targetTlsNegotiationErrorCount.Sum),1 minute)
from: LoadBalancerSample
where: provider='Nlb'
facet: entityName
eventId: entityGuid
failedClientNlbTlsHandshakes:
title: Failed client-NLB TLS handshakes
facet: entityName
concurrentFlows:
title: Avg Concurrent flows
query:
select: sum(provider.clientTlsNegotiationErrorCount.Sum)
select: average(provider.activeFlowCount.Average)
from: LoadBalancerSample
where: provider='Nlb'
facet: entityName
eventId: entityGuid
failedNlbTargetTlsHandshakes:
title: Failed NLB-target TLS handshakes
facet: entityName
concurrentTlsFlows:
title: Avg Concurrent TLS flows
query:
select: sum(provider.targetTlsNegotiationErrorCount.Sum)
select: average(provider.activeFlowCountTls.Average)
from: LoadBalancerSample
where: provider='Nlb'
facet: entityName
eventId: entityGuid
facet: entityName

15 changes: 12 additions & 3 deletions definitions/infra-awsnlbtargetgroup/definition.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
domain: INFRA
type: AWSNLBTARGETGROUP
goldenTags:
- aws.availabilityZone
- aws.accountId
- aws.awsRegion
- aws.state
- aws.type
- aws.ipAddressType
- aws.dnsName
- aws.accountId
- account
- label.Team
- label.team
- label.env
- label.environment
compositeMetrics:
goldenMetrics:
- golden_metrics.yml
summaryMetrics:
- summary_metrics.yml
- summary_metrics.yml
Loading

0 comments on commit 94a3305

Please sign in to comment.