Merge pull request #6 from aion-dk/lkr-pull-databases
Update to pull all databases
av-lasse authored Dec 12, 2023
2 parents bcfc584 + bdbdb8f commit 47fea29
Showing 2 changed files with 48 additions and 67 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -16,11 +16,11 @@ docker run -e exoscale_key=<YOUR_API_KEY> -e exoscale_secret=<YOUR_API_SECRET> -
```
Replace the following placeholders:
* <YOUR_API_KEY> and <YOUR_API_SECRET> with your Exoscale API key and secret.
* <DATABASE_NAMES> with a comma-separated list of the database names you want to monitor.

The following parameters are optional:
<DATABASE_ZONE>: Set this if you want to specify the Exoscale zone where your databases are located (e.g., 'de-muc-1'). If not specified, it defaults to 'ch-gva-2'.
<METRICS_PERIOD>: Set this if you want to specify the period for metric collection (e.g., 'hour', 'day', 'week', 'month', 'year'). If not specified, it defaults to 'hour'.
* <DATABASE_ZONE>: Set this if you want to specify the Exoscale zone where your databases are located (e.g., 'de-muc-1'). If not specified, it defaults to 'ch-gva-2'.
* <METRICS_PERIOD>: Set this if you want to specify the period for metric collection (e.g., 'hour', 'day', 'week', 'month', 'year'). If not specified, it defaults to 'hour'.
* <DATABASE_NAMES>: A comma-separated list of the database names you want to monitor. If not specified, it defaults to all databases in the zone.

The exporter will start and expose Prometheus metrics on port 8080. You can configure your Prometheus server to scrape metrics from this exporter.
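
For example, a complete invocation might look like this (hypothetical values throughout; `<IMAGE_NAME>` stands in for the exporter image):

```
docker run \
  -e exoscale_key=EXOxxxxxxxxxxxxxxxxxxxx \
  -e exoscale_secret=my-api-secret \
  -e database_names=db-prod,db-staging \
  -e database_zone=de-muc-1 \
  -e metrics_period=day \
  -p 8080:8080 \
  <IMAGE_NAME>
```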

109 changes: 45 additions & 64 deletions dbaas_prometheus_exporter.py
@@ -1,9 +1,14 @@
import time
import os
import logging
import json
from exoscale.api.v2 import Client
from threading import Thread
from prometheus_client import start_http_server, Gauge

# Constants
SLEEP_INTERVAL = 30

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@@ -12,93 +17,69 @@
api_key = os.environ.get('exoscale_key')
api_secret = os.environ.get('exoscale_secret')

#database name to scrape
# Database name to scrape
database_names_str = os.environ.get('database_names')

# Zone the database lives in
database_zone = os.environ.get('database_zone')
database_zone = os.environ.get('database_zone', 'ch-gva-2')

# Period parameter for the request
metrics_period = os.environ.get('metrics_period')
metrics_period = os.environ.get('metrics_period', 'hour')

# Check if the environment variables are set
if api_key is None or api_secret is None or api_key == "" or api_secret == "":
logger.error("Error: Please set the 'exoscale_key' and 'exoscale_secret' environment variables.")
exit(1)

if database_names_str is None or database_names_str == "":
logger.error("Error: Please set the 'database_names' environment variables.")
exit(1)
# Create an authentication object
exo = Client(api_key, api_secret, zone=database_zone)

# Explicit Zone declaration, reporting the Client defaults
if database_zone is None or database_zone == "":
database_zone = 'ch-gva-2'
logger.info(f"Info: Zone is set to {database_zone}.")

# Set period
ALLOWED_PERIODS = {'hour', 'week', 'year', 'month', 'day'}
DEFAULT_PERIOD = 'hour'
if metrics_period is None or metrics_period == "":
metrics_period = DEFAULT_PERIOD
if metrics_period not in ALLOWED_PERIODS:
metrics_period = DEFAULT_PERIOD
logger.warning(f"Warning: the 'metrics_period' environment variable is not one of {ALLOWED_PERIODS}, defaulting to '{DEFAULT_PERIOD}'.")
logger.info(f"Info: Zone is set to {database_zone}.")
logger.info(f"Info: Period is set to {metrics_period}.")

#split database names
database_names = database_names_str.split(',')

# Define Prometheus gauge metrics for each metric with a 'database' label
dbaas_disk_usage = Gauge('dbaas_disk_usage', 'Disk space usage percentage', ['database'])
dbaas_load_average = Gauge('dbaas_load_average', 'Load average (5 min)', ['database'])
dbaas_mem_usage = Gauge('dbaas_memory_usage', 'Memory usage percentage', ['database'])
dbaas_diskio_writes = Gauge('dbaas_disk_io_writes', 'Disk IOPS (writes)', ['database'])
dbaas_mem_available = Gauge('dbaas_memory_available', 'Memory available percentage', ['database'])
dbaas_cpu_usage = Gauge('dbaas_cpu_usage', 'CPU usage percentage', ['database'])
dbaas_diskio_reads = Gauge('dbaas_disk_io_reads', 'Disk IOPS (reads)', ['database'])
dbaas_net_send = Gauge('dbaas_network_transmit_bytes_per_sec', 'Network transmit (bytes/s)', ['database'])
dbaas_net_receive = Gauge('dbaas_network_receive_bytes_per_sec', 'Network receive (bytes/s)', ['database'])


# Create an authentication object
exo = Client(api_key, api_secret, zone=database_zone)


def fetch_metrics(database_names):
dbaas_metrics = {
'disk_usage': Gauge('dbaas_disk_usage', 'Disk space usage percentage', ['database']),
'load_average': Gauge('dbaas_load_average', 'Load average (5 min)', ['database']),
'mem_usage': Gauge('dbaas_memory_usage', 'Memory usage percentage', ['database']),
'diskio_writes': Gauge('dbaas_disk_io_writes', 'Disk IOPS (writes)', ['database']),
'mem_available': Gauge('dbaas_memory_available', 'Memory available percentage', ['database']),
'cpu_usage': Gauge('dbaas_cpu_usage', 'CPU usage percentage', ['database']),
'diskio_read': Gauge('dbaas_disk_io_reads', 'Disk IOPS (reads)', ['database']),
'net_send': Gauge('dbaas_network_transmit_bytes_per_sec', 'Network transmit (bytes/s)', ['database']),
'net_receive': Gauge('dbaas_network_receive_bytes_per_sec', 'Network receive (bytes/s)', ['database']),
}
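# The dict keys above are assumed to match the metric names returned in
# response['metrics'], since fetch_metrics() indexes the response with them directly.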

def get_database_names():
if database_names_str is not None and database_names_str != "":
return database_names_str.split(',')
else:
# Get list of databases
data = exo.list_dbaas_services()
# Extract the names using a list comprehension
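        # (assumed payload shape: {"dbaas-services": [{"name": "my-database", ...}, ...]})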
return [db.get('name') for db in data.get('dbaas-services', [])]

def fetch_metrics():
while True:
try:
for database_name in database_names:
# Get the latest database names
current_database_names = get_database_names()

for database_name in current_database_names:
response = exo.get_dbaas_service_metrics(
service_name=database_name,
period=metrics_period
)

if 'metrics' in response:
metrics = response['metrics']

# Extract the latest metric data for each metric
latest_disk_usage = metrics['disk_usage']['data']['rows'][-1][1]
latest_load_average = metrics['load_average']['data']['rows'][-1][1]
latest_mem_usage = metrics['mem_usage']['data']['rows'][-1][1]
latest_diskio_writes = metrics['diskio_writes']['data']['rows'][-1][1]
latest_mem_available = metrics['mem_available']['data']['rows'][-1][1]
latest_cpu_usage = metrics['cpu_usage']['data']['rows'][-1][1]
latest_diskio_reads = metrics['diskio_read']['data']['rows'][-1][1]
latest_net_send = metrics['net_send']['data']['rows'][-1][1]
latest_net_receive = metrics['net_receive']['data']['rows'][-1][1]

# Set the Prometheus metrics with the latest values
dbaas_disk_usage.labels(database=database_name).set(latest_disk_usage)
dbaas_load_average.labels(database=database_name).set(latest_load_average)
dbaas_mem_usage.labels(database=database_name).set(latest_mem_usage)
dbaas_diskio_writes.labels(database=database_name).set(latest_diskio_writes)
dbaas_mem_available.labels(database=database_name).set(latest_mem_available)
dbaas_cpu_usage.labels(database=database_name).set(latest_cpu_usage)
dbaas_diskio_reads.labels(database=database_name).set(latest_diskio_reads)
dbaas_net_send.labels(database=database_name).set(latest_net_send)
dbaas_net_receive.labels(database=database_name).set(latest_net_receive)

logger.info(f"Info: Metrics for {database_name} has been scraped")
for metric_name, metric_gauge in dbaas_metrics.items():
latest_value = metrics[metric_name]['data']['rows'][-1][1]
metric_gauge.labels(database=database_name).set(latest_value)

logger.info(f"Info: Metrics for {database_name} have been scraped")

elif 'message' in response:
logger.error(f"Error: Failed to fetch metrics for {database_name}: {response['message']}")
@@ -107,14 +88,14 @@ def fetch_metrics(database_names):
logger.error(f"Error: Failed to fetch metrics for {database_name}: unknown error")

except Exception as e:
logger.error(f"Error: An error occurred for {database_name}: {str(e)}")
logger.error(f"Error: An error occurred: {str(e)}")

# Sleep for some time before fetching metrics again
time.sleep(30)
time.sleep(SLEEP_INTERVAL)

if __name__ == '__main__':
# Start an HTTP server to expose the metrics
start_http_server(8080)

# Fetch and update metrics
fetch_metrics(database_names)
fetch_metrics()
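
# Hypothetical smoke test: with the exporter running and port 8080 published,
# `curl localhost:8080/metrics` should list the dbaas_* gauges defined above.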
