Skip to content

Commit

Permalink
Simplify altaro result metrics, allow filtering by next scheduled
Browse files Browse the repository at this point in the history
  • Loading branch information
deajan committed Sep 24, 2024
1 parent 633d316 commit a45b347
Show file tree
Hide file tree
Showing 8 changed files with 965 additions and 33 deletions.
20 changes: 11 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ This is a Hornet Security / Altaro VM Backup v8 and v9 data exporter for Prometh

You can find an example dashboard in the examples directory

![image](examples/grafana_dashboard_v0.9.png)
![image](examples/grafana_dashboard_v0.9b.png)
![image](examples/grafana_dashboard_v0.10.0.png)

### Install

Expand All @@ -19,11 +18,14 @@ Extract the zip file to let's say `C:\altaro_exporter`
In the directory, you'll find the binaries as well as the config file `altaro_exporter.yaml`

Configure your local/domain administrator account according to your needs. Don't worry, once running, the user and password will be encrypted.
Also, configure whether you want to include non-scheduled and/or unconfigured VMs in your metrics.
By default, they are included, since it is better to have too much information than too little.
Nevertheless, with many backup policies, they should be excluded in order to avoid false positives.

Once you're done, create a Windows Service with the following commands

```
sc create altaro_exporter DisplayName= "Altaro API exporter for Prometheus" start= auto binpath= "c:\altaro_exporter\altaro_exporter-x64.exe -c c:\altaro_exporter\altaro_exporter.yaml"
sc create altaro_exporter DisplayName= "HornetSecurity Altaro API exporter for Prometheus" start= auto binpath= "c:\altaro_exporter\altaro_exporter-x64.exe -c c:\altaro_exporter\altaro_exporter.yaml"
sc Description altaro_exporter "Altaro API exporter service by NetInvent"
```

Expand Down Expand Up @@ -53,18 +55,18 @@ Keep in mind that you need to create a firewall rule if you want to query it's o

### Metrics

API status metric (0 = OK, 1 = Cannot connect to API, 2 = API didn't like our request)
API status metric
```
altaro_api_success
altaro_api_success (0 = OK, 1 = Cannot connect to API, 2 = API didn't like our request)
```

The following metrics have these labels:
` hostname,vmname,vmuuid `

metrics:
```
altaro_lastoffsitecopy_result
altaro_lastbackup_result
altaro_lastoffsitecopy_result (0 = Success, 1 = Warning, 2 = Error, 3 = Unknown, 4 = Other)
altaro_lastbackup_result (0 = Success, 1 = Warning, 2 = Error, 3 = Unknown, 4 = Other)
altaro_lastoffsitecopy_transfersize_uncompressed_bytes
altaro_lastoffsitecopy_transfersize_compressed_bytes
altaro_lastbackup_transfersize_uncompressed_bytes
Expand All @@ -79,11 +81,11 @@ altaro_lastbackup_timestamp

```
- alert: Last Backup not successful
expr: altaro_lastbackup_result{altaro_lastbackup_result="Success"} != 1
expr: altaro_lastbackup_result{} > 0
for: 1m
- alert: Last OffSite Copy not successful
expr: altaro_lastoffsitecopy_result{altaro_lastoffsitecopy_result="Success"} != 1
expr: altaro_lastoffsitecopy_result{} > 0
for: 1m
- alert: Last Backup older than 30 hours
Expand Down
3 changes: 3 additions & 0 deletions altaro_exporter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ altaro_server:
rest_port: 36013
# rest path is /api in v8 and v9, and /api/rest in v9.1
rest_path: /api/rest
options:
include_unconfigured: true
include_non_scheduled: true
http_server:
port: 9769
listen: 0.0.0.0
Expand Down
4 changes: 2 additions & 2 deletions altaro_exporter/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@
__description__ = "Altaro Prometheus Exporter"
__copyright__ = "Copyright (C) 2024 NetInvent"
__license__ = "GPL-3.0-only"
__build__ = "2024091001"
__version__ = "0.9.1"
__build__ = "2024092401"
__version__ = "0.10.0"
71 changes: 51 additions & 20 deletions altaro_exporter/altaro_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
__description__ = "Altaro API Prometheus data exporter"
__copyright__ = "Copyright (C) 2024 NetInvent"
__license__ = "GPL-3.0-only"
__build__ = "2024091001"
__build__ = "2024092401"

from ofunctions.requestor import Requestor
from ofunctions.logger_utils import logger_get_logger
from ofunctions.misc import fn_name
from logging import getLogger
import time
Expand Down Expand Up @@ -138,18 +137,16 @@ def __init__(
"Uncompressed size of last offsite copy",
["vmname", "hostname", "vmuuid"],
)

self.enum_lastbackup_result = Enum(
self.gauge_lastbackup_result = Gauge(
"altaro_lastbackup_result",
"Result of last backup",
"Result of last backup 0 = success, 1 = warning, 2 = error, 3 = unknown, 4 = other",
["vmname", "hostname", "vmuuid"],
states=["Success", "Warning", "Error"],
)
self.enum_lastoffsitecopy_result = Enum(

self.gauge_lastoffsitecopy_result = Gauge(
"altaro_lastoffsitecopy_result",
"Result of last offsite copy",
"Result of last offsite copy 0 = success, 1 = warning, 2 = error, 3 = unknown, 4 = other",
["vmname", "hostname", "vmuuid"],
states=["Success", "Warning", "Error"],
)

# Create a metric to track time spent and requests made.
Expand Down Expand Up @@ -230,7 +227,9 @@ def _api_request(
self.gauge_altaro_api_success.set(0)
return result

def list_vms(self, include_unconfigured: bool = False):
def list_vms(
self, include_unconfigured: bool = False, include_non_scheduled: bool = False
):
result = self._api_request(
pre_endpoint=f"/{self.altaro_rest_path}/vms/list/",
post_endpoint="/1" if not include_unconfigured else "",
Expand All @@ -248,6 +247,12 @@ def list_vms(self, include_unconfigured: bool = False):
vmname = vm["VirtualMachineName"]
hostname = vm["HostName"]
vmuuid = vm["HypervisorVirtualMachineUuid"]
is_scheduled = vm["NextBackupTime"] or vm["NextOffsiteCopyTime"]
if not is_scheduled and not include_non_scheduled:
logger.info(
f"Skipping VM {vmname} on {hostname} as it is not scheduled"
)
continue
logger.info(f"Found VM {vmname} on {hostname}")

# Last Backup, ex 2024-08-13-01-53-14
Expand Down Expand Up @@ -308,19 +313,45 @@ def list_vms(self, include_unconfigured: bool = False):

# LastBackupResult
try:
self.enum_lastbackup_result.labels(vmname, hostname, vmuuid).state(
vm["LastBackupResult"]
)
except Exception:
logger.info(f"{vmname} has no last backup")
if vm["LastBackupResult"].lower() == "success":
last_backup_result = 0
elif vm["LastBackupResult"].lower() == "warning":
last_backup_result = 1
elif vm["LastBackupResult"].lower() == "error":
last_backup_result = 2
elif vm["LastBackupResult"].lower() == "unknown":
last_backup_result = 3
elif vm["LastBackupResult"] is not None:
last_backup_result = 4
else:
last_backup_result = None
if last_backup_result is not None:
self.gauge_lastbackup_result.labels(vmname, hostname, vmuuid).set(
last_backup_result
)
except Exception as exc:
logger.info(f"{vmname} has no last backup: {exc}")

# LastOffsiteCopyResult
try:
self.enum_lastoffsitecopy_result.labels(vmname, hostname, vmuuid).state(
vm["LastOffsiteCopyResult"]
)
except Exception:
logger.info(f"{vmname} has no lastoffsitecopy")
if vm["LastOffsiteCopyResult"].lower() == "success":
last_offsite_backup_result = 0
elif vm["LastOffsiteCopyResult"].lower() == "warning":
last_offsite_backup_result = 1
elif vm["LastOffsiteCopyResult"].lower() == "error":
last_offsite_backup_result = 2
elif vm["LastOffsiteCopyResult"].lower() == "unknown":
last_offsite_backup_result = 3
elif vm["LastOffsiteCopyResult"] is not None:
last_offsite_backup_result = 4
else:
last_offsite_backup_result = None
if last_offsite_backup_result is not None:
self.gauge_lastoffsitecopy_result.labels(
vmname, hostname, vmuuid
).set(last_offsite_backup_result)
except Exception as exc:
logger.info(f"{vmname} has no lastoffsitecopy: {exc}")
return True


Expand Down
1 change: 0 additions & 1 deletion altaro_exporter/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from cryptidy import symmetric_encryption as enc
from ruamel.yaml import YAML
from ruamel.yaml.compat import ordereddict
from ruamel.yaml.comments import CommentedMap
from ofunctions.misc import replace_in_iterable

ID_STRING = "__ALTARO__"
Expand Down
14 changes: 13 additions & 1 deletion altaro_exporter/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@
password = config_dict.g("altaro_server.password")
domain = config_dict.g("altaro_server.domain")

try:
include_unconfigured = config_dict["options"]["include_unconfigured"]
except:
include_unconfigured = True
try:
include_non_scheduled = config_dict["options"]["include_non_scheduled"]
except:
include_non_scheduled = True


app = FastAPIOffline()
metrics_app = prometheus_client.make_asgi_app()
Expand Down Expand Up @@ -125,7 +134,10 @@ async def api_root(auth=Depends(auth_scheme)):
@app.get("/metrics")
async def get_metrics(auth=Depends(auth_scheme)):
try:
api.list_vms()
api.list_vms(
include_unconfigured=include_unconfigured,
include_non_scheduled=include_non_scheduled,
)
except KeyError:
logger.critical("Bogus configuration file. Missing Altaro_hosts key.")
return Response(
Expand Down
Loading

0 comments on commit a45b347

Please sign in to comment.