Add kvm-qemu backup script

This commit is contained in:
Orsiris de Jong 2024-02-23 16:49:28 +01:00
parent 3cfb20a96c
commit 327fba178a
2 changed files with 315 additions and 0 deletions

View file

@ -0,0 +1,230 @@
#!/usr/bin/env bash
# Script ver 2023112901
#
# Backs up libvirt (KVM/QEMU) virtual machines with npbackup: for every
# machine a disk-only snapshot is created, the disk files are backed up,
# and the snapshot is committed back into its base image afterwards.
#
#TODO: support modding XML file from offline domains to remove snapshot and replace by backing file after qemu-img commit
# Expects repository version 2 to already exist

# List of machines, kept as a bash array with one element per domain name.
# The --all flag makes virsh include inactive machines as well as active ones.
# awk 'NF' drops blank lines from the virsh output so no empty name is stored.
mapfile -t VMS < <(virsh list --name --all | awk 'NF')
# Optional machine selection
#VMS=(some.vm.local some.other.vm.local)

# Log file every function appends to.
LOG_FILE="/var/log/cube_npv1.log"
# Working directory for XML dumps, the file list and the generated config.
ROOT_DIR="/opt/cube"
# Snapshot name used for this run; the UTC timestamp makes it unique per run.
BACKUP_IDENTIFIER="CUBE-BACKUP-NP.$(date +"%Y%m%dT%H%M%S" --utc)"
# File listing every path that must be backed up for the current machine.
BACKUP_FILE_LIST="${ROOT_DIR}/npbackup_cube_file.lst"
# npbackup config template and the per-machine config generated from it.
NPBACKUP_CONF_FILE_TEMPLATE="${ROOT_DIR}/npbackup.cube.template"
NPBACKUP_CONF_FILE="${ROOT_DIR}/npbackup-cube.conf"
# Record a message: append it to LOG_FILE and echo it on stdout.
# Arguments: $1 - text to record
log() {
    local message="${1}"

    echo "${message}" >> "${LOG_FILE}"
    echo "${message}"
}
# Tell whether a value is one of the remaining arguments.
# Arguments: $1     - needle to look for
#            $2...  - haystack elements (typically "${array[@]}")
# Outputs:   prints 1 when the needle equals one of the elements, 0 otherwise.
#            An empty needle or an empty haystack always prints 0.
function ArrayContains () {
    local needle="${1}"
    local candidate

    # Use the argument count to detect an empty haystack: testing the first
    # element instead would wrongly skip arrays whose first element is empty.
    if [ -n "${needle}" ] && [ "$#" -ge 2 ]; then
        for candidate in "${@:2}"; do
            if [ "${candidate}" == "${needle}" ]; then
                echo 1
                return
            fi
        done
    fi
    echo 0
    return
}
# Prepare the backup file list for one machine and snapshot its disks.
# Globals:   ROOT_DIR, LOG_FILE, BACKUP_FILE_LIST (read)
#            CURRENT_VM_SNAPSHOT (set to the machine name when a snapshot exists)
#            SNAPSHOTS_PATHS (snapshot overlay paths are appended)
# Arguments: $1 - machine (domain) name
#            $2 - snapshot name, also expected to appear in the overlay paths
function create_snapshot {
    local vm="${1}"
    local backup_identifier="${2}"
    local xml_file="${ROOT_DIR}/${vm}.xml"
    # dev_target is only a placeholder for the third domblklist column.
    local dev_type dev_kind dev_target disk_path

    # Add VM xml description from virsh.
    # --security-info puts sensitive data in the dump, so the file is created
    # owner-only; the subshell keeps the umask change local to this command.
    ( umask 077; virsh dumpxml --security-info "${vm}" > "${xml_file}" )
    # Also tighten a dump left behind by an earlier run with looser permissions.
    chmod 600 "${xml_file}" >> "${LOG_FILE}" 2>&1
    echo "${xml_file}" >> "${BACKUP_FILE_LIST}"

    # Get current disk paths. Columns are matched exactly (Type=file,
    # Device=disk) and the source keeps everything after the third column,
    # so unrelated rows and paths containing spaces are handled correctly.
    while read -r dev_type dev_kind dev_target disk_path; do
        if [ "${dev_type}" != "file" ] || [ "${dev_kind}" != "disk" ]; then
            continue
        fi
        if [ -f "${disk_path}" ]; then
            # Add current disk path and all necessary backing files for current disk to backup file list
            echo "${disk_path}" >> "${BACKUP_FILE_LIST}"
            # stdin is detached so the command cannot consume the loop input.
            qemu-img info --backing-chain -U "${disk_path}" < /dev/null \
                | awk '/backing file:/ {print $3}' >> "${BACKUP_FILE_LIST}"
            log "Current disk path: $disk_path"
        else
            log "$vm has a non existent disk path: $disk_path. Cannot backup this disk"
            # Let's still include this file in the backup list so we are sure backup will be marked as failed
            echo "${disk_path}" >> "${BACKUP_FILE_LIST}"
        fi
    done < <(virsh domblklist "${vm}" --details)

    log "Creating snapshot for $vm"
    if virsh snapshot-create-as "${vm}" --name "${backup_identifier}" --description "${backup_identifier}" --atomic --quiesce --disk-only >> "${LOG_FILE}" 2>&1; then
        CURRENT_VM_SNAPSHOT="${vm}"
    else
        log "Failed to create snapshot for $vm with quiesce option. Trying without quiesce."
        if virsh snapshot-create-as "${vm}" --name "${backup_identifier}" --description "${backup_identifier}.noquiesce" --atomic --disk-only >> "${LOG_FILE}" 2>&1; then
            CURRENT_VM_SNAPSHOT="${vm}"
        else
            log "Failed to create snapshot for $vm without quiesce option. Cannot backup that file."
            # A path that cannot exist makes sure the backup is marked as failed.
            echo "${vm}.SNAPSHOT_FAILED" >> "${BACKUP_FILE_LIST}"
        fi
    fi

    # Get list of snapshot files to delete: make sure we only use CUBE backup
    # files here, since they are to be deleted later. The identifier is
    # compared as a literal string, not as a regular expression.
    while read -r dev_type dev_kind dev_target disk_path; do
        if [ "${dev_type}" == "file" ] && [ "${dev_kind}" == "disk" ] \
            && [[ "${disk_path}" == *"${backup_identifier}"* ]]; then
            SNAPSHOTS_PATHS+=("${disk_path}")
            log "Snapshotted disk path: $disk_path"
        fi
    done < <(virsh domblklist "${vm}" --details)
}
# Optional: extract a tenant name from a VM name.
# The tenant is the second-to-last dot-separated label, e.g.
# myvm.tenant.local returns tenant. A name without any dot yields the
# name itself.
# Arguments: $1 - machine (domain) name
# Outputs:   prints the tenant name, or unknown_tenant when it is empty
function get_tenant {
    local vm="${1}"
    local tenant

    # Drop the last label, then keep what follows the last remaining dot.
    tenant="${vm%.*}"
    tenant="${tenant##*.}"

    # Special case for me
    if [ "${tenant}" == "npf" ]; then
        tenant="netperfect"
    fi

    # return this
    if [ -n "${tenant}" ]; then
        echo "${tenant}"
    else
        echo "unknown_tenant"
    fi
}
# Escape a value so it can be used verbatim as the replacement part of a
# sed "s%...%...%" command: backslash, & and the % delimiter are protected.
# Arguments: $1 - raw value
# Outputs:   prints the escaped value
function _sed_escape_replacement {
    printf '%s' "${1}" | sed -e 's/[\\&%]/\\&/g'
}

# Generate the npbackup config for one machine and run the backup.
# Globals:   LOG_FILE, BACKUP_FILE_LIST, NPBACKUP_CONF_FILE_TEMPLATE,
#            NPBACKUP_CONF_FILE (read)
# Arguments: $1 - tenant name written into the config
#            $2 - machine (domain) name written into the config
# Outputs:   the file list on stdout; progress and result in LOG_FILE
function run_backup {
    local tenant="${1}"
    local vm="${2}"

    # Announce the run once, then show the file list on stdout and in the log.
    log "Running backup as ${tenant} for:"
    tee -a "${LOG_FILE}" < "${BACKUP_FILE_LIST}"

    # Run backups
    #/usr/local/bin/restic backup --compression=auto --files-from-verbatim "${BACKUP_FILE_LIST}" --tag "${backup_identifier}" -o rest.connections=15 -v >> "$LOG_FILE" 2>&1
    # Prepare config file
    rm -f "${NPBACKUP_CONF_FILE}"
    if ! cp "${NPBACKUP_CONF_FILE_TEMPLATE}" "${NPBACKUP_CONF_FILE}" >> "${LOG_FILE}" 2>&1; then
        log "Cannot copy ${NPBACKUP_CONF_FILE_TEMPLATE} to ${NPBACKUP_CONF_FILE}"
        log "Backup failure"
        return
    fi
    # Values are escaped so characters such as & or % end up literally in the
    # config instead of being interpreted by sed.
    sed -i \
        -e "s%### TENANT ###%$(_sed_escape_replacement "${tenant}")%g" \
        -e "s%### SOURCE ###%$(_sed_escape_replacement "${BACKUP_FILE_LIST}")%g" \
        -e "s%### VM ###%$(_sed_escape_replacement "${vm}")%g" \
        "${NPBACKUP_CONF_FILE}"

    if /usr/local/bin/npbackup --config-file "${NPBACKUP_CONF_FILE}" --backup --force >> "${LOG_FILE}" 2>&1; then
        log "Backup success"
    else
        log "Backup failure"
    fi
}
# Point the persistent definition of an offline machine at another disk file.
# Every occurrence of the old path in the inactive XML is replaced by the new
# path and the result is defined again.
# NOTE(review): only the path is rewritten. If the backing file uses another
# image format than the overlay, or the XML carries a backingStore element,
# the definition may still need manual fixing - confirm on a test machine.
# Arguments: $1 - machine name, $2 - path to replace, $3 - replacement path
# Returns:   0 when the new definition was accepted, non-zero otherwise
function _redefine_offline_disk_source {
    local vm="${1}"
    local old_path="${2}"
    local new_path="${3}"
    local xml_file="${ROOT_DIR}/${vm}.xml.temp"
    local old_pattern new_replacement status

    # Escape regex metacharacters of the pattern, replacement metacharacters
    # of the new value, and the % delimiter in both.
    old_pattern=$(printf '%s' "${old_path}" | sed -e 's/[][\\.*^$%]/\\&/g')
    new_replacement=$(printf '%s' "${new_path}" | sed -e 's/[\\&%]/\\&/g')

    # The dump contains security info, hence the restrictive umask.
    ( umask 077; virsh dumpxml --inactive --security-info "${vm}" > "${xml_file}" ) 2>> "${LOG_FILE}" \
        && sed -i "s%${old_pattern}%${new_replacement}%g" "${xml_file}" >> "${LOG_FILE}" 2>&1 \
        && virsh define "${xml_file}" >> "${LOG_FILE}" 2>&1
    status=$?
    rm -f "${xml_file}"
    return "${status}"
}

# Commit the snapshot overlays of a machine back into their backing files,
# delete the overlay files and remove the snapshot metadata.
# Globals:   ROOT_DIR, LOG_FILE, SNAPSHOTS_PATHS (read)
#            CURRENT_VM_SNAPSHOT (cleared once an overlay was flattened)
# Arguments: $1 - machine (domain) name
#            $2 - snapshot name; only disks whose path contains it are touched
function remove_snapshot {
    local vm="${1}"
    local backup_identifier="${2}"
    local can_delete_metadata=true
    local -a disk_names=() disk_paths=()
    local dev_type dev_kind disk_name disk_path backing_file
    local index flatten_status

    # Read target and source of every snapshotted disk in a single pass, so
    # each path is tied to its own target name. The list is collected before
    # any commit runs because committing changes what domblklist reports.
    while read -r dev_type dev_kind disk_name disk_path; do
        if [ "${dev_type}" == "file" ] && [ "${dev_kind}" == "disk" ] \
            && [[ "${disk_path}" == *"${backup_identifier}"* ]]; then
            disk_names+=("${disk_name}")
            disk_paths+=("${disk_path}")
        fi
    done < <(virsh domblklist "${vm}" --details)

    for index in "${!disk_names[@]}"; do
        disk_name="${disk_names[${index}]}"
        disk_path="${disk_paths[${index}]}"

        if [ "$(ArrayContains "${disk_path}" "${SNAPSHOTS_PATHS[@]}")" -eq 0 ]; then
            log "No snapshot found for $vm"
        fi

        # virsh blockcommit only works if machine is running, else we need to use qemu-img
        if [ "$(virsh domstate "${vm}")" == "running" ]; then
            log "Trying to online blockcommit for $disk_name: $disk_path"
            virsh blockcommit "${vm}" "${disk_name}" --active --pivot --verbose --delete >> "${LOG_FILE}" 2>&1
            flatten_status=$?
        else
            log "Trying to offline blockcommit for $disk_name: $disk_path"
            # Look up the backing file before committing; it is the file the
            # machine definition must point to afterwards.
            backing_file=$(qemu-img info -U "${disk_path}" 2>> "${LOG_FILE}" < /dev/null \
                | awk '/^backing file:/ {print $3; exit}')
            qemu-img commit -dp "${disk_path}" >> "${LOG_FILE}" 2>&1
            # Keep the status of the commit itself: later commands must not
            # hide a failed commit, or the overlay would be deleted anyway.
            flatten_status=$?
            if [ "${flatten_status}" -eq 0 ]; then
                if [[ "${backing_file}" == /* ]]; then
                    log "Updating definition of $vm: replacing $disk_path by $backing_file"
                    if ! _redefine_offline_disk_source "${vm}" "${disk_path}" "${backing_file}"; then
                        log "Failed to update definition of $vm"
                        # Keep the overlay: the definition still refers to it.
                        flatten_status=1
                    fi
                else
                    # No absolute backing file path could be determined.
                    log "Note that you will need to modify the XML manually"
                fi
            fi
        fi

        if [ "${flatten_status}" -ne 0 ]; then
            log "Failed to flatten snapshot $vm: $disk_name: $disk_path"
            can_delete_metadata=false
        else
            # Delete if disk is not in use
            if [ -f "${disk_path}" ]; then
                log "Trying to delete $disk_path"
                if ! lsof "${disk_path}" > /dev/null 2>&1; then
                    log "Deleting file ${disk_path}"
                    rm -f "${disk_path}"
                else
                    log "File $disk_path is in use"
                fi
            fi
            CURRENT_VM_SNAPSHOT=""
        fi
    done

    # delete snapshot metadata
    if [ "${can_delete_metadata}" == true ]; then
        log "Deleting metadata from snapshot ${backup_identifier} for $vm"
        if ! virsh snapshot-delete "${vm}" --snapshotname "${backup_identifier}" --metadata >> "${LOG_FILE}" 2>&1; then
            log "Cannot delete snapshot metadata for $vm: ${backup_identifier}"
        fi
    else
        log "Will not delete metadata from snapshot ${backup_identifier} for $vm"
    fi
}
# Back up every machine listed in VMS, one after the other: snapshot it,
# back up its files, then remove the snapshot again if one was created.
# Globals: VMS, BACKUP_IDENTIFIER, BACKUP_FILE_LIST (read)
#          CURRENT_VM_SNAPSHOT, SNAPSHOTS_PATHS (reset for every machine)
run() {
    # VMS is expanded unquoted on purpose: it may hold a whitespace-separated
    # string of names as well as an array.
    for vm in ${VMS[@]}; do
        # Start every machine from a clean state: empty file list, no
        # pending snapshot, no recorded overlay paths.
        : > "$BACKUP_FILE_LIST"
        SNAPSHOTS_PATHS=()
        CURRENT_VM_SNAPSHOT=""
        log "Running backup for ${vm}"

        create_snapshot "${vm}" "${BACKUP_IDENTIFIER}"
        tenant=$(get_tenant "${vm}")
        run_backup "${tenant}" "${vm}"

        # create_snapshot leaves the machine name here when a snapshot exists.
        if [ -n "${CURRENT_VM_SNAPSHOT}" ]; then
            remove_snapshot "${CURRENT_VM_SNAPSHOT}" "${BACKUP_IDENTIFIER}"
        fi
    done
}
# Trap handler: remove the snapshot of the machine currently being backed
# up, if there is one, then leave the script.
# Globals: CURRENT_VM_SNAPSHOT, BACKUP_IDENTIFIER (read)
function cleanup {
    # Disarm the traps first so the handler runs only once: otherwise the
    # exit below fires the EXIT trap again, and a second signal could
    # interrupt the snapshot removal half-way. Termination signals are
    # ignored from here on so the removal can finish.
    trap '' INT HUP TERM QUIT
    trap - ERR EXIT

    if [ -n "${CURRENT_VM_SNAPSHOT}" ]; then
        remove_snapshot "${CURRENT_VM_SNAPSHOT}" "${BACKUP_IDENTIFIER}"
    fi
    exit
}
# Script body: install the cleanup trap, write the run banner to the log,
# make sure the working directory exists and back up all machines.
# Globals: LOG_FILE, ROOT_DIR (read)
function main {
    # Make sure we remove snapshots no matter what
    trap 'cleanup' INT HUP TERM QUIT ERR EXIT
    # The banner goes to the log file only. It is written directly because
    # redirecting log() into the same file would record the line twice.
    echo "#### Running backup $(date)" >> "${LOG_FILE}" 2>&1
    # -p also creates missing parent directories.
    [ -d "${ROOT_DIR}" ] || mkdir -p "${ROOT_DIR}"
    run
}
# SCRIPT ENTRY POINT
# Everything above only defines settings and functions; the backup run
# starts here. main is called without arguments.
main

View file

@ -0,0 +1,85 @@
# NPBackup config file for npbackup v2.2
# (C) 2022-2023 NetInvent
backup:
compression: auto
exclude_caches: true
exclude_files:
#- excludes/generic_excluded_extensions
#- excludes/generic_excludes
#- excludes/windows_excludes
# - excludes/linux_excludes
exclude_case_ignore: false # Exclusions will always have case ignored on Windows systems regardless of this setting
one_file_system: true
## Paths can contain multiple values, one per line, without quotation marks
paths: ### SOURCE ###
source_type: files_from_verbatim
use_fs_snapshot: false # Use VSS snapshot on Windows (needs administrator rights), will fallback to non VSS on failure
ignore_cloud_files: false # Don't complain when pointers to files in cloud (onedrive, nextcloud...) cannot be backed up
pre_exec_command: ''
pre_exec_timeout: 3600
pre_exec_failure_is_fatal: false
post_exec_command: ''
post_exec_timeout: 3600
post_exec_failure_is_fatal: false
tags: ### VM ###
additional_parameters:
priority: low
repo:
repository:
password:
password_command:
# Backup age, in minutes, which is the minimum time between two backups
minimum_backup_age: 0
upload_speed: 0 # in KiB, use 0 for unlimited upload speed
download_speed: 0 # in KiB, use 0 for unlimited download speed
backend_connections: 0 # Fine tune simultaneous connections to backend, use 0 for standard configuration
identity:
# ${HOSTNAME} is a variable containing the hostname as exposed by platform.node()
# ${RANDOM}[n] is a variable containing 'n' random alphanumeric characters
machine_id: ${HOSTNAME}
machine_group:
prometheus:
## Supervision
metrics: true
# Available variables: ${HOSTNAME}, ${RANDOM}[n], ${MACHINE_ID}, ${MACHINE_GROUP}, ${BACKUP_JOB}
backup_job: ### VM ###
# Prometheus metrics destination can be a http / https server with optional basic authentication (pushgateway), or a file path for node textfile collector to pickup
# example: https://push.monitoring.example.tld/metrics/job/npbackup
# example: https://push.monitoring.example.tld/metrics/job/${BACKUP_JOB} where ${BACKUP_JOB} is defined in prometheus_backup_job
# example: /var/lib/prometheus/collector/mytextfile
destination:
no_cert_verify: false
# prometheus metrics upload password
# private keys
http_username:
http_password:
# prometheus instance, becomes exported_instance when using a push gateway
instance: ### VM ###
# Arbitrary group to filter later backups on
group: ${MACHINE_GROUP}
# Additional prometheus labels
additional_labels:
- tenant=### TENANT ###
- backup_type=server
env:
variables:
# - SOME_ENV=Value
options:
auto_upgrade: true
auto_upgrade_server_url:
auto_upgrade_server_username:
auto_upgrade_server_password:
# every 10 NPBackup runs, we'll try an autoupgrade. Never set this lower than 2 since failed upgrades will prevent backups from succeeding
auto_upgrade_interval: 10
# Available variables: ${HOSTNAME}, ${RANDOM}[n], ${MACHINE_ID}, ${MACHINE_GROUP}, ${BACKUP_JOB}
auto_upgrade_host_identity: ${MACHINE_ID}
auto_upgrade_group: ${MACHINE_GROUP}
backup_admin_password: