#!/usr/bin/env bash
#/ Usage: ghe-backup-mssql
#/ 
#/
#/ Note: This command typically isn't called directly. It's invoked by
#/ ghe-backup.
set -e

# Bring in the backup configuration
# shellcheck source=share/github-backup-utils/ghe-backup-config
. "$( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config"

# Set up remote host and root backup snapshot directory based on config
backup_dir="$GHE_SNAPSHOT_DIR/mssql"
last_mssql=
backup_command=
backup_type=
full_expire=
diff_expire=
tran_expire=

# Check if the export tool is available in this version
export_tool_available() {
  if [ -z "$GHE_TEST_REMOTE_VERSION" ]; then
    ghe_ssh_mssql "test -e /usr/local/bin/ghe-export-mssql"
  else
    # Always return available for test
    return 0
  fi
}

ghe_ssh_mssql() {
  if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then
    ghe-ssh "${opts[@]}" "${ssh_config_file_opt[@]}" "$GHE_MSSQL_PRIMARY_HOST" "$@"
  else
    ghe-ssh "$GHE_HOSTNAME" "$@"
  fi
}

cleanup() {
  rm -rf "$tempdir"
}
trap 'cleanup' EXIT INT

# use the mssql primary host if GHES cluster configuration contains a mssql-master or use the ghe server if the mssql-master is not available.
GHE_MSSQL_PRIMARY_NODE="$(ghe-ssh "$GHE_HOSTNAME" -- "ghe-config cluster.mssql-master" || true)"
GHE_MSSQL_PRIMARY_HOST="$(ghe-ssh "$GHE_HOSTNAME" -- "ghe-config cluster.$GHE_MSSQL_PRIMARY_NODE.hostname" || true)"

if [ -z "$GHE_MSSQL_PRIMARY_HOST" ]; then
  GHE_MSSQL_PRIMARY_HOST="$GHE_HOSTNAME"
fi

tempdir=$(mktemp -d -t backup-utils-backup-XXXXXX)
ssh_config_file_opt=()
opts=()

# get server hostnames under cluster
if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then
  ssh_config_file="$tempdir/ssh_config"
  ssh_config_file_opt=("-F" "$ssh_config_file")
  opts=("-o" "UserKnownHostsFile=/dev/null" "-o" "StrictHostKeyChecking=no" "-o" "PasswordAuthentication=no")
  ghe-ssh-config "$GHE_HOSTNAME" "$GHE_MSSQL_PRIMARY_HOST" > "$ssh_config_file"
fi

if ! export_tool_available ; then
  log_error "ghe-export-mssql is not available" 1>&2
  exit 1
fi

add_minute() {
  # Expect date string in the format of yyyymmddTHHMMSS
  # Here parse date differently depending on GNU Linux vs BSD MacOS
  if date -v -1d > /dev/null 2>&1; then
    date -v +"$2"M -ujf'%Y%m%dT%H%M%S' "$1" +%Y%m%dT%H%M%S
  else
    dt=$1
    date -u '+%Y%m%dT%H%M%S' -d "${dt:0:8} ${dt:9:2}:${dt:11:2}:${dt:13:2} $2 minutes"
  fi
}

find_timestamp() {
  filename="${1##*/}"
  IFS='@' read -ra parts <<< "$filename"
  datetime_part=${parts[1]:0:15}
  echo "$datetime_part"
}

actions_dbs() {
  all_dbs=$(echo 'set -o pipefail; ghe-mssql-console -y -n -q "SET NOCOUNT ON; SELECT name FROM sys.databases"' | ghe_ssh_mssql /bin/bash)
  for db in $all_dbs; do
    if [[ ! "$db" =~ ^(master|tempdb|model|msdb)$ ]] && [[ "$db" =~ ^[a-zA-Z0-9_-]+$ ]]; then
      echo "$db"
    fi
  done
}

ensure_same_dbs() {
  locals=()
  while read -r file; do
    filename=$(basename "$file")
    locals+=("$filename")
  done < <(find "$1" \( -name "*.bak" -o -name "*.diff" -o -name "*.log" \))

  for remote in $(actions_dbs); do
    remaining_locals=()
    for local in "${locals[@]}"; do
      if ! [[ "$local" == "$remote"* ]]; then
        remaining_locals+=("$local")
      fi
    done
    locals=("${remaining_locals[@]}")
  done

  if [[ "${#locals[@]}" -ne 0 ]]; then
    log_warn "Warning: Found following ${#locals[@]} backup files that can't be traced back to the specified GHES host."
    log_warn "Warning: Did you recently reconfigure the GHES host? Move or delete these backup files if no longer needed."
    for local in "${locals[@]}"; do
      ghe_verbose "$1/$local"
    done

    exit 1
  fi
}

run_query() {
  echo "set -o pipefail; ghe-mssql-console -y -n -q \"SET NOCOUNT ON; $1\"" | ghe_ssh_mssql /bin/bash | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}

get_latest_backup_file() {
  backups_dir=$1
  db=$2
  ext=$3

  latest_full_backup=$(find "$backups_dir" -type f -name "$db*.$ext" | grep -E '[0-9]{8}T[0-9]{6}' | sort | tail -n 1)
  latest_full_backup_file="${latest_full_backup##*/}"
  echo "$latest_full_backup_file"
}

get_backup_val() {
  db=$1
  filename=$2
  column=$3
  run_query "
  SELECT s.$column
  FROM msdb.dbo.backupset s
  JOIN msdb.dbo.backupmediafamily f
  ON s.media_set_id = f.media_set_id
  WHERE s.database_name = '$db' AND f.physical_device_name LIKE '%$filename'"
}

get_backup_checkpoint_lsn() {
  get_backup_val "$1" "$2" "checkpoint_lsn"
}

get_backup_last_lsn() {
  get_backup_val "$1" "$2" "last_lsn"
}

get_next_log_backup_starting_lsn() {
  db=$1
  # last_log_backup_lsn: The starting log sequence number of the next log backup
  # https://docs.microsoft.com/en-us/sql/relational-databases/system-catalog-views/sys-database-recovery-status-transact-sql
  run_query "
    SELECT last_log_backup_lsn
    FROM sys.database_recovery_status drs 
    JOIN sys.databases db on drs.database_id = db.database_id 
    WHERE db.name = '$db'"
}

get_next_diff_backup_base_lsn() {
  db=$1
  # differential_base_lsn: Base for differential backups. Data extents changed after this LSN will be included in a differential backup.
  # https://docs.microsoft.com/en-us/sql/relational-databases/system-catalog-views/sys-master-files-transact-sql
  run_query "
    SELECT differential_base_lsn
    FROM sys.master_files mf
    WHERE mf.name = '$db'"
}

last_mssql=$GHE_DATA_DIR/current/mssql

if [ ! -d "$last_mssql" ] \
  || [ -z "$(find "$last_mssql" -type f -name '*.bak' | head -n 1)" ]; then
  ghe_verbose "Taking first full backup"
  backup_type="full"
else
  ensure_same_dbs "$last_mssql"

  # Check schedule to determine backup type
  IFS=',' read -ra cadence <<< "$GHE_MSSQL_BACKUP_CADENCE"

  current=$(date -u +%Y%m%d%H%M%S)

  full=$(find "$last_mssql" -type f -name "*.bak" | head -n 1)
  full=$(find_timestamp "$full")
  full_expire=$(add_minute "$full" "${cadence[0]}")
  full_expire="${full_expire//T}"

  diff=$(find "$last_mssql" -type f -name "*.diff" | head -n 1)
  if [ -f "$diff" ]; then
    diff=$(find_timestamp "$diff")
    diff_expire=$(add_minute "$diff" "${cadence[1]}")
    diff_expire="${diff_expire//T}"
  else
    diff_expire=$(add_minute "$full" "${cadence[1]}")
    diff_expire="${diff_expire//T}"
  fi

  tran=$(find "$last_mssql" -type f -name "*.log" | grep -E '[0-9]{8}T[0-9]{6}' | sort | tail -1)
  tran=$(find_timestamp "$tran")
  tran_expire=$(add_minute "$tran" "${cadence[2]}")
  tran_expire="${tran_expire//T}"

  ghe_verbose "current $current, full expire $full_expire, \
diff expire $diff_expire, tran expire $tran_expire"

  # Determine the type of backup to take based on expiry time
  if [ "$current" -gt "$full_expire" ]; then
    backup_type='full'
  elif [ "$current" -gt "$diff_expire" ]; then
    backup_type='diff'
  elif [ "$current" -gt "$tran_expire" ]; then
    backup_type='transaction'
  fi

  # Upgrade to a full backup if the diff/transaction backup might not be restorable due to other backup mechanisms interfering
  # with the transaction LSN chain or differential base LSN.
  if [ "$backup_type" == 'diff' ] || [ "$backup_type" == 'transaction' ]; then
    ghe_verbose "Checking for conflicting backups to ensure a $backup_type backup is sufficient"

    for db in $(actions_dbs); do
      # Ensure that a diff backup will be based on the full backup file we have (rather than one another backup mechanism took)
      if [ "$backup_type" == 'diff' ]; then
        full_backup_file=$(get_latest_backup_file "$last_mssql" "$db" "bak")
        if [[ "$full_backup_file" == "" ]]; then
          log_warn "Taking a full backup instead of a diff backup because for $db a full backup file wasn't found"
          backup_type="full"
          break
        fi

        full_backup_file_checkpoint_lsn=$(get_backup_checkpoint_lsn "$db" "$full_backup_file")
        if [[ "$full_backup_file_checkpoint_lsn" = "NULL" ]] || [[ "$full_backup_file_checkpoint_lsn" == "" ]]; then
          log_warn "Taking a full backup instead of a diff backup because for $db the checkpoint LSN for $full_backup_file couldn't be determined"
          backup_type="full"
          break
        fi

        next_diff_backup_base_lsn=$(get_next_diff_backup_base_lsn "$db")
        if [[ "$next_diff_backup_base_lsn" = "NULL" ]] || [[ "$next_diff_backup_base_lsn" == "" ]]; then
          log_warn "Taking a full backup instead of a $backup_type backup because for $db the base LSN for the next diff backup couldn't be determined"
          backup_type="full"
          break
        fi

        # The base of the diff backup we're about to take must exactly match the checkpoint LSN of the full backup file we have
        if [[ "$next_diff_backup_base_lsn" -ne "$full_backup_file_checkpoint_lsn" ]]; then
          log_warn "Taking a full backup instead of a $backup_type backup because for $db the diff would have base LSN $next_diff_backup_base_lsn yet our full backup has checkpoint LSN $full_backup_file_checkpoint_lsn"
          backup_type="full"
          break
        fi
      fi

      # Ensure that a transaction log backup will immediately follow the previous one
      latest_log_backup_file=$(get_latest_backup_file "$last_mssql" "$db" "log")
      if [[ "$latest_log_backup_file" == "" ]]; then
        log_warn "Taking a full backup instead of a $backup_type backup because for $db a previous transaction log backup wasn't found"
        backup_type="full"
        break
      fi

      latest_log_backup_last_lsn=$(get_backup_last_lsn "$db" "$latest_log_backup_file")
      if [[ "$latest_log_backup_last_lsn" = "NULL" ]] || [[ "$latest_log_backup_last_lsn" == "" ]]; then
        log_warn "Taking a full backup instead of a $backup_type backup because for $db the LSN range for $latest_log_backup_file couldn't be determined"
        backup_type="full"
        break
      fi

      next_log_backup_starting_lsn=$(get_next_log_backup_starting_lsn "$db")
      if [[ "$next_log_backup_starting_lsn" = "NULL" ]] || [[ "$next_log_backup_starting_lsn" == "" ]]; then
        log_warn "Taking a full backup instead of a $backup_type backup because for $db the starting LSN for the next log backup couldn't be determined"
        backup_type="full"
        break
      fi

      # The starting LSN of the backup we're about to take must be equal to (or before) the last LSN from the last backup,
      # otherwise there'll be a gap and the logfiles won't be restorable
      if [[ "$next_log_backup_starting_lsn" -gt "$latest_log_backup_last_lsn" ]]; then
        log_warn "Taking a full backup instead of a $backup_type backup because for $db a gap would exist between the last backup ending at LSN $latest_log_backup_last_lsn and next backup starting at $next_log_backup_starting_lsn"
        backup_type="full"
        break
      fi
    done
  fi  
fi

# Make sure root backup dir exists if this is the first run
mkdir -p "$backup_dir"

# Use hard links to "copy" over previous applicable backups to the new snapshot folder to save disk space and time
if [ -d "$last_mssql" ]; then
  for p in "$last_mssql"/*
  do
    [[ -e "$p" ]] || break

    filename="${p##*/}"
    extension="${filename##*.}"
    transfer=

    # Copy full backups unless we're taking a new full backup
    if [ "$extension" = "bak" ] && [ "$backup_type" != 'full' ]; then
      transfer=1
    fi

    # Copy diff backups unless we're taking a new full or diff backup
    if [ "$extension" = "diff" ] && [ "$backup_type" != 'full' ] && [ "$backup_type" != 'diff' ]; then
      transfer=1
    fi

    # Copy transaction log backups unless we're taking a new full or diff backup
    if [ "$extension" = "log" ] && [ "$backup_type" != 'full' ] && [ "$backup_type" != 'diff' ]; then
      transfer=1
    fi

    if [ -n "$transfer" ]; then
      ghe_verbose "Creating hard link to $filename"
      ln "$last_mssql"/"$filename" "$backup_dir"/"$filename"
    fi
  done
fi

if [ -n "$backup_type" ]; then
  ghe_verbose "Taking $backup_type backup"

  backup_command='ghe-export-mssql'
  if [ "$backup_type" = "diff" ]; then
    backup_command='ghe-export-mssql -d'
  elif [ "$backup_type" = "transaction" ]; then
    backup_command='ghe-export-mssql -t'
  fi

  backup_failed=

  bm_start "$(basename "$0")"
  # record if generating the backup failed, this will allow us to collect any backups that may have been produced, even if they are not complete they are better than nothing
  ghe_ssh_mssql -- "$backup_command" || backup_failed='true'
  bm_end "$(basename "$0")"

  # Configure the backup cadence on the appliance, which is used for diagnostics
  ghe_ssh_mssql "ghe-config mssql.backup.cadence $GHE_MSSQL_BACKUP_CADENCE"

  # Transfer backup files from appliance to backup host
  appliance_dir="$GHE_REMOTE_DATA_DIR/user/mssql/backups"
  backups=$(echo "set -o pipefail; if sudo test -d \"$appliance_dir\"; then \
    sudo ls \"$appliance_dir\"; fi" | ghe_ssh_mssql /bin/bash)
  for b in $backups
  do
    ghe_verbose "Transferring to backup host $b"
    ghe_ssh_mssql "sudo cat $appliance_dir/$b" > "$backup_dir"/"$b"
  done

  if [ -n "$backup_failed" ]; then
    log_error 'ghe-export-mssql failed to backup at least one mssql database' 1>&2
    exit 1
  fi
fi
