#!/bin/bash
########################################
# script for incremental backup
########################################
# idea: easy to use and easy to understand!
# what it does: backs up SOURCE into BACKUPDIR.
# if there is no timestamp (snar) file, do a full backup. if there is a recent snar file, do an incremental backup.
# backup files are named like this: backup_HOSTNAME_17112021_1200_Llevel.tgz for a backup file which was
# created on 17-11-2021 at 12:00h on a system with hostname HOSTNAME, with backup level "level".
# backup level 0 means full backup; level 1 and higher are incremental backups.
# When the month changes, the files of the previous month are moved to ARCHIVDIR/112021 (MMYYYY) and
# then the next full backup is performed into the now empty BACKUPDIR.
#TODO:
#   - support for more than one logfile, e.g. one on the backup drive and one locally
#   - remote backup, e.g. via sftp (my raspi is not set up right now.. will come soon)
#   - possibility of not saving absolute paths.. ? what does that mean? difficult to understand..
#   - tar errors in log..
#   - progress bar, e.g. tar feature --checkpoint or pv .. but that can't be possible, because tar doesn't know the size of
#       what to back up BEFORE the backup..
#   - more comments
#   - check size of logfile..
#   - what do we do if tar exits with error code 2? do we unlink the created file? yes.. we should
#   - annoying quirk: tar stores directories even if they are unchanged .. see also the tar invocation at the end of the script
#
#   as you can see, there are still some questions to answer and issues to solve.. but file format and file names hopefully won't change
#   for now it's a fast way to do important backups..

#+++++ SETTINGS +++++#
# Prefix of every backup/snar file name; contains the short host name.
PREFIX="backup_$(hostname -s)"

# Directory the backup archives are written to.
BACKUPDIR="/media/backups"

# Directory that receives the monthly rollover (same as BACKUPDIR here).
ARCHIVDIR="/media/backups"

# Directory tree that gets backed up.
SOURCE="/home/flo"

# Space-separated list of things to leave out; relative or absolute paths are fine.
EXCLUDE=".mozilla .thumbnails .vice .vim .viminfo Videos Musik .gnome2_private .Xauthority .Xdefaults .bash_history .bash_logout .bashrc"


# Uncomment and set your address to receive the log by email.
#EMAILADDRESS="mail@mail.de"

# Log file (maybe later: one remote on the backup drive and one local).
LOGFILES="backit.log"

# Backup level. Possible values 0, 1, 2, N .. -> only 0, 1 and N are supported right now!
LEVEL="N"

# Archive period: "monthly" or "none".
#   monthly: old backups are moved into one folder per month, e.g. $BACKUPDIR/092022/
#   none:    old backup files are not moved at all
ARCHIVE="monthly"

# Uncomment to also write the log to the screen.
#LOG2STDOUT=1
#+++++ ok i think we are set now +++++#


#-----the following is not part of the settings -----
# Exclude the backup and archive directories themselves; otherwise every run
# would pack the earlier archives into the new one.
EXCLUDE+=" ${BACKUPDIR}"
EXCLUDE+=" ${ARCHIVDIR}"

# The timestamp is taken once and the date is derived from it, so both values
# agree even when the script is started right at midnight.
DATETIME="$(date +%d%m%Y_%H%M)"                 # DDMMYYYY_HHMM, used in backup file names
DATEOFTODAY="${DATETIME%%_*}"                   # DDMMYYYY
DATETIMEHR="date \"+%d-%m-%Y %H:%M:%S\""        # command string; evaluated later (in log function) to get the current time

LOG2FILE=1              # log to file
LOG2MAIL=1              # log to email
# A sink is switched off again when its target is not configured.
if [ -z "$EMAILADDRESS" ]; then
    unset LOG2MAIL
fi
if [ -z "$LOGFILES" ]; then
    unset LOG2FILE
fi

#for debug reasons..
#skip_mv=1               # skip mv command
#skip_tar=1             # skip tar command
#skip_mkdir=1            # skip mkdir command
#skip_cp=1               #skip cp command

# log to stdout will be bold
# tput fails when $TERM is missing or unknown (e.g. under cron); in that case
# the error is suppressed and the variables stay empty (no formatting).
bold=$(tput bold 2>/dev/null)
normal=$(tput sgr0 2>/dev/null)

#######################################
# Write one log entry to every enabled sink.
# Globals:   DATETIMEHR (read)             - date command string, evaluated on each call
#            LOG2FILE, LOGFILES (read)     - when LOG2FILE is set, append the entry to LOGFILES
#            LOG2MAIL, EMAILADDRESS (read) - when LOG2MAIL is set, mail the entry
#            LOG2STDOUT, bold, normal (read) - when LOG2STDOUT is set, print the entry in bold
# Arguments: $1 - severity tag (e.g. info, fatal)
#            $2 - message; backslash escapes such as \n are interpreted (echo -e)
#######################################
log () {
    local dthr
    dthr=$(eval "$DATETIMEHR")      # evaluated here so every entry carries the current time
    if [ -n "$LOG2FILE" ]; then
        # quoted so a log path containing spaces is not an "ambiguous redirect"
        echo -e "BACKIT: $dthr <$1> $2" >> "$LOGFILES"
    fi
    if [ -n "$LOG2MAIL" ]; then
        # EMAILADDRESS is left unquoted on purpose: a space-separated list
        # of recipients is passed to mail as separate arguments.
        # shellcheck disable=SC2086
        echo -e "$2" | mail -s "BACKIT: $dthr <$1>" $EMAILADDRESS
    fi
    if [ -n "$LOG2STDOUT" ]; then
        echo -e "${bold}$dthr <$1> $2${normal}"
    fi
}

# Print the timestamp part (DDMMYYYY_HHMM) of a backup file name.
# Globals:   PREFIX (read)
# Arguments: $1 - path of a backup file, e.g. /dir/${PREFIX}_17112021_1200_L0.tgz
# Outputs:   the text between "${PREFIX}_" and "_L<level>.<ext>" on stdout
getHumanReadableTimeStamp () {
    local stamp
    stamp=${1##*/"${PREFIX}"_}      # drop directory and prefix (prefix is matched literally)
    stamp=${stamp%%_L*.*}           # drop "_L<level>.<extension>"
    printf '%s\n' "$stamp"
}

# Print the date part (DDMMYYYY) of a backup file name.
# Globals:   PREFIX (read)
# Arguments: $1 - path of a backup file, e.g. /dir/${PREFIX}_17112021_1200_L0.tgz
# Outputs:   the text between "${PREFIX}_" and "_HHMM_L<level>.<ext>" on stdout
getHumanReadableDate () {
    local stamp
    stamp=${1##*/"${PREFIX}"_}      # drop directory and prefix (prefix is matched literally)
    stamp=${stamp%%_????_L*.*}      # cut the time, level and extension away
    printf '%s\n' "$stamp"
}

# Find the most recent backup archive, judged by the timestamp in its file name.
# Globals:   BACKUPDIR, PREFIX (read)
# Arguments: $1 - level part of the file name to look for, e.g. "L0";
#                 may be a glob. Defaults to "L*" (all levels).
# Outputs:   path of the newest matching .tgz on stdout, or an empty line
#            when no file matches
findMostRecentBackupFile () {
    local levelPattern=${1:-L*}
    local newestFile="" newestUTS=0     # always start empty so no stale value is reported
    local candidate stamp uts
    local nullglobWasOff=0

    # nullglob: an unmatched pattern expands to nothing, so the loop is simply skipped.
    # The caller's setting is restored afterwards.
    shopt -q nullglob || nullglobWasOff=1
    shopt -s nullglob

    # The directory and prefix are quoted (paths with spaces); levelPattern is
    # left unquoted on purpose so that "L*" works as a glob.
    for candidate in "${BACKUPDIR}/${PREFIX}"_*_${levelPattern}.tgz; do
        stamp=$(getHumanReadableTimeStamp "$candidate")
        # stamp is DDMMYYYY_HHMM; date -d wants MM/DD/YYYY HH:MM.
        # Files whose name does not carry a parsable timestamp are skipped.
        uts=$(date -d "${stamp:2:2}/${stamp:0:2}/${stamp:4:4} ${stamp:9:2}:${stamp:11:2}" +%s 2>/dev/null) || continue
        if (( uts > newestUTS )); then
            newestUTS=$uts
            newestFile=$candidate
        fi
    done

    if (( nullglobWasOff )); then
        shopt -u nullglob
    fi
    printf '%s\n' "$newestFile"
}

# Check that everything the backup needs is available.
# Globals:   BACKUPDIR, ARCHIVDIR, SOURCE, LOG2MAIL (read)
# Outputs:   one line per detected problem on stdout; no output means
#            all checks passed (the caller treats any output as failure)
checkpreconditions () {
    # The variables are quoted: unquoted, an empty value or a path with
    # spaces would make the test misbehave and the problem go unreported.
    #check if backup directory exists..
    if [ ! -d "$BACKUPDIR" ]; then
        echo "backup dir $BACKUPDIR doesn't exists. Mount drive or create folder.."
    fi
    #check if archive dir exists..
    if [ ! -d "$ARCHIVDIR" ]; then
        echo "archive dir $ARCHIVDIR doesn't exists. Mount drive or create folder"
    fi
    #check if dir to backup exists..
    if [ ! -d "$SOURCE" ]; then
        echo "backup source dir $SOURCE doesn't exists. Mount drive or create folder"
    fi
    if ! command -v tar &> /dev/null; then
        echo "tar could not be found"
    fi
    if [ -n "$LOG2MAIL" ]; then
        if ! command -v mail &> /dev/null; then
            echo "mail could not be found"
        else
            # NOTE(review): stdin comes from /dev/null so a mail client that
            # waits for interactive input cannot block the script - confirm
            # against the installed mail implementation.
            if ! mail < /dev/null > /dev/null 2>&1; then
                echo "mail is not set up correctly!"
            fi
        fi
    fi
}

nostdout="0"            # becomes 1 when --nostdout is given on the command line
# Print the arguments as one line to stdout, unless nostdout is 1.
# Globals:   nostdout (read)
# Arguments: the words to print (joined by single spaces)
echo2stdout () {
    if [[ $nostdout != 1 ]]; then
        # "$*" keeps the text literal: an unquoted expansion would turn the
        # leading "*" of messages like "* Starting ..." into a file list.
        printf '%s\n' "$*"
    fi
}

##** main starts here **##
# Called without any argument: print the help screen. The script keeps running
# so that the precondition check still happens before the summary is shown.
if [[ -z "$1" ]]; then
    printf '%s\n' \
        "Flos backup script. Nothing else." \
        "${bold}USAGE: ./backup [doit|list] [OPTION]${normal}" \
        "  doit .. doit" \
        "  list .. list current backup files" \
        "OPTIONS: " \
        "  --force ONLY in combination with doit command, will do a backup even if there was already a" \
        "    backup on the very same day.. sometimes your work is THAT important :)." \
        "  --nostdout suppresses output to stdout. for cron jobs e.g. Be aware: only the normal program output" \
        "    is omitted. If log to stdout is turned on it will still log to stdout! This is because you might" \
        "    want to pipe the log output to somewhere.." \
        "  -v verbose" \
        "  --dry-run" \
        "" \
        "But how do I get my data back? Short answer: Its hard. Long answer: type ./restore" \
        ""
fi

# Any output of checkpreconditions is a problem report; show it and give up.
problems=$(checkpreconditions)
if [[ -n "$problems" ]]; then
    printf '%s\n' "Problems" "--------" "$problems"
    exit 1
fi

# Still no argument: finish the help screen with the configured directories.
if [[ -z "$1" ]]; then
    printf '%s\n' \
        "Summary" \
        "Backup  dir is $BACKUPDIR" \
        "Archive dir is $ARCHIVDIR" \
        "Source  dir is $SOURCE"
    exit
fi


# Parse the command line: one command (doit|list) plus optional flags, in any order.
# "$@" is quoted so every argument is looked at exactly as it was typed
# (no re-splitting, no glob expansion).
for cla in "$@"; do
    case "$cla" in
        list)                           # list command: show the backup dir and stop
            ls -lh -- "$BACKUPDIR"
            exit
            ;;
        doit)                           # backup command
            dobackup=1
            ;;
        --force)                        # back up even if today's backup already exists
            force_backup=1
            ;;
        --nostdout)                     # silence echo2stdout (log to stdout is not affected)
            nostdout=1
            #unset LOG2STDOUT
            ;;
        -v)                             # handed to tar as is
            verbose="-v"
            ;;
        --dry-run)                      # print the commands but do not change anything
            skip_mv=1
            skip_tar=1
            skip_mkdir=1
            skip_cp=1
            ;;
        *)                              # unknown command or option
            echo2stdout "what do you want, my friend?"
            exit 1                      # die, but not silent..
            ;;
    esac
done

if [ -z "$dobackup" ]; then             # you should state a command!
    echo2stdout "missing command"
    exit 1                              # die, but not silent
fi

echo2stdout "* Starting backup script at $(eval "$DATETIMEHR")"
echo2stdout "Backup  dir is $BACKUPDIR"
echo2stdout "Archive dir is $ARCHIVDIR"
echo2stdout "Source  dir is $SOURCE"
log info "Starting backup script with backup dir: $BACKUPDIR"

# Monthly rollover: every backup that was not made in the current month is
# moved, together with its snar file, into ARCHIVDIR/<MMYYYY>.
if [ "$ARCHIVE" == "monthly" ]; then
    # nullglob: an unmatched pattern expands to nothing, so the loop is skipped
    shopt -s nullglob
    monthyearOfToday=${DATEOFTODAY:2:6}                 # MMYYYY of today
    for file in "${BACKUPDIR}/${PREFIX}"_*.tgz; do      # go through backup files
        TS=$(getHumanReadableTimeStamp "$file")         # DDMMYYYY_HHMM of file
        monthyear=${TS:2:6}                             # MMYYYY of file
        # Skip files whose name does not carry a proper date; they would
        # otherwise be moved into a nonsense directory.
        [[ $monthyear =~ ^[0-9]{6}$ ]] || continue
        if [ "$monthyear" != "$monthyearOfToday" ]; then
            monthdir="${ARCHIVDIR}/${monthyear}"
            if [ ! -d "$monthdir" ]; then
                echo2stdout "mkdir $monthdir"
                if [ ! $skip_mkdir ]; then
                    mkdir -p -- "$monthdir"
                fi
            fi
            snarfile="${file%.tgz}.snar"
            echo2stdout "mv $file $monthdir"
            if [ ! $skip_mv ]; then
                # The trailing slash makes mv fail when the directory is
                # missing, instead of renaming the archive to "<MMYYYY>" and
                # overwriting it with the next one.
                mv -- "$file" "${monthdir}/"
            fi
            if [ -e "$snarfile" ]; then                 # a snar file may be absent
                echo2stdout "mv $snarfile $monthdir"
                if [ ! $skip_mv ]; then
                    mv -- "$snarfile" "${monthdir}/"
                fi
            fi
        fi
    done
    shopt -u nullglob           # unset nullglob
fi

# Is the newest backup in BACKUPDIR from today? Then stop, unless --force was given.
newestBackupDate=$(getHumanReadableDate $(findMostRecentBackupFile))
if [ "$newestBackupDate" == $DATEOFTODAY ]; then
    if [ $force_backup ]; then
        # another backup on the same day was requested explicitly
        echo2stdout "forced backup"
    else
        echo2stdout "* Backup done for today. Try me tomorrow :)"
        log info "Backup already done for today. No action necessary."
        exit
    fi
fi


# which backup level do we perform? It is requested in $LEVEL.
# - if backupdir is empty (ignoring dirs and the log file): do LEVEL0 in any case
# - if LEVEL0 > just do a backup with a new (non-existing) snar file (or no snar file at all, meaning no -g option)
# - if LEVEL1
#    > look for a tgz marked LEVEL0 AND a (datewise) corresponding snar file; this must be the most recent
#    > (datewise) level 0 file!
#    * copy the snar file and rename it (because it will be altered) and invoke tar with the copy
#    ? what if there is more than one level 0 file? !take the most recent one!
#    ? what if there is no level 0 backup? !perform a level 0 backup
# - if LEVEL2
#    > look for a tgz marked LEVEL1 AND a (datewise) corresponding snar file; it must be the most recent
#    * copy the snar file and rename it (because it will be altered) and invoke tar with the copy
#    ? what if there is more than one level 1 file? !take the most recent one!
#    ? what if there is no level 1 file? !perform a level 1 backup.
# - if LEVELN
#    > look for the most recent tgz, take its level and perform level+1!
#    * copy the snar file and rename it (because it will be altered) and invoke tar with the copy
#    ? what if the most recent file is level 0? !doesn't matter, then perform level 1



# Print the backup level encoded in a backup file name.
# Globals:   PREFIX (read)
# Arguments: $1 - path of a backup file, e.g. /dir/${PREFIX}_17112021_1200_L3.tgz
# Outputs:   the text between "_L" and the first following "." (here: 3) on stdout
getLevel () {
    local level
    level=${1##*/"${PREFIX}"_????????_????_L}   # drop everything up to and including "_L"
    level=${level%%.*}                          # drop the extension
    printf '%s\n' "$level"
}


# Decide which level is performed (PERFORMLEVEL). For incremental levels the
# new snar file is seeded with a copy of the base backup's snar file, because
# the snar file handed to tar will be altered.
case "$LEVEL" in
    0)
        PERFORMLEVEL=0          # that was easy
        ;;
    N)
        # "L*" is quoted so the shell cannot expand it against files in the
        # current directory before the function sees it.
        mostrecentTgz=$(findMostRecentBackupFile "L*")
        if [ -n "$mostrecentTgz" ]; then
            HRTS=$(getHumanReadableTimeStamp "$mostrecentTgz")
            l=$(getLevel "$mostrecentTgz")
            baseSnar="${BACKUPDIR}/${PREFIX}_${HRTS}_L${l}.snar"
            if [ -f "$baseSnar" ]; then
                # ok snar is there
                PERFORMLEVEL=$((l+1))
                SNARFILE="${PREFIX}_${DATETIME}_L${PERFORMLEVEL}.snar"
                if [ ! $skip_cp ]; then
                    # Without the copy tar would quietly start from an empty
                    # snar file and write a full backup labelled as
                    # incremental, so a failed copy is fatal.
                    if ! cp -- "$baseSnar" "${BACKUPDIR}/${SNARFILE}"; then
                        echo "could not copy snar file $baseSnar"
                        exit 1
                    fi
                fi
                echo2stdout "cp $baseSnar ${BACKUPDIR}/$SNARFILE"
            else
                echo "puh.. snar file missing.. or too many.."
                echo ".. you have to unravel this mess by yourself. It wasn't me who did it!"
                exit 1
            fi
        else
            PERFORMLEVEL=0
            echo "Do level 0 backup!"
        fi
        ;;
    1)
        # search for the most recent level 0 tgz.
        mostrecentTgz=$(findMostRecentBackupFile L0)
        if [ -n "$mostrecentTgz" ]; then
            # ok.. now: is there a corresponding snar file?
            HRTS=$(getHumanReadableTimeStamp "$mostrecentTgz")
            baseSnar="${BACKUPDIR}/${PREFIX}_${HRTS}_L0.snar"
            if [ -f "$baseSnar" ]; then
                PERFORMLEVEL=1
                SNARFILE="${PREFIX}_${DATETIME}_L1.snar"
                if [ ! $skip_cp ]; then
                    # see level N: a failed copy must not be ignored
                    if ! cp -- "$baseSnar" "${BACKUPDIR}/${SNARFILE}"; then
                        echo "could not copy snar file $baseSnar"
                        exit 1
                    fi
                fi
                echo2stdout "cp $baseSnar ${BACKUPDIR}/$SNARFILE"
            else
                echo "not the right snar file found. cant do a level 1 backup"
                echo "Please, use your magic human brain to resolve this.."
                exit 1
            fi
        else
            # preconditions for level 1 not complied: fall back to a full backup
            echo "Preconditions for level 1 not complied"
            echo "No Level 0 backup found!"
            PERFORMLEVEL=0
        fi
        ;;
    *)
        # Any other value would leave PERFORMLEVEL empty and produce files
        # named "..._L.tgz", so stop here.
        echo "Unsupported backup level '$LEVEL'. Supported values are 0, 1 and N."
        exit 1
        ;;
esac

#name of backupfiles..
BACKUPFILE="${PREFIX}_${DATETIME}_L${PERFORMLEVEL}.tgz"
SNARFILE="${PREFIX}_${DATETIME}_L${PERFORMLEVEL}.snar"

# Build the tar command line as an array so that every argument stays one word.
# EXCLUDE is a space-separated list and is split on purpose; globbing is off
# while splitting so wildcard patterns reach tar unexpanded.
tarArgs=()
set -f
for file in $EXCLUDE; do
    tarArgs+=("--exclude=$file")        # the excludes have to come FIRST, it doesn't work otherwise
done
set +f
if [ -n "$verbose" ]; then
    tarArgs+=("$verbose")
fi
tarArgs+=(-cpzf "${BACKUPDIR}/${BACKUPFILE}" -g "${BACKUPDIR}/${SNARFILE}" "$SOURCE")

log info "Performing level $PERFORMLEVEL backup from $SOURCE to ${BACKUPDIR}/${BACKUPFILE}. snar file = ${BACKUPDIR}/${SNARFILE}. Exclude = $EXCLUDE"

#++ now we finally do our backup
echo2stdout "tar ${tarArgs[*]}"
if [ ! $skip_tar ]; then
    # right now, tar stores all directories even if the files inside are unchanged and not stored!
    # that wastes a lot of space.. no workaround found so far.

    # even for a level 0 backup we create a snar file.. why? so we have the possibility to change level later..
    # even more: we keep snar files for every old backup file, even for level n backups! they are small compared to the tgz..

    # lastpipe: the while loop runs in this shell, so TOMAIL survives the pipeline.
    # pipefail: $? of the pipeline is tar's exit code (the loop itself ends with 0).
    shopt -s lastpipe  # https://newbedev.com/get-exit-code-of-process-substitution-with-pipe-into-while-loop
    shopt -so pipefail

    TOMAIL=""
    tar "${tarArgs[@]}" 2>&1 |
        while IFS= read -r i; do
            echo2stdout "$i"    # to stdout
            TOMAIL+="$i\n"      # log uses echo -e, so \n separates the lines in the log/mail
        done
    e=$?                    # exit code of tar..
    if [ "$e" -eq 0 ]; then     # all good
        stored=$(du -h -- "${BACKUPDIR}/${BACKUPFILE}" | cut -f 1)
        echo2stdout "* Backup done at $(eval "$DATETIMEHR"). Stored $stored"
        log info "Backup was successfull! Stored $stored.\n $TOMAIL"
    elif [ "$e" -eq 1 ]; then   # all pretty good
        log info "tar exited with error code 1: Some files changed while backup was processed!\n $TOMAIL"   # not as bad as it sounds..
        # or is it that bad? What if something like an INBOX file has changed? is it corrupt then?
        # not sure what we have to do now..
        # maybe we should grab the changed files and back them up again? but is that possible?
        # a compressed tar can probably not be modified..
        exit 1              # die but not silent..
    else                        # 2 or more: we got a problem here
        log fatal "Couldn't do the backup! tar exited with code $e.\n There might be a compromised file left!\n $TOMAIL"
        # NOTE(review): the possibly incomplete archive and its snar file stay
        # in BACKUPDIR and can be picked up as the base of the next
        # incremental run - still an open question what to do with them.
        exit 1              # die, but not silent
    fi
fi

exit                        # final exit

# just for me.. the writer of this script
# tar 
# -c create
# -p preserve permissions
# -z use gzip
# -f file 
# -t list
# -v verbose
# -x extract

