Linux backup: deleting files within a specified date range
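The housekeeping script below runs on a streaming-run-center node: it finds the newest bloom-filter directory, uploads it to HDFS if it is not there yet, and then prunes local and HDFS copies against a 7-day and a 30-day cutoff, keeping month-end snapshots around longer.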

#!/usr/bin/env bash

source /etc/profile

echo "*************** start filter ***************"

# get the last day of each of the previous six months
# (old per-variable version, superseded by the m[] array loop below)
#m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m0}
#m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m1}
#m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d)
#echo ${m2}
#m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m3}
#m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m4}
#m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m5}
#m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m6}

# Get the last day of the current month, then walk back one month-end at a
# time; ${#m[*]} and ${#m[@]} both give the array length
m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
echo "m0 : ${m[0]} month : ${#m[@]}"

for n in $(seq 0 11); do
    m[$n+1]=$(date -d "$(date -d ${m[$n]} +%Y%m01) -1 day" +%Y%m%d)
    echo "m$(($n + 1)) : ${m[$n+1]} month : ${#m[*]}"
done
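# How the month-end trick works: in GNU date a bare relative item such as
# 'month' means "+1 month", so the inner call prints the first day of NEXT
# month and '-1 day' then steps back to the last day of the current month.
# Illustrative values only, assuming the script runs some day in August 2016:
#   date -d 'month' +%Y%m01             # -> 20160901
#   date -d '20160901 -1 day' +%Y%m%d   # -> 20160831
# Each loop iteration applies the same trick to the previous result, walking
# backwards one month-end at a time into m[1]..m[12].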

echo"****** time :"$(date'+%Y-%m-%d %H:%M:%S')"******"

max_date=0

# find the latest filter directory and copy it to HDFS
cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

for dir in $(ls -l ./ | awk '/^d/{print $NF}')
do
    if [[ -d $dir && $dir == *_* ]]; then
        # the date stamp is the third '_'-separated field, minus the extension
        f_d=$(echo $dir | cut -d '_' -f 3 | cut -d '.' -f 1)
        if [[ $max_date < $f_d ]]; then
            max_date=$f_d
            max_filter=$dir
        fi
    fi
done
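# The extraction above takes the third '_'-separated field and drops the
# extension; e.g. for a hypothetical directory name run_filter_20160830.FILTER:
#   echo run_filter_20160830.FILTER | cut -d '_' -f 3 | cut -d '.' -f 1   # -> 20160830
# Note that [[ $max_date < $f_d ]] is a plain string comparison inside [[ ]];
# it orders correctly here only because the stamps are zero-padded YYYYMMDD.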

echo"max date is :"$max_date

echo"max filter is :"$max_filter

pwd

# copy the most recent filter directory to HDFS
hadoop fs -test -e /data/datacenter/run_center_spark_stream/bloom_filters/$max_filter

if [[ $? == 0 ]]; then
    echo "filter already exists : $max_filter"
else
    echo "start hdfs copy"
    echo "****** start time : $(date '+%Y-%m-%d %H:%M:%S') ******"
    hadoop fs -put $max_filter /data/datacenter/run_center_spark_stream/bloom_filters
    echo "****** end time : $(date '+%Y-%m-%d %H:%M:%S') ******"
fi
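# 'hadoop fs -test -e PATH' exits 0 when PATH exists, so the same check can
# also be written without $?, e.g.:
#   if hadoop fs -test -e /data/datacenter/run_center_spark_stream/bloom_filters/$max_filter; then
#       echo "filter already exists : $max_filter"
#   fi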

remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)
echo "cutoff date for removing local serialized files : $remove_week"

remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)
echo "cutoff date for removing files and the Hadoop filters : $remove_date"

echo"*************** start remove filter ***************"

for r_dir in $(ls -l ./ | awk '/^d/{print $NF}')
do
    if [[ -d $r_dir && $r_dir == *_* ]]; then
        r_d=$(echo $r_dir | cut -d '_' -f 3 | cut -d '.' -f 1)
        if [[ $r_d < $remove_date ]]; then
            # older than the 30-day cutoff: month-end directories keep only the
            # mau filter, everything else is removed outright
            if [[ ${m[*]} == *$r_d* ]]; then
                cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir
                pwd
                for f_dir in $(ls *)
                do
                    if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
                        echo "------ keep mau_filter is : $f_dir"
                    else
                        echo "remove file is : $f_dir"
                        rm -r $f_dir
                    fi
                done
                cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
                pwd
            else
                echo "remove filter_dir is : $r_dir"
                rm -r $r_dir
            fi
        elif [[ $r_d < $remove_week ]]; then
            # between 7 and 30 days old: the three most recent month-ends keep
            # the mau filter and drop the other serialized filters
            if [[ $r_d == ${m[0]} || $r_d == ${m[1]} || $r_d == ${m[2]} ]]; then
                cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir
                pwd
                for f_dir in $(ls *)
                do
                    if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
                        echo "------ week keep mau_filter is : $f_dir"
                    else
                        if [[ "$f_dir" == *.FILTER.SER ]]; then
                            echo "- last day of month - week remove file is : $f_dir"
                            rm -r $f_dir
                        fi
                    fi
                done
                cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
                pwd
            else
                echo "week remove filter is : $r_dir"
                rm -r $r_dir/*.FILTER.SER
            fi
        fi
    fi
done
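# Membership test used above: ${m[*]} joins the array into a single string,
# so [[ ${m[*]} == *$r_d* ]] is a substring match against the joined list of
# month-ends; safe here because every entry is a fixed-width YYYYMMDD stamp:
#   m=(20160831 20160731 20160630)
#   r_d=20160731
#   [[ ${m[*]} == *$r_d* ]] && echo "month-end"   # -> month-end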

echo"=============== start remove hdfs filter ==============="

# 删除hdfs上指定日期外的tdid

for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk '{print $8}')
do
    if [[ $h_filter == *_* ]]; then
        # field 6 of the path is the directory name; strip down to the date stamp
        h_date=$(echo $h_filter | cut -d '/' -f 6 | cut -d '_' -f 3 | cut -d '.' -f 1)
        # echo "hdfs date : $h_date"
        # echo "hdfs filter : $h_filter"
        if [[ ${m[*]} == *$h_date* ]]; then
            echo "remain hdfs filter is : $h_filter"
        elif [[ $h_date < $remove_date ]]; then
            echo "not remain date is : $h_date"
            echo "remove hdfs filter is : $h_filter"
            hadoop fs -rmr $h_filter
        fi
    fi
done
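# 'hadoop fs -rmr' still works but is deprecated in Hadoop 2.x; on newer
# clusters the equivalent call is:
#   hadoop fs -rm -r $h_filter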

echo"-------------- start tdid ---------------"

# 删除小于30天的tdid

cd/home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo

for tdid in $(ls *)
do
    if [[ $tdid == *_* ]]; then
        t_d=$(echo $tdid | cut -d '_' -f 2 | cut -d '.' -f 1)
        if [[ $t_d == $max_date || $t_d > $max_date ]]; then
            echo "need copy date : $t_d"
            echo "need copy tdid : $tdid"
            # check whether the tdid already exists on HDFS
            # hadoop fs -test -e jiaojiao/tdid/$tdid
            # if [[ $? == 0 ]]; then
            #     echo "tdid already exists, remove it first"
            #     hadoop fs -rm jiaojiao/tdid/$tdid
            #     hadoop fs -put $tdid jiaojiao/tdid
            # else
            #     echo "start copy"
            #     hadoop fs -put $tdid jiaojiao/tdid
            # fi
        elif [[ $t_d < $remove_date ]]; then
            echo "remove tdid : $tdid"
            rm $tdid
        fi
    fi
done
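# Aside: $(ls *) relies on word splitting, which is fine for these
# date-stamped names but breaks on filenames containing spaces; the plain
# glob is the safer idiom:
#   for tdid in *; do ... ; done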

#echo " =============== start remove hdfs tdid =============== "

#for h_tdid in $(hadoop fs -ls jiaojiao/tdid | awk '{print $8}')

#do

# if [[ $h_tdid == *\_* ]]; then

# h_date=$(echo $h_tdid | cut -d \_ -f 2 | cut -d \. -f 1)

# echo $h_date

# echo $h_tdid

# fi

#done