#!/usr/bin/env bash
# Linux backup — delete serialized filter/tdid files outside the configured
# date windows. (The original title "Linux备份-删除指定日期内文件" sat above
# the shebang, which breaks shebang handling; kept here as a comment.)
source /etc/profile
echo "*************** start filter ***************"
# get the last day of each of the previous six months
#m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m0}
#m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m1}
#m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d)
#echo ${m2}
#m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m3}
#m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m4}
#m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m5}
#m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m6}
# Build m[0..12]: the last day of the current month, then the last day of
# each preceding month (13 entries total). Array length: ${#m[@]} / ${#m[*]}.
# "date -d 'month'" is GNU date for "+1 month": first day of next month,
# minus one day, yields the last day of the current month.
m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
echo "m0 :${m[0]} month :${#m[@]}"
for ((n = 0; n < 12; n++)); do
  # first day of the month of m[n], minus one day = last day of the month before
  m[n + 1]=$(date -d "$(date -d "${m[n]}" +%Y%m01) -1 day" +%Y%m%d)
  echo "m$((n + 1)) :${m[n + 1]} month :${#m[*]}"
done
echo "****** time :$(date '+%Y-%m-%d %H:%M:%S')******"
max_date=0
max_filter=
# Find the newest "*_*" filter directory. The date token is the 3rd
# "_"-separated field of the directory name, with any extension stripped.
# Guard the cd: continuing in the wrong directory would scan the wrong tree.
cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter || exit 1
for dir in */; do
  dir=${dir%/}
  if [[ -d "$dir" && "$dir" == *_* ]]; then
    f_d=$(echo "$dir" | cut -d '_' -f 3 | cut -d '.' -f 1)
    # lexicographic compare is safe: dates are fixed-width YYYYMMDD
    if [[ "$max_date" < "$f_d" ]]; then
      max_date=$f_d
      max_filter=$dir
    fi
  fi
done
echo "max date is :$max_date"
echo "max filter is :$max_filter"
pwd
# Copy the most recent filter directory to HDFS unless it is already there.
if hadoop fs -test -e "/data/datacenter/run_center_spark_stream/bloom_filters/$max_filter"; then
  echo "filter is already exist :$max_filter"
else
  echo "start hdfs copy"
  echo "****** start time :$(date '+%Y-%m-%d %H:%M:%S')******"
  hadoop fs -put "$max_filter" /data/datacenter/run_center_spark_stream/bloom_filters
  echo "****** end time :$(date '+%Y-%m-%d %H:%M:%S')******"
fi
# Retention boundaries, relative to the newest local filter date:
# remove_week  — local serialized files older than this are trimmed (7 days)
# remove_date  — files/HDFS filters older than this are deleted (30 days)
remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)
echo "删除本地序列化文件的日期界限:$remove_week"
remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)
echo "删除文件 和 Hadoop filter 的日期界限:$remove_date"
echo "*************** start remove filter ***************"
# Walk the local filter directories and apply retention:
#  - older than remove_date (30d): month-end snapshots keep only the MAU
#    filter file; anything else is deleted outright.
#  - older than remove_week (7d): for the three most recent month-end dates
#    keep the MAU filter and drop other *.FILTER.SER files; otherwise drop
#    the directory's *.FILTER.SER files.
base_dir=/home/hadoop/streaming_run_center/tmp/checkpoint/filter
for r_dir in */; do
  r_dir=${r_dir%/}
  if [[ -d "$r_dir" && "$r_dir" == *_* ]]; then
    # date token: 3rd "_" field of the name, extension stripped
    r_d=$(echo "$r_dir" | cut -d '_' -f 3 | cut -d '.' -f 1)
    if [[ "$r_d" < "$remove_date" ]]; then
      if [[ "${m[*]}" == *"$r_d"* ]]; then
        # month-end snapshot: keep only the MAU filter file
        cd "$base_dir/$r_dir" || continue
        pwd
        for f_dir in *; do
          if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
            echo "------ keep mau_filter is:$f_dir"
          else
            echo "remove file is:$f_dir"
            rm -r "$f_dir"
          fi
        done
        cd "$base_dir" || exit 1
        pwd
      else
        echo "remove filter_dir is:$r_dir"
        rm -r "$r_dir"
      fi
    elif [[ "$r_d" < "$remove_week" ]]; then
      # BUG FIX: the original compared against $m0/$m1/$m2, which are only
      # assigned in the commented-out legacy code and were therefore always
      # empty; use the three most recent month-end entries of m[] instead.
      if [[ "$r_d" == "${m[0]}" || "$r_d" == "${m[1]}" || "$r_d" == "${m[2]}" ]]; then
        cd "$base_dir/$r_dir" || continue
        pwd
        for f_dir in *; do
          if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
            echo "------ week keep mau_filter is:$f_dir"
          else
            if [[ "$f_dir" == *.FILTER.SER ]]; then
              echo "- last day of month - week remove file is:$f_dir"
              rm -r "$f_dir"
            fi
          fi
        done
        cd "$base_dir" || exit 1
        pwd
      else
        echo "week remove filter is:$r_dir"
        rm -r "$r_dir"/*.FILTER.SER
      fi
    fi
  fi
done
echo "=============== start remove hdfs filter ==============="
# Delete HDFS bloom filters older than remove_date, keeping month-end dates.
for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk '{print $8}'); do
  if [[ "$h_filter" == *_* ]]; then
    # date token: 6th "/" path segment, then 3rd "_" field, extension stripped
    h_date=$(echo "$h_filter" | cut -d '/' -f 6 | cut -d '_' -f 3 | cut -d '.' -f 1)
    if [[ "${m[*]}" == *"$h_date"* ]]; then
      echo "remain hdfs filter is :$h_filter"
    elif [[ "$h_date" < "$remove_date" ]]; then
      echo "not remain date is :$h_date"
      echo "remove hdfs filter is :$h_filter"
      # NOTE(review): "hadoop fs -rmr" is deprecated in favour of
      # "hadoop fs -rm -r"; kept as-is to match the deployed Hadoop version.
      hadoop fs -rmr "$h_filter"
    fi
  fi
done
echo "-------------- start tdid ---------------"
# Delete tdid files older than the 30-day boundary. Files dated at or after
# the newest filter date are copy candidates (HDFS copy currently disabled).
# Guard the cd: running the rm loop in the wrong directory would be destructive.
cd /home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo || exit 1
for tdid in *; do
  if [[ "$tdid" == *_* ]]; then
    # date token: 2nd "_" field of the filename, extension stripped
    t_d=$(echo "$tdid" | cut -d '_' -f 2 | cut -d '.' -f 1)
    if [[ "$t_d" == "$max_date" || "$t_d" > "$max_date" ]]; then
      echo "need copy date :$t_d"
      echo "need copy tdid :$tdid"
      # check whether the tdid already exists on HDFS (disabled)
      # hadoop fs -test -e jiaojiao/tdid/$tdid
      # if [[ $? == 0 ]]; then
      #   echo " tdid is already exist,remove it first "
      #   hadoop fs -rm jiaojiao/tdid/$tdid
      #   hadoop fs -put $tdid jiaojiao/tdid
      # else
      #   echo " start copy "
      #   hadoop fs -put $tdid jiaojiao/tdid
      # fi
    elif [[ "$t_d" < "$remove_date" ]]; then
      echo "remove tdid :$tdid"
      rm "$tdid"
    fi
  fi
done
#echo " =============== start remove hdfs tdid =============== "
#for h_tdid in $(hadoop fs -ls jiaojiao/tdid | awk '{print $8}')
#do
# if [[ $h_tdid == *\_* ]]; then
# h_date=$(echo $h_tdid | cut -d \_ -f 2 | cut -d \. -f 1)
# echo $h_date
# echo $h_tdid
# fi
#done