清理指定 HDFS 路径下早于保留天数的按日期命名的数据目录

# Purge date-named entries from HDFS that are older than a retention window.
#
# Usage: <script> KEEP_DAYS
#   KEEP_DAYS  non-negative number of days to retain.
#
# Input: ./database.txt, one HDFS path per line. For each listed path, every
# child entry is listed in turn; any grandchild whose final path component
# starts with YYYY-MM-DD and sorts before (today - KEEP_DAYS) is removed with
# `hdfs dfs -rm -r -skipTrash` (permanent, bypasses the trash).
#
# Requires GNU awk (strftime/systime) and the `hdfs` CLI on PATH.

keepday=${1:-}

# Refuse to run without a numeric retention: an empty or non-numeric value
# evaluates to 0 in awk, which would make "today" the cutoff and delete
# every dated entry older than today.
if [[ ! $keepday =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  printf 'Usage: %s KEEP_DAYS  (non-negative number of days to retain)\n' "${0##*/}" >&2
  exit 2
fi

if [[ ! -r database.txt ]]; then
  printf 'error: cannot read database.txt\n' >&2
  exit 1
fi

# Compute the cutoff date once so every listing is compared against the same
# value, even if the run crosses midnight.
cutoff=$(awk -v day="$keepday" 'BEGIN { print strftime("%F", systime() - day * 24 * 3600) }')
if [[ ! $cutoff =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
  printf 'error: could not compute cutoff date (GNU awk required)\n' >&2
  exit 1
fi

# `|| [[ -n $data ]]` keeps a final line that lacks a trailing newline.
while read -r data || [[ -n $data ]]; do
  # Skip blank lines: `hdfs dfs -ls` with no path lists the user's home
  # directory, which must never be cleaned implicitly.
  [[ -n $data ]] || continue
  printf '%s\n' "$data"

  # Field 8 of `hdfs dfs -ls` output is the entry path; the "Found N items"
  # header has fewer fields and is dropped by the NF test.
  # stdin of every hdfs call is /dev/null so it cannot consume loop input.
  hdfs dfs -ls "$data" </dev/null \
    | awk 'NF >= 8 { print $8 }' \
    | while read -r line; do
        [[ -n $line ]] || continue
        hdfs dfs -ls "$line" </dev/null \
          | awk --re-interval -v cutoff="$cutoff" '
              NF >= 8 {
                n = split($8, parts, "/")
                name = parts[n]
                # Only the final path component decides; a date elsewhere in
                # the path must not make unrelated children eligible.
                if (name ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}/ && name < cutoff)
                  print $8
              }' \
          | while read -r version; do
              printf 'DELETE %s\n' "$version"
              hdfs dfs -rm -r -skipTrash "$version" </dev/null
            done
      done
done < database.txt

你可能感兴趣的:(大数据—Hadoop)