Using Shell to Traverse HDFS Paths and Report Directory Sizes by Level

2024-01-10 10:23:25
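The Bash script below walks an HDFS namespace level by level with `hadoop fs -du`, converts the raw byte counts into human-readable units, and appends a color-coded report to `hdfs_detail.txt` next to the script: first a summary of every top-level directory, then details for first-level directories over 1 TB and their subdirectories over 80 GB.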

```shell
#!/bin/bash 
 
workdir=$(cd "$(dirname "$0")"; pwd)
date=$(date +%Y-%m-%d-%H:%M:%S)
 
init(){
    # Start each run with a fresh, world-writable report file
    rm -f "$workdir"/hdfs_detail.txt
    touch "$workdir"/hdfs_detail.txt
    chmod 777 "$workdir"/hdfs_detail.txt
    echo "[Init Time]:$date" >> $workdir/hdfs_detail.txt
    echo "--" >> $workdir/hdfs_detail.txt
    echo "--" >> $workdir/hdfs_detail.txt
 
}
hdfs_collect(){
    echo "                                        ----[ Summary ]----                                                " >> "$workdir"/hdfs_detail.txt
    echo "" >> "$workdir"/hdfs_detail.txt
    echo "|   Total Size   |   Directory   |" >> "$workdir"/hdfs_detail.txt
    # List every top-level directory, largest first, with byte counts converted
    # to KB/MB/GB/TB and color-coded by magnitude
    hadoop fs -du / | sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%.3f TB\t%s\n\033[0m",size,$2);}}}}' >> "$workdir"/hdfs_detail.txt
    echo "" >> $workdir/hdfs_detail.txt
    echo "" >> $workdir/hdfs_detail.txt
}
 
hdfs_detail(){
    echo "                                       ----[ Details ]----                                                " >> "$workdir"/hdfs_detail.txt
    echo "" >> "$workdir"/hdfs_detail.txt
   # First level: collect directories larger than 1 TB, skipping /spark2-history
   hadoop fs -du / | awk '{if($1 > 1099511627776 && $2 != "/spark2-history"){print $2}}' > "$workdir"/hdfsfirst.txt
 
   for first in $(cat "$workdir"/hdfsfirst.txt);
   do
       hadoop fs -du "$first" | sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%.3f TB\t%s\n\033[0m",size,$2);}}}}' >> "$workdir"/hdfs_detail.txt
   done
   for second in $(cat "$workdir"/hdfsfirst.txt);
   do
      # Second level: record directories larger than 80 GB for the next pass
      hadoop fs -du "$second" | awk '{if($1 > 85899345920){print $2}}' >> "$workdir"/hdfssecond.txt
      hadoop fs -du "$second" | sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%.3f TB\t%s\n\033[0m",size,$2);}}}}' >> "$workdir"/hdfs_detail.txt
   done
   for third in $(cat "$workdir"/hdfssecond.txt);
   do
      # Third level: larger than 50 GB
      hadoop fs -du "$third" | sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%.3f KB\t%s\n",size,$2);}else{size=size/1024;if(size<1024){printf("\033[36m%.3f MB\t%s\n\033[0m",size,$2);}else{size=size/1024;if(size<1024){printf("\033[35m%.3f GB\t%s\n\033[0m",size,$2);}else{size=size/1024;printf("\033[31m%.3f TB\t%s\n\033[0m",size,$2);}}}}' >> "$workdir"/hdfs_detail.txt
   done
# Legacy nested traversal kept for reference; disabled by the here-document comment below
:<<!
   for line in $hdfs1;
   do
            hadoop fs -du $line |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%.3f KB\t%s\n",size,$2,"'$line'");}else{size=size/1024;if(size<1024){printf("\033[36m%.3f MB\t%s\n\033[0m",size,$2,"'$line'");}else{size=size/1024;if(size<1024){printf("\033[35m%.3f GB\t%s\n\033[0m",size,$2,"'$line'");}else{size=size/1024;printf("\033[31m%.3f TB\t%s\n\033[0m",size,$2,"'$line'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
        for line1 in $hdfs2;
        do
          hadoop fs -du $line1 |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%.3f KB\t%s\n",size,$2,"'$line1'");}else{size=size/1024;if(size<1024){printf("\033[36m%.3f MB\t%s\n\033[0m",size,$2,"'$line1'");}else{size=size/1024;if(size<1024){printf("\033[35m%.3f GB\t%s\n\033[0m",size,$2,"'$line1'");}else{size=size/1024;printf("\033[31m%.3f TB\t%s\n\033[0m",size,$2,"'$line1'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
           for line2 in $hdfs3;
            do
                hadoop fs -du $line2 |sort -r -n | awk '{size=$1/1024;if(size<1024){printf("%.3f KB\t%s\n",size,$2,"'$line2'");}else{size=size/1024;if(size<1024){printf("\033[36m%.3f MB\t%s\n\033[0m",size,$2,"'$line2'");}else{size=size/1024;if(size<1024){printf("\033[35m%.3f GB\t%s\n\033[0m",size,$2,"'$line2'");}else{size=size/1024;printf("\033[31m%.3f TB\t%s\n\033[0m",size,$2,"'$line2'");}}}}'|head -10 >> $workdir/hdfs_detail.txt
            done
        done
       echo "" >> $workdir/hdfs_detail.txt
   done
!
    # Clean up the temp files produced by this run
    rm -f "$workdir"/hdfsfirst.txt
    rm -f "$workdir"/hdfssecond.txt
    rm -f "$workdir"/hdfsthird.txt
}
init
hdfs_collect
hdfs_detail
echo "SUCCESS"

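To generate a report, run the script on a node where the Hadoop client is configured. One caveat: depending on the Hadoop version, `hadoop fs -du` may print three columns (size, disk space consumed with replication, path) rather than two; in that case every `$2` in the awk programs needs to become `$3`. A minimal invocation sketch, where the script name and install path are assumptions for illustration:

```shell
# Run once by hand; less -R renders the ANSI color codes in the report
bash /opt/scripts/hdfs_du_report.sh && less -R /opt/scripts/hdfs_detail.txt

# Or refresh the report nightly at 02:00 via cron (path is illustrative)
# 0 2 * * * /opt/scripts/hdfs_du_report.sh >/dev/null 2>&1
```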