DataX: Full Extraction of MySQL Data into Date-Partitioned HDFS Paths

Script Overview

This script comes from the 尚硅谷 (Atguigu) Data Warehouse 6.0 course.

Config file path: /opt/module/datax/job/import/

For generating these config files in batch, refer to the companion article.

Since multiple tables need a full extraction, we use a shell script to run the DataX jobs in batch.

Create the Script

vim mysql_to_hdfs_full.sh

#!/bin/bash

DATAX_HOME=/opt/module/datax

# If a date is passed in, use it as do_date; otherwise default to yesterday
if [ -n "$2" ]; then
    do_date=$2
else
    do_date=$(date -d "-1 day" +%F)
fi

# Handle the target path: if it does not exist, create it; if it does exist,
# clear it, so the sync job can be re-run without leaving stale data behind
handle_targetdir() {
  hadoop fs -test -e "$1"
  if [[ $? -ne 0 ]]; then
    echo "Path $1 does not exist, creating it..."
    hadoop fs -mkdir -p "$1"
  else
    echo "Path $1 already exists, clearing it..."
    hadoop fs -rm -r -f "$1"/*
  fi
}

# Data sync: prepare the target path, then launch DataX with the path
# injected as the targetdir parameter consumed by the job's JSON config
import_data() {
  datax_config=$1
  target_dir=$2

  handle_targetdir "$target_dir"
  python $DATAX_HOME/bin/datax.py -p"-Dtargetdir=$target_dir" "$datax_config"
}

case $1 in
"activity_info")
  import_data /opt/module/datax/job/import/activity_info.json /origin_data/db/activity_info_full/$do_date
  ;;
"activity_rule")
  import_data /opt/module/datax/job/import/activity_rule.json /origin_data/db/activity_rule_full/$do_date
  ;;
"base_category1")
  import_data /opt/module/datax/job/import/base_category1.json /origin_data/db/base_category1_full/$do_date
  ;;
"base_category2")
  import_data /opt/module/datax/job/import/base_category2.json /origin_data/db/base_category2_full/$do_date
  ;;
"base_category3")
  import_data /opt/module/datax/job/import/base_category3.json /origin_data/db/base_category3_full/$do_date
  ;;
"base_dic")
  import_data /opt/module/datax/job/import/base_dic.json /origin_data/db/base_dic_full/$do_date
  ;;
"base_province")
  import_data /opt/module/datax/job/import/base_province.json /origin_data/db/base_province_full/$do_date
  ;;
"base_region")
  import_data /opt/module/datax/job/import/base_region.json /origin_data/db/base_region_full/$do_date
  ;;
"base_trademark")
  import_data /opt/module/datax/job/import/base_trademark.json /origin_data/db/base_trademark_full/$do_date
  ;;
"cart_info")
  import_data /opt/module/datax/job/import/cart_info.json /origin_data/db/cart_info_full/$do_date
  ;;
"coupon_info")
  import_data /opt/module/datax/job/import/coupon_info.json /origin_data/db/coupon_info_full/$do_date
  ;;
"sku_attr_value")
  import_data /opt/module/datax/job/import/sku_attr_value.json /origin_data/db/sku_attr_value_full/$do_date
  ;;
"sku_info")
  import_data /opt/module/datax/job/import/sku_info.json /origin_data/db/sku_info_full/$do_date
  ;;
"sku_sale_attr_value")
  import_data /opt/module/datax/job/import/sku_sale_attr_value.json /origin_data/db/sku_sale_attr_value_full/$do_date
  ;;
"spu_info")
  import_data /opt/module/datax/job/import/spu_info.json /origin_data/db/spu_info_full/$do_date
  ;;
"promotion_pos")
  import_data /opt/module/datax/job/import/promotion_pos.json /origin_data/db/promotion_pos_full/$do_date
  ;;
"promotion_refer")
  import_data /opt/module/datax/job/import/promotion_refer.json /origin_data/db/promotion_refer_full/$do_date
  ;;
"all")
  import_data /opt/module/datax/job/import/activity_info.json /origin_data/db/activity_info_full/$do_date
  import_data /opt/module/datax/job/import/activity_rule.json /origin_data/db/activity_rule_full/$do_date
  import_data /opt/module/datax/job/import/base_category1.json /origin_data/db/base_category1_full/$do_date
  import_data /opt/module/datax/job/import/base_category2.json /origin_data/db/base_category2_full/$do_date
  import_data /opt/module/datax/job/import/base_category3.json /origin_data/db/base_category3_full/$do_date
  import_data /opt/module/datax/job/import/base_dic.json /origin_data/db/base_dic_full/$do_date
  import_data /opt/module/datax/job/import/base_province.json /origin_data/db/base_province_full/$do_date
  import_data /opt/module/datax/job/import/base_region.json /origin_data/db/base_region_full/$do_date
  import_data /opt/module/datax/job/import/base_trademark.json /origin_data/db/base_trademark_full/$do_date
  import_data /opt/module/datax/job/import/cart_info.json /origin_data/db/cart_info_full/$do_date
  import_data /opt/module/datax/job/import/coupon_info.json /origin_data/db/coupon_info_full/$do_date
  import_data /opt/module/datax/job/import/sku_attr_value.json /origin_data/db/sku_attr_value_full/$do_date
  import_data /opt/module/datax/job/import/sku_info.json /origin_data/db/sku_info_full/$do_date
  import_data /opt/module/datax/job/import/sku_sale_attr_value.json /origin_data/db/sku_sale_attr_value_full/$do_date
  import_data /opt/module/datax/job/import/spu_info.json /origin_data/db/spu_info_full/$do_date
  import_data /opt/module/datax/job/import/promotion_pos.json /origin_data/db/promotion_pos_full/$do_date
  import_data /opt/module/datax/job/import/promotion_refer.json /origin_data/db/promotion_refer_full/$do_date
  ;;
esac
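
For reference, each job JSON under /opt/module/datax/job/import/ must expose the targetdir parameter that the script injects via -p"-Dtargetdir=...". Below is a minimal sketch of such a config, not the actual course file: the hostnames, database name, credentials, and column lists are illustrative placeholders.

{
  "job": {
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "root",
            "password": "******",
            "connection": [
              {
                "jdbcUrl": ["jdbc:mysql://hadoop102:3306/gmall"],
                "table": ["activity_info"]
              }
            ],
            "column": ["id", "activity_name", "activity_type"]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://hadoop102:8020",
            "path": "${targetdir}",
            "fileName": "activity_info",
            "fileType": "text",
            "writeMode": "append",
            "fieldDelimiter": "\t",
            "compress": "gzip"
          }
        }
      }
    ],
    "setting": {
      "speed": {"channel": 1}
    }
  }
}

DataX substitutes ${targetdir} at run time with the value passed via -p, and the gzip compression here matches the zcat used in the verification step below.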

Run the Script

The date supplied here becomes part of the HDFS path, which gives us day-level partitioning.
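
If the script was just created, grant it execute permission first; the call below assumes the script's directory is on your PATH:

chmod +x mysql_to_hdfs_full.sh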

mysql_to_hdfs_full.sh  all  2022-06-08

Check the Results

Here we inspect one table's data to verify the sync. The files are written gzip-compressed, hence the zcat at the end:

hadoop fs -cat /origin_data/db/activity_info_full/2022-06-08/* | zcat
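
You can also list the partition directory to confirm that the files landed where expected:

hadoop fs -ls /origin_data/db/activity_info_full/2022-06-08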
