Script Overview
This script is from the 尚硅谷 (Atguigu) Data Warehouse 6.0 course.
DataX job config directory: /opt/module/datax/job/import/
For how to generate the per-table config files in batch, refer to the linked article.
Since multiple tables require full-load ingestion, we use a script to run all the DataX jobs in batch.
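Each per-table job JSON is expected to take its HDFS output path from a ${targetdir} variable (typically referenced in the hdfswriter "path" field, an assumption based on how these jobs are conventionally written), which is filled in at run time through DataX's -p option. A minimal manual invocation, shown only to illustrate the mechanism the script automates:

# Run a single job by hand, substituting the target directory;
# the job JSON must reference it as ${targetdir}
python /opt/module/datax/bin/datax.py \
  -p"-Dtargetdir=/origin_data/db/activity_info_full/2022-06-08" \
  /opt/module/datax/job/import/activity_info.json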
Create the Script
vim mysql_to_hdfs_full.sh
#!/bin/bash
DATAX_HOME=/opt/module/datax
# If a date was passed in, use it as do_date; otherwise default to yesterday
if [ -n "$2" ]; then
  do_date=$2
else
  do_date=$(date -d "-1 day" +%F)
fi
# Handle the target path: create it if it does not exist; empty it if it does,
# so that the sync job can be re-run safely
handle_targetdir() {
  if ! hadoop fs -test -e "$1"; then
    echo "Path $1 does not exist, creating it..."
    hadoop fs -mkdir -p "$1"
  else
    echo "Path $1 already exists, emptying it..."
    hadoop fs -rm -r -f "$1"/*
  fi
}
# Data sync: run one DataX job, writing to the given HDFS target directory
import_data() {
  datax_config=$1
  target_dir=$2
  handle_targetdir "$target_dir"
  python $DATAX_HOME/bin/datax.py -p"-Dtargetdir=$target_dir" "$datax_config"
}
case $1 in
"activity_info")
import_data /opt/module/datax/job/import/activity_info.json /origin_data/db/activity_info_full/$do_date
;;
"activity_rule")
import_data /opt/module/datax/job/import/activity_rule.json /origin_data/db/activity_rule_full/$do_date
;;
"base_category1")
import_data /opt/module/datax/job/import/base_category1.json /origin_data/db/base_category1_full/$do_date
;;
"base_category2")
import_data /opt/module/datax/job/import/base_category2.json /origin_data/db/base_category2_full/$do_date
;;
"base_category3")
import_data /opt/module/datax/job/import/base_category3.json /origin_data/db/base_category3_full/$do_date
;;
"base_dic")
import_data /opt/module/datax/job/import/base_dic.json /origin_data/db/base_dic_full/$do_date
;;
"base_province")
import_data /opt/module/datax/job/import/base_province.json /origin_data/db/base_province_full/$do_date
;;
"base_region")
import_data /opt/module/datax/job/import/base_region.json /origin_data/db/base_region_full/$do_date
;;
"base_trademark")
import_data /opt/module/datax/job/import/base_trademark.json /origin_data/db/base_trademark_full/$do_date
;;
"cart_info")
import_data /opt/module/datax/job/import/cart_info.json /origin_data/db/cart_info_full/$do_date
;;
"coupon_info")
import_data /opt/module/datax/job/import/coupon_info.json /origin_data/db/coupon_info_full/$do_date
;;
"sku_attr_value")
import_data /opt/module/datax/job/import/sku_attr_value.json /origin_data/db/sku_attr_value_full/$do_date
;;
"sku_info")
import_data /opt/module/datax/job/import/sku_info.json /origin_data/db/sku_info_full/$do_date
;;
"sku_sale_attr_value")
import_data /opt/module/datax/job/import/sku_sale_attr_value.json /origin_data/db/sku_sale_attr_value_full/$do_date
;;
"spu_info")
import_data /opt/module/datax/job/import/spu_info.json /origin_data/db/spu_info_full/$do_date
;;
"promotion_pos")
import_data /opt/module/datax/job/import/promotion_pos.json /origin_data/db/promotion_pos_full/$do_date
;;
"promotion_refer")
import_data /opt/module/datax/job/import/promotion_refer.json /origin_data/db/promotion_refer_full/$do_date
;;
"all")
import_data /opt/module/datax/job/import/activity_info.json /origin_data/db/activity_info_full/$do_date
import_data /opt/module/datax/job/import/activity_rule.json /origin_data/db/activity_rule_full/$do_date
import_data /opt/module/datax/job/import/base_category1.json /origin_data/db/base_category1_full/$do_date
import_data /opt/module/datax/job/import/base_category2.json /origin_data/db/base_category2_full/$do_date
import_data /opt/module/datax/job/import/base_category3.json /origin_data/db/base_category3_full/$do_date
import_data /opt/module/datax/job/import/base_dic.json /origin_data/db/base_dic_full/$do_date
import_data /opt/module/datax/job/import/base_province.json /origin_data/db/base_province_full/$do_date
import_data /opt/module/datax/job/import/base_region.json /origin_data/db/base_region_full/$do_date
import_data /opt/module/datax/job/import/base_trademark.json /origin_data/db/base_trademark_full/$do_date
import_data /opt/module/datax/job/import/cart_info.json /origin_data/db/cart_info_full/$do_date
import_data /opt/module/datax/job/import/coupon_info.json /origin_data/db/coupon_info_full/$do_date
import_data /opt/module/datax/job/import/sku_attr_value.json /origin_data/db/sku_attr_value_full/$do_date
import_data /opt/module/datax/job/import/sku_info.json /origin_data/db/sku_info_full/$do_date
import_data /opt/module/datax/job/import/sku_sale_attr_value.json /origin_data/db/sku_sale_attr_value_full/$do_date
import_data /opt/module/datax/job/import/spu_info.json /origin_data/db/spu_info_full/$do_date
import_data /opt/module/datax/job/import/promotion_pos.json /origin_data/db/promotion_pos_full/$do_date
import_data /opt/module/datax/job/import/promotion_refer.json /origin_data/db/promotion_refer_full/$do_date
;;
esac
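Before the first run, grant the script execute permission. Optionally place it in a directory on your PATH so it can be called from anywhere; ~/bin below is an assumption, adjust to your setup:

# One-time setup: make the script executable
chmod +x mysql_to_hdfs_full.sh
# Optional: move it onto the PATH (~/bin is assumed here)
mv mysql_to_hdfs_full.sh ~/bin/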
Run the Script
The date passed here is appended to the HDFS path, which effectively partitions the data by day.
mysql_to_hdfs_full.sh all 2022-06-08
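The script also accepts a single table name as the first argument, and the date may be omitted, in which case do_date falls back to yesterday:

# Import only activity_info for an explicit date
mysql_to_hdfs_full.sh activity_info 2022-06-08
# Omit the date to default to yesterday (date -d "-1 day" +%F)
mysql_to_hdfs_full.sh all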
Check the Result
Here we inspect one table's data to verify the import. The jobs write gzip-compressed text files, so the output is piped through zcat.
hadoop fs -cat /origin_data/db/activity_info_full/2022-06-08/* | zcat
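A quick sanity check is to compare row counts between HDFS and the source table. The MySQL host, user, and database below (hadoop102, root, gmall) are assumptions based on the typical course environment; substitute your own:

# Count rows landed on HDFS (files are gzip-compressed text)
hadoop fs -cat /origin_data/db/activity_info_full/2022-06-08/* | zcat | wc -l
# Compare against the source table's row count (connection details are assumed)
mysql -hhadoop102 -uroot -p -e "SELECT COUNT(*) FROM gmall.activity_info;"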