Data Vault初探(十) —— 星型模型向Data Vault模型转化

2022-05-07 11:32:12 浏览数 (3)

源数据库模型(3NF)如下图:

星型模型如下图:

Data Vault模型如下图:

说明: 星型模型(star schema)的事实表采取了完全规范化的第三范式(3NF)模型,而维表采取了第二范式的设计模型。有时也会把维表的设计规范化,就成了所谓的雪花模型(snowflake schema)。 星型模型向Data Vault模型转化: 星型模趔的主要构成部分是维表与事实表,在转化为DataVault模型时自然涉及到维表与事实表的分别转化,使之映射为DataVault模型的Hub、Link、Satellite组件。星型模型与DataVault模型表的对应如下表所示。

转换SQL脚本如下:

代码语言:javascript复制
use dv;

-- 装载客户中心表
insert hub_customer (customer_id,record_source) 
select distinct customer_id,'customer_dim' from customer_dim;

-- 装载客户附属表
insert into sat_customer
(
	hub_customer_id,
	load_dts,
	load_end_dts,
	record_source,
	customer_name,
	city_name,
	province_name,
	cust_post_code,
	cust_address,
	ship_post_code,
	ship_address
)
select t1.hub_customer_id,
	   t2.effective_date,
	   t2.expiry_date,
	   'hub_customer,customer_dim',
	   t2.customer_name,
	   t2.city_name,
	   t2.province_name,
	   t2.cust_post_code,
	   t2.cust_address,
	   t2.ship_post_code,
	   t2.ship_address
  from hub_customer t1,customer_dim t2 
 where t1.customer_id=t2.customer_id;
 
-- 装载产品分类中心表
insert hub_product_catagory (product_catagory_id,record_source) 
select distinct product_category_id,'product_dim' from product_dim;

-- 装载产品中心表
insert hub_product (product_id,record_source) 
select distinct product_id,'product_dim' from product_dim;

-- 装载产品分类_产品链接表
insert into link_product_catagory (hub_product_id,hub_product_catagory_id,record_source)
select distinct t1.hub_product_id,t2.hub_product_catagory_id,'hub_product,product_dim,hub_product_catagory'
  from hub_product t1,hub_product_catagory t2,product_dim t3
 where t1.product_id = t3.product_id and t2.product_catagory_id = t3.product_category_id;

-- 装载产品分类附属表
insert into sat_product_catagory 
(hub_product_catagory_id,
load_dts,
load_end_dts,
record_source,
product_catagory_name)
select 
    t1.hub_product_catagory_id,
    t2.start_date,
    t2.end_date,
    'hub_product_catagory,product_dim' record_source,
    t2.cname
from
    hub_product_catagory t1,
    (select 
        t1.cid cid,
            t1.cname cname,
            t1.start_date start_date,
            t2.end_date end_date
    from
        (select distinct
        if(@cid = t1.cid and @cname = t1.cname, @start_date, @start_date:=t1.start_date) as start_date,
            @cid:=t1.cid cid,
            @cname:=t1.cname cname
    from
        ((select 
        product_category_id cid,
            product_category_name cname,
            effective_date start_date
    from
        product_dim
    order by cid , start_date) t1, (select @cid:=0, @cname:='', @start_date:='0000-00-00') t2)) t1, (select distinct
        if(@cid = t1.cid and @cname <> t1.cname, t1.end_date, '2200-01-01') as end_date,
            @cid:=t1.cid cid,
            @cname:=t1.cname cname
    from
        ((select 
        product_category_id cid,
            product_category_name cname,
            expiry_date end_date
    from
        product_dim
    order by cid , end_date desc) t1, (select @cid:=0, @cname:='', @end_date:='0000-0000-00') t2)) t2
    where
        t1.cid = t2.cid and t1.cname = t2.cname) t2
where
    t1.product_catagory_id = t2.cid;

-- 装载产品附属表
insert into sat_product
(
hub_product_id,
load_dts,
load_end_dts,
record_source,
product_name,
unit_price
)
select t1.hub_product_id,t2.start_date,t2.end_date,'hub_product,product_dim' record_source,
t2.pname pname,t2.unit_price
from hub_product t1,
(
select 
    t1.pid pid,
    t1.pname pname,
    t1.unit_price,
    t1.start_date start_date,
    t2.end_date end_date
from
    (select distinct
        if(@pid = t1.pid and @pname = t1.pname
                and @unit_price = t1.unit_price, @start_date, @start_date:=t1.start_date) as start_date,
            @pid:=t1.pid pid,
            @pname:=t1.pname pname,
            @unit_price:=t1.unit_price unit_price
    from
        ((select 
        product_id pid,
            product_name pname,
            unit_price,
            effective_date start_date
    from
        product_dim
    order by pid , start_date) t1, (select 
        @pid:=0,
            @pname:='',
            @unit_price:=0,
            @start_date:='0000-00-00'
    ) t2)) t1,
    (select distinct
        if(@pid = t1.pid
                and (@pname <> t1.pname
                OR @unit_price <> t1.unit_price), t1.end_date, '2200-01-01') as end_date,
            @pid:=t1.pid pid,
            @pname:=t1.pname pname,
            @unit_price:=t1.unit_price unit_price
    from
        ((select 
        product_id pid,
            product_name pname,
            unit_price,
            expiry_date end_date
    from
        product_dim
    order by pid , end_date desc) t1, (select 
        @pid:=0,
            @pname:='',
            @unit_price:=0,
            @end_date:='0000-0000-00'
    ) t2)) t2
where
    t1.pid = t2.pid and t1.pname = t2.pname
        and t1.unit_price = t2.unit_price) t2 where t1.product_id=t2.pid;

-- 装载销售订单中心表
insert into hub_sales_order (sales_order_id,record_source) 
select distinct sales_order_id,'sales_order_dim' from sales_order_dim;

-- 装载销售订单附属表
insert into sat_sales_order
(
hub_sales_order_id,
load_dts,
load_end_dts,
record_source,
order_time,
entry_time,
amount,
allocate_time,
packing_time,
ship_time,
receive_time)
select 
t1.hub_sales_order_id,
t2.effective_date,
t2.expiry_date,
'hub_sales_order,sales_order_dim',
t2.order_time,
t2.entry_time,
t2.amount,
t2.allocate_time,
t2.packing_time,
t2.ship_time,
t2.receive_time
from hub_sales_order t1,sales_order_dim t2
where t1.sales_order_id = t2.sales_order_id;

-- 装载订单产品链接表
insert into link_order_product
(
hub_sales_order_id,
hub_product_id,
record_source)
select t1.hub_sales_order_id, t2.hub_product_id,
'hub_sales_order,hub_product,sales_order_dim,product_dim,sales_order_fact'
from 
hub_sales_order t1,
hub_product t2,
(select t1.sales_order_id sales_order_id, t2.product_id product_id
from sales_order_dim t1, product_dim t2, sales_order_fact t3 
where t1.sales_order_sk = t3.sales_order_sk and t2.product_sk = t3.product_sk) t3
where t1.sales_order_id = t3.sales_order_id and t2.product_id = t3.product_id;

-- 装载订单客户链接表
insert into link_order_customer
(
hub_sales_order_id,
hub_customer_id,
record_source)
select t1.hub_sales_order_id, t2.hub_customer_id,
'hub_sales_order,hub_customer,sales_order_dim,customer_dim,sales_order_fact'
from 
hub_sales_order t1,
hub_customer t2,
(select t1.sales_order_id sales_order_id, t2.customer_id customer_id
from sales_order_dim t1, customer_dim t2, sales_order_fact t3 
where t1.sales_order_sk = t3.sales_order_sk and t2.customer_sk = t3.customer_sk) t3
where t1.sales_order_id = t3.sales_order_id and t2.customer_id = t3.customer_id;

-- 装载订单产品附属表
insert into sat_order_product
(
link_order_product_id,
load_dts,
load_end_dts,
record_source,
unit_price,
quantity
)
select t1.link_order_product_id,t2.effective_date,t2.expiry_date,
'link_order_product,product_dim,sales_order_dim,sales_order_fact,hub_sales_order,hub_product',
t2.unit_price,
t4.quantity
from 
link_order_product t1, 
product_dim t2, 
sales_order_dim t3, 
sales_order_fact t4,
hub_sales_order t5,
hub_product t6
where t1.hub_sales_order_id = t5.hub_sales_order_id
  and t1.hub_product_id = t6.hub_product_id
  and t4.sales_order_sk = t3.sales_order_sk
  and t4.product_sk = t2.product_sk
  and t5.sales_order_id = t3.sales_order_id
  and t6.product_id = t2.product_id;
  
COMMIT;

0 人点赞