数据仓库开发

ODS 贴源数据

保持数据原貌,不做任何修改; 创建分区表。

DROP TABLE IF EXISTS ods_event_track_record;
CREATE TABLE IF NOT EXISTS ods_event_track_record (
  one_id string, 
  user_id string comment '用户id', 
  event_code string comment '事件编码', 
  event_time string comment '事件时间', -- yyyy-MM-dd HH:mm:ss
  event_values string comment '事件属性', 
) comment '埋点事件表'
;

DWD 明细数据

数据质量检查; 清洗,去重,去空; 脱敏; 维度退化。

DROP TABLE IF EXISTS dwd_shop_user_access;
CREATE TABLE IF NOT EXISTS dwd_shop_user_access (
  one_id string, 
  user_id string comment '用户id', 
  event_code string comment '商城行为标识',  -- shop_enter
  event_time string comment '商城行为时间'  -- yyyy-MM-dd HH:mm:ss
) comment '商城访问记录表'
;

DROP TABLE IF EXISTS dwd_shop_user_cart;
CREATE TABLE IF NOT EXISTS dwd_shop_user_cart (
  one_id string, 
  user_id string comment '用户id', 
  event_code string comment '商城行为标识',  -- shop_cart
  event_time string comment '商城行为时间',  -- yyyy-MM-dd HH:mm:ss
  product_id string comment '商品id', 
  product_name string comment '商品名称'
) comment '商城购物车记录表'
;

DROP TABLE IF EXISTS dwd_shop_user_order;
CREATE TABLE IF NOT EXISTS dwd_shop_user_order (
  one_id string, 
  user_id string comment '用户id', 
  event_code string comment '商城行为标识',  -- shop_order
  event_time string comment '商城行为时间',  -- yyyy-MM-dd HH:mm:ss
  amount int comment '支付金额'
) comment '商城下单记录表'
;

HIVE SQL:

insert into table dwd_shop_user_access 
select one_id ,user_id ,event_code ,event_time 
from ods_event_track_record
where event_code like 'shop_%'
 and event_time BETWEEN 
DATE_FORMAT(date_sub(current_date,1),'yyyy-MM-dd HH:mm:ss') 
and 
DATE_FORMAT(current_date(),'yyyy-MM-dd HH:mm:ss') ;
;

DWS 轻度汇总数据

维度关联; 打宽;

DROP TABLE IF EXISTS dws_shop_user_access_hourly;
CREATE TABLE IF NOT EXISTS dws_shop_user_access_hourly (
	one_id string, 
	user_id string comment '用户id', 
	
    event_time string comment '商城行为转小时时间', -- yyyymmddHH
	enter_flag int comment '是否进入商城',  -- 0/1
	cart_flag int comment '是否进入购物车行为',  -- 0/1
	order_flag int comment '是否下单行为',  -- 0/1

	country_code string comment '国家代码',
	province_code string comment '省市代码',
	city_code string comment '市县代码'
) comment '用户小时内访问商城记录表'
;

HIVE SQL:

insert into table dws_shop_user_access_hourly 
    select one_id,
    t1.user_id,
    date_format(event_time,'yyyymmddHH') as event_time ,
    (case when event_code ='enter_shop' then 1 else 0 end) as enter_flag ,
    (case when event_code ='shop_cart' then 1 else 0 end) as cart_flag,
    (case when event_code ='shop_order' then 1 else 0 end) as cart_flag ,
    t2.country_code ,
    t2.province_code,
    t2.city_code 
from test.dwd_shop_user_access t1 
   left join test.dim_mdm_shop_user t2 on t1.user_id = t2.user_id
 and event_time BETWEEN 
DATE_FORMAT(date_sub(current_date,1),'yyyy-MM-dd HH:mm:ss') 
and 
DATE_FORMAT(current_date(),'yyyy-MM-dd HH:mm:ss') ;
;

轻度汇总。

DROP TABLE IF EXISTS dws_shop_user_access_count_hourly;
CREATE TABLE IF NOT EXISTS dws_shop_user_access_count_hourly (
	user_id string comment '用户id', 

	hour_period string comment '时段',  -- HH
	enter_count bigint comment '进入次数', 
	cart_count bigint comment '购物车次数', 
	order_count bigint comment '下单次数', 

	country_code string comment '国家代码',
	province_code string comment '省市代码',
	city_code string comment '市县代码'
) comment '用户各时段访问商城行为总次数'
;

HIVE SQL:

insert overwrite table dws_shop_user_access_count_hourly 
select 
    user_id,
    SUBSTR(event_time,-2) as hour_period,
    sum(enter_flag) over (partition by SUBSTR(event_time,-2)) as enter_count,
    sum(cart_flag) over (partition by SUBSTR(event_time,-2)) as cart_count,
    sum(order_flag) over (partition by SUBSTR(event_time,-2)) as order_count,
    country_code,
    province_code,
    city_code 
from dws_shop_user_access_hourly
;

统计指标。

DROP TABLE IF EXISTS dws_shop_user_access_index;
CREATE TABLE IF NOT EXISTS dws_shop_user_access_index (
	user_id string comment '用户id', 
    
	all_enter_count bigint comment '进入次数', 
	all_cart_count bigint comment '购物车次数', 
	all_order_count bigint comment '下单次数', 

	month_avg_enter_count bigint comment '月均进入次数', 
	month_avg_cart_count bigint comment '月均购物车次数', 
	month_avg_order_count bigint comment '月均下单次数', 

	country_code string comment '国家代码',
	province_code string comment '省市代码',
	city_code string comment '市县代码'
) comment '用户各时段访问商城行为总次数'
;

ADS 应用数据服务


DIM 维度

DROP TABLE IF EXISTS dim_area;
CREATE TABLE IF NOT EXISTS dim_area (
	country_code string comment '国家代码',
	country_name string comment '国家名称',
	province_code string comment '省市代码',
	province_name string comment '省市名称',
	city_code string comment '市县代码',
	city_name string comment '市县名称'
) comment '地区维度表';

MDM 主数据

DROP TABLE IF EXISTS mdm_shop_user;
CREATE TABLE IF NOT EXISTS mdm_shop_user (
	user_id string comment '用户id',
	user_name string comment '用户名称',

	country_code string comment '国家代码',
	province_code string comment '省市代码',
	city_code string comment '市县代码'
) comment '商城顾客表'
;

测试数据

dim-area.csv

ods_event_track_record.csv

A1000001,VA01XM01,P01,enter_shop,2021-05-15 12:11:00
A1000002,VA01XM02,P02,enter_shop,2021-05-15 15:12:00
A1000003,VA01XM03,P03,enter_shop,2021-06-15 16:14:00

mdm_shop_user.csv

P01,张XX,CN,BJ,BJ
P02,李XX,CN,SH,SH
P03,王XX,CN,BJ,BJ