数据仓库开发
ODS 贴源数据
保持数据原貌,不做任何修改; 创建分区表。
DROP TABLE IF EXISTS ods_event_track_record;
CREATE TABLE IF NOT EXISTS ods_event_track_record (
one_id string,
user_id string comment '用户id',
event_code string comment '事件编码',
event_time string comment '事件时间', -- yyyy-MM-dd HH:mm:ss
event_values string comment '事件属性',
) comment '埋点事件表'
;
DWD 明细数据
数据质量检查; 清洗,去重,去空; 脱敏; 维度退化。
DROP TABLE IF EXISTS dwd_shop_user_access;
CREATE TABLE IF NOT EXISTS dwd_shop_user_access (
one_id string,
user_id string comment '用户id',
event_code string comment '商城行为标识', -- shop_enter
event_time string comment '商城行为时间' -- yyyy-MM-dd HH:mm:ss
) comment '商城访问记录表'
;
DROP TABLE IF EXISTS dwd_shop_user_cart;
CREATE TABLE IF NOT EXISTS dwd_shop_user_cart (
one_id string,
user_id string comment '用户id',
event_code string comment '商城行为标识', -- shop_cart
event_time string comment '商城行为时间', -- yyyy-MM-dd HH:mm:ss
product_id string comment '商品id',
product_name string comment '商品名称'
) comment '商城购物车记录表'
;
DROP TABLE IF EXISTS dwd_shop_user_order;
CREATE TABLE IF NOT EXISTS dwd_shop_user_order (
one_id string,
user_id string comment '用户id',
event_code string comment '商城行为标识', -- shop_order
event_time string comment '商城行为时间', -- yyyy-MM-dd HH:mm:ss
amount int comment '支付金额'
) comment '商城下单记录表'
;
HIVE SQL:
insert into table dwd_shop_user_access
select one_id ,user_id ,event_code ,event_time
from ods_event_track_record
where event_code like 'shop_%'
and event_time BETWEEN
DATE_FORMAT(date_sub(current_date,1),'yyyy-MM-dd HH:mm:ss')
and
DATE_FORMAT(current_date(),'yyyy-MM-dd HH:mm:ss') ;
;
DWS 轻度汇总数据
维度关联; 打宽;
DROP TABLE IF EXISTS dws_shop_user_access_hourly;
CREATE TABLE IF NOT EXISTS dws_shop_user_access_hourly (
one_id string,
user_id string comment '用户id',
event_time string comment '商城行为转小时时间', -- yyyymmddHH
enter_flag int comment '是否进入商城', -- 0/1
cart_flag int comment '是否进入购物车行为', -- 0/1
order_flag int comment '是否下单行为', -- 0/1
country_code string comment '国家代码',
province_code string comment '省市代码',
city_code string comment '市县代码'
) comment '用户小时内访问商城记录表'
;
HIVE SQL:
insert into table dws_shop_user_access_hourly
select one_id,
t1.user_id,
date_format(event_time,'yyyymmddHH') as event_time ,
(case when event_code ='enter_shop' then 1 else 0 end) as enter_flag ,
(case when event_code ='shop_cart' then 1 else 0 end) as cart_flag,
(case when event_code ='shop_order' then 1 else 0 end) as cart_flag ,
t2.country_code ,
t2.province_code,
t2.city_code
from test.dwd_shop_user_access t1
left join test.dim_mdm_shop_user t2 on t1.user_id = t2.user_id
and event_time BETWEEN
DATE_FORMAT(date_sub(current_date,1),'yyyy-MM-dd HH:mm:ss')
and
DATE_FORMAT(current_date(),'yyyy-MM-dd HH:mm:ss') ;
;
轻度汇总。
DROP TABLE IF EXISTS dws_shop_user_access_count_hourly;
CREATE TABLE IF NOT EXISTS dws_shop_user_access_count_hourly (
user_id string comment '用户id',
hour_period string comment '时段', -- HH
enter_count bigint comment '进入次数',
cart_count bigint comment '购物车次数',
order_count bigint comment '下单次数',
country_code string comment '国家代码',
province_code string comment '省市代码',
city_code string comment '市县代码'
) comment '用户各时段访问商城行为总次数'
;
HIVE SQL:
insert overwrite table dws_shop_user_access_count_hourly
select
user_id,
SUBSTR(event_time,-2) as hour_period,
sum(enter_flag) over (partition by SUBSTR(event_time,-2)) as enter_count,
sum(cart_flag) over (partition by SUBSTR(event_time,-2)) as cart_count,
sum(order_flag) over (partition by SUBSTR(event_time,-2)) as order_count,
country_code,
province_code,
city_code
from dws_shop_user_access_hourly
;
统计指标。
DROP TABLE IF EXISTS dws_shop_user_access_index;
CREATE TABLE IF NOT EXISTS dws_shop_user_access_index (
user_id string comment '用户id',
all_enter_count bigint comment '进入次数',
all_cart_count bigint comment '购物车次数',
all_order_count bigint comment '下单次数',
month_avg_enter_count bigint comment '月均进入次数',
month_avg_cart_count bigint comment '月均购物车次数',
month_avg_order_count bigint comment '月均下单次数',
country_code string comment '国家代码',
province_code string comment '省市代码',
city_code string comment '市县代码'
) comment '用户各时段访问商城行为总次数'
;
ADS 应用数据服务
DIM 维度
DROP TABLE IF EXISTS dim_area;
CREATE TABLE IF NOT EXISTS dim_area (
country_code string comment '国家代码',
country_name string comment '国家名称',
province_code string comment '省市代码',
province_name string comment '省市名称',
city_code string comment '市县代码',
city_name string comment '市县名称'
) comment '地区维度表';
MDM 主数据
DROP TABLE IF EXISTS mdm_shop_user;
CREATE TABLE IF NOT EXISTS mdm_shop_user (
user_id string comment '用户id',
user_name string comment '用户名称',
country_code string comment '国家代码',
province_code string comment '省市代码',
city_code string comment '市县代码'
) comment '商城顾客表'
;
测试数据
dim-area.csv
ods_event_track_record.csv
A1000001,VA01XM01,P01,enter_shop,2021-05-15 12:11:00
A1000002,VA01XM02,P02,enter_shop,2021-05-15 15:12:00
A1000003,VA01XM03,P03,enter_shop,2021-06-15 16:14:00
mdm_shop_user.csv
P01,张XX,CN,BJ,BJ
P02,李XX,CN,SH,SH
P03,王XX,CN,BJ,BJ