一、用户启动DWS
在运行之前,先创建数据库:
drop database dws_nshop cascade;create database dws_nshop;
create external table if not exists dws_nshop.dws_nshop_ulog_launch(user_id string comment '用户id',device_num string comment '设备号',device_type string comment '设备类型',os string comment '手机系统',os_version string comment '手机系统版本',manufacturer string comment '手机制造商',carrier string comment '电信运营商',network_type string comment '网络类型',area_code string comment '地区编码',launch_count int comment '启动次数') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/user/dws_nshop_ulog_launch/'
insert overwrite table dws_nshop.dws_nshop_ulog_launch partition(bdp_day='20220509')selectdistinct user_id ,device_num ,device_type ,os ,os_version ,manufacturer,carrier ,network_type,area_code ,count(user_id) over (partition by user_id) launch_countfrom dwd_nshop.dwd_nshop_actlog_launchwhere bdp_day="20220509";
二、用户浏览DWS
create external table if not exists dws_nshop.dws_nshop_ulog_view(user_id string comment '用户id',device_num string comment '设备号',device_type string comment '设备类型',os string comment '手机系统',os_version string comment '手机系统版本',manufacturer string comment '手机制造商',carrier string comment '电信运营商',network_type string comment '网络类型',area_code string comment '地区编码',view_count int comment '浏览次数') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/user/dws_nshop_ulog_view/'
insert overwrite table dws_nshop.dws_nshop_ulog_view partition(bdp_day='20220509')selectdistinct user_id ,device_num ,device_type ,os ,os_version ,manufacturer,carrier ,network_type,area_code ,count(user_id) over(partition by user_id) view_countfrom dwd_nshop.dwd_nshop_actlog_pdtviewwhere bdp_day="20220509";
三、用户查询dws
create external table if not exists dws_nshop.dws_nshop_ulog_search(user_id string comment '用户id',device_num string comment '设备号',device_type string comment '设备类型',os string comment '手机系统',os_version string comment '手机系统版本',manufacturer string comment '手机制造商',carrier string comment '电信运营商',network_type string comment '网络类型',area_code string comment '地区编码',search_count int comment '搜索次数') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/user/dws_nshop_ulog_search/'
insert overwrite table dws_nshop.dws_nshop_ulog_search partition(bdp_day='20220509')selectdistinct user_id ,device_num ,device_type ,os ,os_version ,manufacturer,carrier ,network_type,area_code ,count(user_id) over(partition by user_id) search_countfrom dwd_nshop.dwd_nshop_actlog_pdtsearchwhere bdp_day="20220509"
四、用户关注dws
create external table if not exists dws_nshop.dws_nshop_ulog_comment(user_id string comment '用户id',device_num string comment '设备号',device_type string comment '设备类型',os string comment '手机系统',os_version string comment '手机系统版本',manufacturer string comment '手机制造商',carrier string comment '电信运营商',network_type string comment '网络类型',area_code string comment '地区编码',comment_count int comment '关注次数',-- 不去重comment_target_count int comment '关注产品次数',--去重ct bigint comment '产生时间') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/user/dws_nshop_ulog_comment/'
编写SQL语句:
insert overwrite table dws_nshop.dws_nshop_ulog_comment partition(bdp_day='20220509')selectdistinct user_id ,device_num ,device_type ,os ,os_version ,manufacturer,carrier ,network_type,area_code ,count(user_id) over(partition by user_id) as comment_count,count(distinct user_id) over(partition by user_id) as comment_target_count,current_timestamp()from dwd_nshop.dwd_actlog_product_commentwhere bdp_day="20220509"
五、用户交易宽表
create external table if not exists dws_nshop.dws_nshop_user_orders(user_id string comment '用户id',customer_natives string comment '所在区域',orders_count int comment '订单数量',orders_pay DECIMAL(10,1) comment '订单金额',orders_shipping DECIMAL(10,1) comment '订单运费金额',orders_district DECIMAL(10,1) comment '订单优惠金额',ct bigint comment '产生时间') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/user/dws_nshop_user_orders/'
发现这个里面的数据:customer_natives 是我们的customer表,剩余的是订单明细表
with t1 as (selecta.customer_id,a.district_money ,a.shipping_money,a.payment_money,a.order_id,b.customer_nativesfromdwd_nshop.dwd_nshop_orders_details ajoin ods_nshop.ods_02_customer bon a.customer_id = b.customer_idwhere a.bdp_day = '20220509')insert overwrite table dws_nshop.dws_nshop_user_orders partition(bdp_day='20220509')selectcustomer_id,customer_natives,count(order_id) over(partition by customer_id),sum(payment_money) over(partition by customer_id),sum(shipping_money) over(partition by customer_id),sum(district_money) over(partition by customer_id),current_timestamp()fromt1;
六、用户投诉宽表
create external table if not exists dws_nshop.dws_nshop_user_complainant(user_id string comment '用户id',area_code string comment '地区编码',compl_orders_count int comment '订单数量',compl_orders_pay DECIMAL(10,1) comment '订单金额',compl_supplier_count int comment '商家数量',ct bigint comment '产生时间') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/user/dws_nshop_user_complainant/'
订单状态为6的是投诉。
insert overwrite table dws_nshop.dws_nshop_user_complainant partition(bdp_day='20220509')selectdistinct a.customer_id,b.customer_natives,count(a.order_id) over(partition by a.customer_id),sum(a.payment_money) over(partition by a.customer_id),sum(a.supplier_code) over(partition by a.customer_id),current_timestamp()fromdwd_nshop.dwd_nshop_orders_details ajoin ods_nshop.ods_02_customer bon a.customer_id = b.customer_idwhere a.bdp_day = '20220509' and a.order_status=6;
七、商家用户浏览宽表
create external table if not exists dws_nshop.dws_nshop_supplier_user(supplier_id string comment '商家id',supplier_type int comment '供应商类型:1.自营,2.官方 3其他',view_count int comment '浏览次数',comment_users int comment '关注人数',comment_area_code int comment '关注地区数量',ct bigint comment '产生时间') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/supplier/dws_nshop_supplier_user/'
首先必须有供应商这个表ods_nshop.dim_pub_supplier,用户浏览表dwd_nshop.dwd_nshop_actlog_pdtview 而我们的 用户浏览表只有一个target_id(访问页面的编号),
target_id 跟 dim_pub_page 这个表中的page_code 是直接关系,dim_pub_page 中有商品编号(page_target)
根据商品编号怎么知道供应商的编号?
寻找商品表(ods_nshop.dim_pub_product) 里面有一个供应商的编码(supplier_code)
insert overwrite table dws_nshop.dws_nshop_supplier_user partition(bdp_day='20220509')selectd.supplier_code,d.supplier_type,count(1) as view_count,count(distinct a.user_id) as comment_users,count(distinct a.area_code) as comment_area_code,current_timestamp()from dwd_nshop.dwd_nshop_actlog_pdtview ajoin ods_nshop.dim_pub_page bon a.target_id= b.page_codejoin ods_nshop.dim_pub_product con b.page_target = c.product_codejoin ods_nshop.dim_pub_supplier don c.supplier_code= d.supplier_codewhere bdp_day='20220509'group by d.supplier_code,d.supplier_type;
八、商家日销售宽表
create external table if not exists dws_nshop.dws_nshop_supplier_sales(supplier_id string comment '商家id',supplier_type int comment '供应商类型:1.自营,2.官方 3其他',sales_users int comment '购物人数',sales_users_area int comment '购物地区数量',sales_orders int comment '购物订单数',salaes_orders_pay DECIMAL(10,1) comment '订单金额',salaes_orders_district DECIMAL(10,1) comment '订单优惠金额',ct bigint comment '产生时间') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/supplier/dws_nshop_supplier_sales/'
insert overwrite table dws_nshop.dws_nshop_supplier_sales partition(bdp_day='20220509')selecta.supplier_code,c.supplier_type,count(distinct a.customer_id) sales_users,count(distinct a.consignee_zipcode) as sales_users_area,count(a.order_id) as sales_orders,sum(a.payment_money) as salaes_orders_pay,sum(a.district_money) as salaes_orders_district,current_timestamp()from dwd_nshop.dwd_nshop_orders_details ajoin ods_nshop.dim_pub_product bon a.product_code = b.product_codejoin ods_nshop.dim_pub_supplier con c.supplier_code= b.supplier_codewhere bdp_day='20220509'group by a.supplier_code,c.supplier_type;
九、广告投放用户宽表
create external table if not exists dws_nshop.dws_nshop_release_user(release_sources string comment '投放渠道',release_category string comment '投放浏览产品分类',release_users int comment '投放浏览用户数',release_product_page int comment '投放浏览产品页面数',ct bigint comment '创建时间') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/release/dws_nshop_release_user/'
insert overwrite table dws_nshop.dws_nshop_release_user partition(bdp_day='20220509')selectrelease_sources,release_category,count(distinct customer_id),count(*),current_timestamp()fromdwd_nshop.dwd_nshop_releasedataswhere bdp_day="20220509"group byrelease_sources,release_category
十、用户营销活动宽表
create external table if not exists dws_nshop.dws_nshop_user_release(user_id string comment '用户id',os string comment '手机系统',os_version string comment '手机系统版本',manufacturer string comment '手机制造商',carrier string comment '电信运营商',network_type string comment '网络类型',area_code string comment '地区编码',source_count int comment '投放来源数量',ct bigint comment '产生时间') partitioned by (bdp_day string)stored as parquetlocation '/data/nshop/dws/user/dws_nshop_user_release/';
insert overwrite table dws_nshop.dws_nshop_user_release partition(bdp_day='20220509')selecta.customer_id,a.os ,a.os_version ,a.manufacturer,b.carrier,b.network_type,a.area_code ,count(1) over (partition by a.release_sources),current_timestamp()fromdwd_nshop.dwd_nshop_releasedatas ajoin ods_nshop.ods_nshop_01_useractlog bon a.customer_id = b.customer_id;
