问题导读:
1、如何设计设备主题?
2、如何设计会员主题(会员主题+漏斗分析)?
3、如何设计商品主题(个数+销量)?
4、如何设计营销主题(用户+商品+购买行为)?
**
一、数仓搭建 - ADS 层
1.1 设备主题
1.1.1 活跃设备数(日、周、月)
需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句
1. drop table if exists ads_uv_count;2. create external table ads_uv_count(3. `dt` string COMMENT '统计日期',4. `day_count` bigint COMMENT '当日用户数量',5. `wk_count` bigint COMMENT '当周用户数量',6. `mn_count` bigint COMMENT '当月用户数量',7. `is_weekend` string COMMENT 'Y,N 是否是周末,用于得到本周最终结果',8. `is_monthend` string COMMENT 'Y,N 是否是月末,用于得到本月最终结果'9. ) COMMENT '活跃设备数'10. row format delimited fields terminated by '\t'11. location '/warehouse/gmall/ads/ads_uv_count/';
2)导入数据
1. insert into table ads_uv_count2. select3. '2020-03-10' dt,4. daycount.ct,5. wkcount.ct,6. mncount.ct,7. if(date_add(next_day('2020-03-10','MO'),-1)='2020-03-10','Y','N') ,8. if(last_day('2020-03-10')='2020-03-10','Y','N')9. from10. (11. select12. '2020-03-10' dt,13. count(*) ct14. from dwt_uv_topic15. where login_date_last='2020-03-10'16. )daycount join17. (18. select19. '2020-03-10' dt,20. count (*) ct21. from dwt_uv_topic22. where login_date_last>=date_add(next_day('2020-03-10','MO'),-7)23. and login_date_last<= date_add(next_day('2020-03-10','MO'),-1)24. ) wkcount on daycount.dt=wkcount.dt25. join26. (27. select28. '2020-03-10' dt,29. count (*) ct30. from dwt_uv_topic31. where32. date_format(login_date_last,'yyyy-MM')=date_format('2020-03-10','yyyy-MM')33. )mncount on daycount.dt=mncount.dt;34.复制代码
3)查询导入结果
1. select * from ads_uv_count;
1.1.2 每日新增设备
1)建表语句
1. drop table if exists ads_new_mid_count;2. create external table ads_new_mid_count3. (4. `create_date` string comment '创建时间' ,5. `new_mid_count` BIGINT comment '新增设备数量'6. ) COMMENT '每日新增设备信息数量'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_new_mid_count/';
2)导入数据
1. insert into table ads_new_mid_count2. select3. login_date_first,4. count(*)5. from dwt_uv_topic6. where login_date_first='2020-03-10'7. group by login_date_first;
3)查询导入数据
1. select * from ads_new_mid_count;2.复制代码
1.1.3 沉默用户数
需求定义:
沉默用户:只在安装当天启动过,且启动时间是在 7 天前
1)建表语句
1. drop table if exists ads_silent_count;2. create external table ads_silent_count(3. `dt` string COMMENT '统计日期',4. `silent_count` bigint COMMENT '沉默设备数'5. )6. row format delimited fields terminated by '\t'7. location '/warehouse/gmall/ads/ads_silent_count';8.复制代码
2)导入 2020-03-20 数据
1. insert into table ads_silent_count2. select3. '2020-03-15',4. count(*)5. from dwt_uv_topic6. where login_date_first=login_date_last7. and login_date_last<=date_add('2020-03-15',-7);
3)查询导入数据
1. select * from ads_silent_count;
1.1.4 本周回流用户数
需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句
1. drop table if exists ads_back_count;2. create external table ads_back_count(3. `dt` string COMMENT '统计日期',4. `wk_dt` string COMMENT '统计日期所在周',5. `wastage_count` bigint COMMENT '回流设备数'6. )7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_back_count';
2)导入数据:
1. insert into table ads_back_count2. select3. '2020-03-15',4. count(*)5. from6. (7. select8. mid_id9. from dwt_uv_topic10. where login_date_last>=date_add(next_day('2020-03-15','MO'),-7)11. and login_date_last<= date_add(next_day('2020-03-15','MO'),-1)12. and login_date_first<date_add(next_day('2020-03-15','MO'),-7)13. )current_wk14. left join15. (16. select17. mid_id18. from dws_uv_detail_daycount19. where dt>=date_add(next_day('2020-03-15','MO'),-7*2)20. and dt<= date_add(next_day('2020-03-15','MO'),-7-1)21. group by mid_id22. )last_wk23. on current_wk.mid_id=last_wk.mid_id24. where last_wk.mid_id is null;25.复制代码
3)查询结果
1. select * from ads_back_count;
1.1.5 流失用户数
需求定义:
流失用户:最近 7 天未活跃的设备
1)建表语句
1. drop table if exists ads_wastage_count;2. create external table ads_wastage_count(3. `dt` string COMMENT '统计日期',4. `wastage_count` bigint COMMENT '流失设备数'5. )6. row format delimited fields terminated by '\t'7. location '/warehouse/gmall/ads/ads_wastage_count';
2)导入 2020-03-20 数据
1. insert into table ads_wastage_count2. select3. '2020-03-20',4. count(*)5. from6. (7. select8. mid_id9. from dwt_uv_topic10. where login_date_last<=date_add('2020-03-20',-7)11. group by mid_id12. )t1;
3)查询结果
1. select * from ads_wastage_count;
1.1.6 留存率

1)建表语句
1. drop table if exists ads_user_retention_day_rate;2. create external table ads_user_retention_day_rate3. (4. `stat_date` string comment '统计日期',5. `create_date` string comment '设备新增日期',6. `retention_day` int comment '截止当前日期留存天数',7. `retention_count` bigint comment '留存数量',8. `new_mid_count` bigint comment '设备新增数量',9. `retention_ratio` decimal(10,2) comment '留存率'10. ) COMMENT '每日用户留存情况'11. row format delimited fields terminated by '\t'12. location '/warehouse/gmall/ads/ads_user_retention_day_rate/';
2)导入数据
1. insert into table ads_user_retention_day_rate2. select3. '2020-03-10',--统计日期4. date_add('2020-03-10',-1),--新增日期5. 1,--留存天数6. sum(if(login_date_first=date_add('2020-03-10',-1) and7. login_date_last='2020-03-10',1,0)),--2020-03-09 的 1 日留存数8. sum(if(login_date_first=date_add('2020-03-10',-1),1,0)),--2020-03-09 新增9. sum(if(login_date_first=date_add('2020-03-10',-1) and10. login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-11. 1),1,0))*10012. from dwt_uv_topic13. union all14. select15. '2020-03-10',--统计日期16. date_add('2020-03-10',-2),--新增日期17. 2,--留存天数18. sum(if(login_date_first=date_add('2020-03-10',-2) and19. login_date_last='2020-03-10',1,0)),--2020-03-08 的 2 日留存数20. sum(if(login_date_first=date_add('2020-03-10',-2),1,0)),--2020-03-08 新增21. sum(if(login_date_first=date_add('2020-03-10',-2) and22. login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-23. 2),1,0))*10024. from dwt_uv_topic25. union all26. select27. '2020-03-10',--统计日期28. date_add('2020-03-10',-3),--新增日期29. 3,--留存天数30. sum(if(login_date_first=date_add('2020-03-10',-3) and31. login_date_last='2020-03-10',1,0)),--2020-03-07 的 3 日留存数32. sum(if(login_date_first=date_add('2020-03-10',-3),1,0)),--2020-03-07 新增33. sum(if(login_date_first=date_add('2020-03-10',-3) and34. login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-35. 3),1,0))*10036. from dwt_uv_topic;
3)查询导入数据
1. select * from ads_user_retention_day_rate;2.复制代码
1.1.7 最近连续三周活跃用户数
1)建表语句
1. drop table if exists ads_continuity_wk_count;2. create external table ads_continuity_wk_count(3. `dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日4. 期',5. `wk_dt` string COMMENT '持续时间',6. `continuity_count` bigint COMMENT '活跃次数'7. )8. row format delimited fields terminated by '\t'9. location '/warehouse/gmall/ads/ads_continuity_wk_count';
2)导入 2020-03-20 所在周的数据
1. insert into table ads_continuity_wk_count2. select3. '2020-03-15',4. concat(date_add(next_day('2020-03-15','MO'),-7*3),'_',date_add(next_day('5. 2020-03-15','MO'),-1)),6. count(*)7. from8. (9. select10. mid_id11. from12. (13. select14. mid_id15. from dws_uv_detail_daycount16. where dt>=date_add(next_day('2020-03-10','monday'),-7)17. and dt<=date_add(next_day('2020-03-10','monday'),-1)18. group by mid_id19. union all20. select21. mid_id22. from dws_uv_detail_daycount23. where dt>=date_add(next_day('2020-03-10','monday'),-7*2)24. and dt<=date_add(next_day('2020-03-10','monday'),-7-1)25. group by mid_id26. union all27. select28. mid_id29. from dws_uv_detail_daycount30. where dt>=date_add(next_day('2020-03-10','monday'),-7*3)31. and dt<=date_add(next_day('2020-03-10','monday'),-7*2-1)32. group by mid_id33. )t134. group by mid_id35. having count(*)=336. )t2
3)查询
1. select * from ads_continuity_wk_count;
1.1.8 最近七天内连续三天活跃用户数
1)建表语句
1. drop table if exists ads_continuity_uv_count;2. create external table ads_continuity_uv_count(3. `dt` string COMMENT '统计日期',4. `wk_dt` string COMMENT '最近 7 天日期',5. `continuity_count` bigint6. ) COMMENT '连续活跃设备数'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_continuity_uv_count';
2)写出导入数据的 SQL 语句
1. insert into table ads_continuity_uv_count2. select3. '2020-03-12',4. concat(date_add('2020-03-12',-6),'_','2020-03-12'),5. count(*)6. from7. (8. select mid_id9. from10. (11. select mid_id12. from13. (14. select15. mid_id,16. date_sub(dt,rank) date_dif17. from18. (19. select20. mid_id,21. dt,22. rank() over(partition by mid_id order by dt) rank23. from dws_uv_detail_daycount24. where dt>=date_add('2020-03-12',-6) and25. dt<='2020-03-12'26. )t127. )t228. group by mid_id,date_dif29. having count(*)>=330. )t331. group by mid_id32. )t4;
3)查询
1. select * from ads_continuity_uv_count;
1.2 会员主题
1.2.1 会员主题信息
1)建表
1. drop table if exists ads_user_topic;2. create external table ads_user_topic(3. `dt` string COMMENT '统计日期',4. `day_users` string COMMENT '活跃会员数',5. `day_new_users` string COMMENT '新增会员数',6. `day_new_payment_users` string COMMENT '新增消费会员数',7. `payment_users` string COMMENT '总付费会员数',8. `users` string COMMENT '总会员数',9. `day_users2users` decimal(10,2) COMMENT '会员活跃率',10. `payment_users2users` decimal(10,2) COMMENT '会员付费率',11. `day_new_users2users` decimal(10,2) COMMENT '会员新鲜度'12. ) COMMENT '会员主题信息表'13. row format delimited fields terminated by '\t'14. location '/warehouse/gmall/ads/ads_user_topic';15.复制代码
2)导入数据
1. insert into table ads_user_topic2. select3. '2020-03-10',4. sum(if(login_date_last='2020-03-10',1,0)),5. sum(if(login_date_first='2020-03-10',1,0)),6. sum(if(payment_date_first='2020-03-10',1,0)),7. sum(if(payment_count>0,1,0)),8. count(*),9. sum(if(login_date_last='2020-03-10',1,0))/count(*),10. sum(if(payment_count>0,1,0))/count(*),11. sum(if(login_date_first='2020-03-10',1,0))/sum(if(login_date_last='2020-03-10',1,0))12. from dwt_user_topic
3)查询数据
1. hive (gmall)> select * from ads_user_topic;
4)vim ads_user_topic.sh
添加如下内容:
1. #!/bin/bash2. APP=gmall3. hive=/opt/modules/hive/bin/hive4. # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天5. if [ -n "$1" ] ;then6. do_date=$17. else8. do_date=`date -d "-1 day" +%F`9. fi10. sql="11. with12. tmp_day_users as13. (14. select15. '$do_date' dt,16. count(*) day_users17. from18. ${APP}.dwt_user_topic19. where20. login_date_last='$do_date'21. ),22. tmp_day_new_users as23. (24. select25. '$do_date' dt,26. count(*) day_new_users27. from28. ${APP}.dwt_user_topic29. where30. login_date_last='$do_date' and login_date_first='$do_date'31. ),32. tmp_day_new_payment_users as33. (34. select35. '$do_date' dt,36. count(*) day_new_payment_users37. from38. ${APP}.dwt_user_topic39. where40. payment_date_first='$do_date'41. ),42. tmp_payment_users as43. (44. select45. '$do_date' dt,46. count(*) payment_users47. from48. ${APP}.dwt_user_topic49. where50. payment_date_first is not null51. ),52. tmp_users as53. (54. select55. '$do_date' dt,56. count(*) users57. from58. ${APP}.dwt_user_topic59. tmp_users60. )61. insert into table ${APP}.ads_user_topic62. select63. '$do_date' dt,64. day_users,65. day_new_users,66. day_new_payment_users,67. payment_users,68. users,69. day_users/users,70. payment_users/users,71. day_new_users/users72. from73. tmp_day_users74. join75. tmp_day_new_users76. on77. tmp_day_users.dt=tmp_day_new_users.dt78. join79. tmp_day_new_payment_users80. on81. tmp_day_users.dt=tmp_day_new_payment_users.dt82. join83. tmp_payment_users84. on85. tmp_day_users.dt=tmp_payment_users.dt86. join87. tmp_users88. on89. tmp_day_users.dt=tmp_users.dt;90. "91. $hive -e "$sql"92.复制代码
5)增加脚本执行权限
1. chmod 770 ads_user_topic.sh
6)执行脚本导入数据
1. ads_user_topic.sh 2020-03-11
7)查看导入数据
1. select * from ads_user_topic;
1.2.2 漏斗分析
统计“浏览->购物车->下单->支付”的转化率
思路:统计各个行为的人数,然后计算比值
1)建表语句
1. drop table if exists ads_user_action_convert_day;2. create external table ads_user_action_convert_day(3. `dt` string COMMENT '统计日期',4. `total_visitor_m_count` bigint COMMENT '总访问人数',5. `cart_u_count` bigint COMMENT '加入购物车的人数',6. `visitor2cart_convert_ratio` decimal(10,2) COMMENT '访问到加入购物车转化率',7. `order_u_count` bigint COMMENT '下单人数',8. `cart2order_convert_ratio` decimal(10,2) COMMENT '加入购物车到下单转化率',9. `payment_u_count` bigint COMMENT '支付人数',10. `order2payment_convert_ratio` decimal(10,2) COMMENT '下单到支付的转化率'11. ) COMMENT '用户行为漏斗分析'12. row format delimited fields terminated by '\t'13. location '/warehouse/gmall/ads/ads_user_action_convert_day/';
2)数据装载
1. insert into table ads_user_action_convert_day2. select3. '2020-03-10',4. uv.day_count,5. ua.cart_count,6. cast(ua.cart_count/uv.day_count as decimal(10,2)) visitor2cart_convert_ratio,7. ua.order_count,8. cast(ua.order_count/ua.cart_count as decimal(10,2)) visitor2order_convert_ratio,9. ua.payment_count,10. cast(ua.payment_count/ua.order_count as decimal(10,2)) order2payment_convert_ratio11. from12. (13. select14. dt,15. sum(if(cart_count>0,1,0)) cart_count,16. sum(if(order_count>0,1,0)) order_count,17. sum(if(payment_count>0,1,0)) payment_count18. from dws_user_action_daycount19. where dt='2020-03-10'20. group by dt21. )ua join ads_uv_count uv on uv.dt=ua.dt;
3)查询加载数据
1. select * from ads_user_action_convert_day;2.复制代码
1.3 商品主题
1.3.1 商品个数信息
1)建表语句
1. drop table if exists ads_product_info;2. create external table ads_product_info(3. `dt` string COMMENT '统计日期',4. `sku_num` string COMMENT 'sku 个数',5. `spu_num` string COMMENT 'spu 个数'6. ) COMMENT '商品个数信息'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_product_info';
2)导入数据
1. insert into table ads_product_info2. select3. '2020-03-10' dt,4. sku_num,5. spu_num6. from7. (8. select9. '2020-03-10' dt,10. count(*) sku_num11. from12. dwt_sku_topic13. ) tmp_sku_num14. join15. (16. select17. '2020-03-10' dt,18. count(*) spu_num19. from20. (21. select22. spu_id23. from24. dwt_sku_topic25. group by26. spu_id27. ) tmp_spu_id28. ) tmp_spu_num29. on30. tmp_sku_num.dt=tmp_spu_num.dt;
3)查询结果数据
1. select * from ads_product_info;
1.3.2 商品销量排名
1)建表语句
1. drop table if exists ads_product_sale_topN;2. create external table ads_product_sale_topN(3. `dt` string COMMENT '统计日期',4. `sku_id` string COMMENT '商品 ID',5. `payment_amount` bigint COMMENT '销量'6. ) COMMENT '商品个数信息'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_product_sale_topN';
2)导入数据
1. insert into table ads_product_sale_topN2. select3. '2020-03-10' dt,4. sku_id,5. payment_amount6. from7. dws_sku_action_daycount8. where9. dt='2020-03-10'10. order by payment_amount desc11. limit 10;
3)查询结果数据
1. select * from ads_product_sale_topN;2.复制代码
1.3.3 商品收藏排名
1)建表语句
1. drop table if exists ads_product_favor_topN;2. create external table ads_product_favor_topN(3. `dt` string COMMENT '统计日期',4. `sku_id` string COMMENT '商品 ID',5. `favor_count` bigint COMMENT '收藏量'6. ) COMMENT '商品收藏 TopN'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_product_favor_topN';
2)导入数据
1. insert into table ads_product_favor_topN2. select3. '2020-03-10' dt,4. sku_id,5. favor_count6. from7. dws_sku_action_daycount8. where9. dt='2020-03-10'10. order by favor_count desc11. limit 10;
3)查询数据
1. select * from ads_product_favor_topN;
1.3.4 商品加入购物车排名
1)建表语句
1. drop table if exists ads_product_cart_topN;2. create external table ads_product_cart_topN(3. `dt` string COMMENT '统计日期',4. `sku_id` string COMMENT '商品 ID',5. `cart_num` bigint COMMENT '加入购物车数量'6. ) COMMENT '商品加入购物车 TopN'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_product_cart_topN';
2)导入数据
1. insert into table ads_product_cart_topN2. select3. '2020-03-10' dt,4. sku_id,5. cart_num6. from7. dws_sku_action_daycount8. where9. dt='2020-03-10'10. order by cart_num desc11. limit 10;
3)查询数据
1. select * from ads_product_cart_topN;
1.3.5 商品退款率排名(最近 30 天)
1)建表语句
1. drop table if exists ads_product_refund_topN;2. create external table ads_product_refund_topN(3. `dt` string COMMENT '统计日期',4. `sku_id` string COMMENT '商品 ID',5. `refund_ratio` decimal(10,2) COMMENT '退款率'6. ) COMMENT '商品退款率 TopN'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_product_refund_topN';
2)导入数据
1. insert into table ads_product_refund_topN2. select3. '2020-03-10',4. sku_id,5. refund_last_30d_count/payment_last_30d_count*100 refund_ratio6. from dwt_sku_topic7. order by refund_ratio desc8. limit 10;
3)查询数据
1. select * from ads_product_refund_topN;
1.3.6 商品差评率
1)建表语句
1. drop table if exists ads_appraise_bad_topN;2. create external table ads_appraise_bad_topN(3. `dt` string COMMENT '统计日期',4. `sku_id` string COMMENT '商品 ID',5. `appraise_bad_ratio` decimal(10,2) COMMENT '差评率'6. ) COMMENT '商品差评率 TopN'7. row format delimited fields terminated by '\t'8. location '/warehouse/gmall/ads/ads_appraise_bad_topN';
2)导入数据
1. insert into table ads_appraise_bad_topN2. select3. '2020-03-10' dt,4. sku_id,5. appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_coun6. t+appraise_default_count) appraise_bad_ratio7. from8. dws_sku_action_daycount9. where10. dt='2020-03-10'11. order by appraise_bad_ratio desc12. limit 10;
3)查询数据
1. select * from ads_appraise_bad_topN;2.复制代码
1.4 营销主题(用户+商品+购买行为)
1.4.1 下单数目统计
需求分析:统计每日下单数,下单金额及下单用户数
1)建表语句
1. drop table if exists ads_order_daycount;2. create external table ads_order_daycount(3. dt string comment '统计日期',4. order_count bigint comment '单日下单笔数',5. order_amount decimal(10,2) comment '单日下单金额',6. order_users bigint comment '单日下单用户数'7. ) comment '每日订单总计表'8. row format delimited fields terminated by '\t'9. location '/warehouse/gmall/ads/ads_order_daycount';
2)导入数据
1. insert into table ads_order_daycount2. select3. '2020-03-10',4. sum(order_count),5. sum(order_amount),6. sum(if(order_count>0,1,0))7. from dws_user_action_daycount8. where dt='2020-03-10';
3)查询数据
1. select * from ads_order_daycount;2.复制代码
1.4.2 支付信息统计
每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自 DWD)
1)建表
1. drop table if exists ads_payment_daycount;2. create external table ads_payment_daycount(3. dt string comment '统计日期',4. payment_count bigint comment '单日支付笔数',5. payment_amount decimal(10,2) comment '单日支付金额',6. payment_user_count bigint comment '单日支付人数',7. payment_sku_count bigint comment '单日支付商品数',8. payment_avg_time double comment '下单到支付的平均时长,取分钟数'9. ) comment '每日订单总计表'10. row format delimited fields terminated by '\t'11. location '/warehouse/gmall/ads/ads_payment_daycount';
2)导入数据
1. insert into table ads_payment_daycount2. select3. tmp_payment.dt,4. tmp_payment.payment_count,5. tmp_payment.payment_amount,6. tmp_payment.payment_user_count,7. tmp_skucount.payment_sku_count,8. tmp_time.payment_avg_time9. from10. (11. select12. '2020-03-15' dt,13. sum(payment_count) payment_count,14. sum(payment_amount) payment_amount,15. sum(if(payment_count>0,1,0)) payment_user_count16. from dws_user_action_daycount17. where dt='2020-03-15'18. )tmp_payment19. join20. (21. select22. '2020-03-15' dt,23. sum(if(payment_count>0,1,0)) payment_sku_count24. from dws_sku_action_daycount25. where dt='2020-03-15'26. )tmp_skucount on tmp_payment.dt=tmp_skucount.dt27. join28. (29. select30. '2020-03-15' dt,31. sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/6032. payment_avg_time33. from dwd_fact_order_info34. where dt='2020-03-15'35. and payment_time is not null36. )tmp_time on tmp_payment.dt=tmp_time.dt
3)查询数据
1. select * from ads_payment_daycount;
1.4.3 复购率
1)建表语句
1. drop table ads_sale_tm_category1_stat_mn;2. create external table ads_sale_tm_category1_stat_mn3. (4. tm_id string comment '品牌 id',5. category1_id string comment '1 级品类 id ',6. category1_name string comment '1 级品类名称 ',7. buycount bigint comment '购买人数',8. buy_twice_last bigint comment '两次以上购买人数',9. buy_twice_last_ratio decimal(10,2) comment '单次复购率',10. buy_3times_last bigint comment '三次以上购买人数',11. buy_3times_last_ratio decimal(10,2) comment '多次复购率',12. stat_mn string comment '统计月份',13. stat_date string comment '统计日期'14. ) COMMENT '复购率统计'15. row format delimited fields terminated by '\t'16. location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';
2)数据导入
1. insert into table ads_sale_tm_category1_stat_mn2. select3. mn.sku_tm_id,4. mn.sku_category1_id,5. mn.sku_category1_name,6. sum(if(mn.order_count>=1,1,0)) buycount,7. sum(if(mn.order_count>=2,1,0)) buyTwiceLast,8. sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))9. buyTwiceLastRatio,10. sum(if(mn.order_count>=3,1,0)) buy3timeLast ,11. sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))12. buy3timeLastRatio ,13. date_format('2020-03-10' ,'yyyy-MM') stat_mn,14. '2020-03-10' stat_date15. from16. (17. select18. user_id,19. sd.sku_tm_id,20. sd.sku_category1_id,21. sd.sku_category1_name,22. sum(order_count) order_count23. from dws_sale_detail_daycount sd24. where date_format(dt,'yyyy-MM')=date_format('2020-03-10' ,'yyyy-MM')25. group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name26. ) mn27. group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
1.5 ADS 层导入脚本
1)vim dwt_to_ads.sh
在脚本中填写如下内容
1. #!/bin/bash2. hive=/opt/modules/hive/bin/hive3. # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天4. if [ -n "$1" ] ;then5. do_date=$16. else7. do_date=`date -d "-1 day" +%F`8. fi9. sql="use gmall;10. insert into table ads_uv_count11. select12. '$do_date',13. sum(if(login_date_last='$do_date',1,0)),14. sum(if(login_date_last>=date_add(next_day('$do_date','monday'),-7) and15. login_date_last<=date_add(next_day('$do_date','monday'),-1) ,1,0)),16. sum(if(date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-M17. M'),1,0)),18. if('$do_date'=date_add(next_day('$do_date','monday'),-1),'Y','N'),19. if('$do_date'=last_day('$do_date'),'Y','N')20. from dwt_uv_topic;21.22.23. insert into table ads_new_mid_count24. select25. '$do_date',26. count(*)27. from dwt_uv_topic28. where login_date_first='$do_date';29.30.31. insert into table ads_silent_count32. select33. '$do_date',34. count(*)35. from dwt_uv_topic36. where login_date_first=login_date_last37. and login_date_last<=date_add('$do_date',-7);38.39.40. insert into table ads_back_count41. select42. '$do_date',43. concat(date_add(next_day('2020-03-10','MO'),-7),'_',date_add(next_day('2020-44. 03-10','MO'),-1)),45. count(*)46. from47. (48. select49. mid_id50. from dwt_uv_topic51. where login_date_last>=date_add(next_day('$do_date','MO'),-7)52. and login_date_last<= date_add(next_day('$do_date','MO'),-1)53. and login_date_first<date_add(next_day('$do_date','MO'),-7)54. )current_wk55. left join56. (57. select58. mid_id59. from dws_uv_detail_daycount60. where dt>=date_add(next_day('$do_date','MO'),-7*2)61. and dt<= date_add(next_day('$do_date','MO'),-7-1)62. group by mid_id63. )last_wk64. on current_wk.mid_id=last_wk.mid_id65. where last_wk.mid_id is null;66.67.68. insert into table ads_wastage_count69. select70. '$do_date',71. count(*)72. from dwt_uv_topic73. where login_date_last<=date_add('$do_date',-7);74.75.76. insert into table ads_user_retention_day_rate77. select78. '$do_date',79. date_add('$do_date',-3),80. 3,81. sum(if(login_date_first=date_add('$do_date',-3) and82. login_date_last='$do_date',1,0)),83. sum(if(login_date_first=date_add('$do_date',-3),1,0)),84. sum(if(login_date_first=date_add('$do_date',-3) and85. login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',86. -3),1,0))*10087. from dwt_uv_topic88. union all89. select90. '$do_date',91. date_add('$do_date',-2),92. 2,93. sum(if(login_date_first=date_add('$do_date',-2) and94. login_date_last='$do_date',1,0)),95. sum(if(login_date_first=date_add('$do_date',-2),1,0)),96. sum(if(login_date_first=date_add('$do_date',-2) and97. login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',98. -2),1,0))*10099. from dwt_uv_topic100. union all101. select102. '$do_date',103. date_add('$do_date',-1),104. 1,105. sum(if(login_date_first=date_add('$do_date',-1) and106. login_date_last='$do_date',1,0)),107. sum(if(login_date_first=date_add('$do_date',-1),1,0)),108. sum(if(login_date_first=date_add('$do_date',-1) and109. login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',110. -1),1,0))*100111. from dwt_uv_topic;112.113.114. insert into table ads_continuity_wk_count115. select116. '$do_date',117. concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_d118. ate','MO'),-1)),119. count(*)120. from121. (122. select123. mid_id124. from125. (126. select127. mid_id128. from dws_uv_detail_daycount129. where dt>=date_add(next_day('$do_date','monday'),-7)130. and dt<=date_add(next_day('$do_date','monday'),-1)131. group by mid_id132. union all133. select134. mid_id135. from dws_uv_detail_daycount136. where dt>=date_add(next_day('$do_date','monday'),-7*2)137. and dt<=date_add(next_day('$do_date','monday'),-7-1)138. group by mid_id139. union all140. select141. mid_id142. from dws_uv_detail_daycount143. where dt>=date_add(next_day('$do_date','monday'),-7*3)144. and dt<=date_add(next_day('$do_date','monday'),-7*2-1)145. group by mid_id146. )t1147. group by mid_id148. having count(*)=3149. )t2;150.151.152. insert into table ads_continuity_uv_count153. select154. '$do_date',155. concat(date_add('$do_date',-6),'_','$do_date'),156. count(*)157. from158. (159. select mid_id160. from161. (162. select mid_id163. from164. (165. select166. mid_id,167. date_sub(dt,rank) date_dif168. from169. (170. select171. mid_id,172. dt,173. rank() over(partition by mid_id order by dt) rank174. from dws_uv_detail_daycount175. where dt>=date_add('$do_date',-6) and dt<='$do_date'176. )t1177. )t2178. group by mid_id,date_dif179. having count(*)>=3180. )t3181. group by mid_id182. )t4;183.184.185. insert into table ads_user_topic186. select187. '$do_date',188. sum(if(login_date_last='$do_date',1,0)),189. sum(if(login_date_first='$do_date',1,0)),190. sum(if(payment_date_first='$do_date',1,0)),191. sum(if(payment_count>0,1,0)),192. count(*),193. sum(if(login_date_last='$do_date',1,0))/count(*),194. sum(if(payment_count>0,1,0))/count(*),195. sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1,196. 0))197. from dwt_user_topic;198.199.200. insert into table ads_user_action_convert_day201. select202. '$do_date',203. uv.day_count,204. ua.cart_count,205. ua.cart_count/uv.day_count*100 visitor2cart_convert_ratio,206. ua.order_count,207. ua.order_count/ua.cart_count*100 visitor2order_convert_ratio,208. ua.payment_count,209. ua.payment_count/ua.order_count*100 order2payment_convert_ratio210. from211. (212. select213. '$do_date' dt,214. sum(if(cart_count>0,1,0)) cart_count,215. sum(if(order_count>0,1,0)) order_count,216. sum(if(payment_count>0,1,0)) payment_count217. from dws_user_action_daycount218. where dt='$do_date'219. )ua join ads_uv_count uv on uv.dt=ua.dt;220.221.222. insert into table ads_product_info223. select224. '$do_date' dt,225. sku_num,226. spu_num227. from228. (229. select230. '$do_date' dt,231. count(*) sku_num232. from233. dwt_sku_topic234. ) tmp_sku_num235. join236. (237. select238. '$do_date' dt,239. count(*) spu_num240. from241. (242. select243. spu_id244. from245. dwt_sku_topic246. group by247. spu_id248. ) tmp_spu_id249. ) tmp_spu_num250. on tmp_sku_num.dt=tmp_spu_num.dt;251.252.253. insert into table ads_product_sale_topN254. select255. '$do_date',256. sku_id,257. payment_amount258. from dws_sku_action_daycount259. where dt='$do_date'260. order by payment_amount desc261. limit 10;262.263.264. insert into table ads_product_favor_topN265. select266. '$do_date',267. sku_id,268. favor_count269. from dws_sku_action_daycount270. where dt='$do_date'271. order by favor_count272. limit 10;273.274.275. insert into table ads_product_cart_topN276. select277. '$do_date' dt,278. sku_id,279. cart_num280. from dws_sku_action_daycount281. where dt='$do_date'282. order by cart_num283. limit 10;284.285.286. insert into table ads_product_refund_topN287. select288. '$do_date',289. sku_id,290. refund_last_30d_count/payment_last_30d_count*100 refund_ratio291. from dwt_sku_topic292. order by refund_ratio desc293. limit 10;294.295.296. insert into table ads_appraise_bad_topN297. select298. '$do_date' dt,299. sku_id,300. appraise_bad_count/(appraise_bad_count+appraise_good_count+appraise_mid_coun301. t+appraise_default_count)*100 appraise_bad_ratio302. from dws_sku_action_daycount303. where dt='$do_date'304. order by appraise_bad_ratio desc305. limit 10;306.307.308. insert into table ads_order_daycount309. select310. '$do_date',311. sum(order_count),312. sum(order_amount),313. sum(if(order_count>0,1,0))314. from dws_user_action_daycount315. where dt='$do_date';316.317.318. insert into table ads_payment_daycount319. select320. tmp_payment.dt,321. tmp_payment.payment_count,322. tmp_payment.payment_amount,323. tmp_payment.payment_user_count,324. tmp_skucount.payment_sku_count,325. tmp_time.payment_avg_time326. from327. (328. select329. '$do_date' dt,330. sum(payment_count) payment_count,331. sum(payment_amount) payment_amount,332. sum(if(payment_count>0,1,0)) payment_user_count333. from dws_user_action_daycount334. where dt='$do_date'335. )tmp_payment336. join337. (338. select339. '$do_date' dt,340. sum(if(payment_count>0,1,0)) payment_sku_count341. from dws_sku_action_daycount342. where dt='$do_date'343. )tmp_skucount on tmp_payment.dt=tmp_skucount.dt344. join345. (346. select347. '$do_date' dt,348. sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60349. payment_avg_time350. from dwd_fact_order_info351. where dt='$do_date'352. and payment_time is not null353. )tmp_time on tmp_payment.dt=tmp_time.dt;354.355.356. insert into table ads_sale_tm_category1_stat_mn357. select358. mn.sku_tm_id,359. mn.sku_category1_id,360. mn.sku_category1_name,361. sum(if(mn.order_count>=1,1,0)) buycount,362. sum(if(mn.order_count>=2,1,0)) buyTwiceLast,363. sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))364. buyTwiceLastRatio,365. sum(if(mn.order_count>=3,1,0)) buy3timeLast ,366. sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))367. buy3timeLastRatio ,368. date_format('$do_date' ,'yyyy-MM') stat_mn,369. '$do_date' stat_date370. from371. (372. select373. user_id,374. sd.sku_tm_id,375. sd.sku_category1_id,376. sd.sku_category1_name,377. sum(order_count) order_count378. from dws_sale_detail_daycount sd379. where date_format(dt,'yyyy-MM')=date_format('$do_date' ,'yyyy-MM')380. group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name381. ) mn382. group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;383. "384. $hive -e "$sql"385.复制代码
2)增加脚本执行权限
1. chmod 770 dwt_to_ads.sh
3)执行脚本导入数据
1. dwt_to_ads.sh 2020-03-10
4)查看导入数据
