在hive上建表与普通分区表创建方法一样;
1 CREATE TABLE `dwa_m_user_association_circle`( 2 `device_number` string, 3 `oppo_number` string, 4 `prov_id_oppo` string, 5 `area_id_oppo` string, 6 `dealer_oppo` string, 7 `short_call_nums` bigint, 8 `long3_call_nums` bigint, 9 `long5_call_nums` bigint, 10 `long10_call_nums` bigint, 11 `short_total_nums` bigint, 12 `long3_total_nums` bigint, 13 `long5_total_nums` bigint, 14 `long10_total_nums` bigint, 15 `area_id` string)16 PARTITIONED BY ( 17 `month_id` string, 18 `prov_id` string, 19 `dealer` string)20 ROW FORMAT DELIMITED 21 FIELDS TERMINATED BY '|' 22 NULL DEFINED AS '' 23 STORED AS INPUTFORMAT 24 'org.apache.hadoop.mapred.TextInputFormat' 25 OUTPUTFORMAT 26 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'27 LOCATION28 'hdfs://beh/user/hive/warehouse/all_ana_pro.db/dwa_m_user_association_circle'29 TBLPROPERTIES (30 'numPartitions'='248', 31 'numFiles'='648', 32 'transient_lastDdlTime'='1542952067', 33 'totalSize'='247584222644', 34 'numRows'='0', 35 'rawDataSize'='0')
在shell脚本中,需设置的参数:
set hive.exec.dynamic.partition=true; #开启动态分区,默认是false
set hive.exec.dynamic.partition.mode=nostrict; #开启允许所有分区都是动态的,否则必须要有静态分区才能使用。set hive.exec.max.created.files=1000000; #允许创建的最大文件数,当分区是2个或三个分区时,文件会被分成很多小文件,该设置就是将文件的最大数目设成100w;
1 insert overwrite table ALL_ANA_PRO.dwa_m_user_association_circle PARTITION (month_id=${v_month},prov_id,dealer) 2 select device_number, 3 oppo_number , 4 prov_id_oppo , 5 area_id_oppo , 6 dealer_oppo , 7 short_call_nums, 8 long3_call_nums, 9 long5_call_nums,10 long10_call_nums,11 sum(short_call_nums) over(distribute by device_number) short_total_nums,12 sum(long3_call_nums ) over(distribute by device_number) long3_total_nums,13 sum(long5_call_nums) over(distribute by device_number) long5_total_nums,14 sum(long10_call_nums) over(distribute by device_number) long10_total_nums,15 area_id ,16 prov_id ,17 dealer 18 from 19 (SELECT device_number,20 prov_id ,21 area_id ,22 dealer ,23 oppo_number ,24 prov_id_oppo ,25 area_id_oppo ,26 dealer_oppo ,27 sum(case when t.bill_times < 60 then 1 else 0 end ) short_call_nums,28 sum(case when t.bill_times >180 then 1 else 0 end ) long3_call_nums,29 sum(case when t.bill_times >300 then 1 else 0 end ) long5_call_nums,30 sum(case when t.bill_times >600 then 1 else 0 end ) long10_call_nums31 FROM ( SELECT prov_id ,32 area_id ,33 device_number,34 0 dealer ,35 prov_id_oppo ,36 area_id_oppo ,37 oppo_number ,38 dealer_oppo ,39 bill_times40 FROM ALL_ANA_PRO.DWA_M_CALL_RING_BASE t41 WHERE month_id = '${v_month}'42 AND ticket_type = 143 AND dealer_oppo > -144 UNION ALL45 SELECT prov_id_oppo prov_id ,46 area_id_oppo area_id ,47 oppo_number device_number ,48 dealer_oppo dealer ,49 prov_id prov_id_oppo ,50 area_id area_id_oppo ,51 device_number oppo_number ,52 0 dealer_oppo ,53 bill_times54 FROM ALL_ANA_PRO.DWA_M_CALL_RING_BASE t55 WHERE month_id = '${v_month}'56 AND ticket_type = 157 AND dealer_oppo > -158 ) t59 GROUP BY60 device_number,61 prov_id ,62 area_id ,63 dealer ,64 oppo_number ,65 prov_id_oppo ,66 area_id_oppo ,67 dealer_oppo68 ) t;
这里,需要说明的是,向目标表插数:
insert overwrite table ALL_ANA_PRO.dwa_m_user_association_circle PARTITION (month_id=${v_month},prov_id,dealer)
其中select语句中动态分区prov_id,dealer两个字段一般在最后面;