//** HBase Hive integration using External table simple example ***/
1. Create the underlying Hbase table using below commands
create 'HBASE_FCM:emp_table','id','name','salary'
describe 'HBASE_FCM:emp_table'
2. Create temporary table to load data from a text file as we cannot load data directly in hive –hbase
integration table.
create table temp_load_emp ( id int, value1 string,value2 int, value3 int)
row format delimited fields terminated by '|' stored as textfile;
3. Load data in temporary table
load data local inpath '/development/Projects/fcm/data/srcfiles/ted04/retail/raw/emp.txt' into table temp_emp;
4. Create Hive table with storage as HBase to perform the HIVE HBase integration
CREATE EXTERNAL TABLE hbase_table_1(id int, name string, salary int)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ( "hbase.columns.mapping" = ":key, name :c, salary:e")
TBLProperties("hbase.table.name" ="emp_table");
serdeproperties: define the schema definition where we define the mapping between hive and hbase column.
tblproperties : define the hbase table corresponding the hive table,
STORED BY is HBase so we will have the table directory in warehouse but there will be no text file.
To view hive warehouse location trigger below command
desc formatted table_name;
5. Insert data from temp table to hive hbase integration table
insert overwrite table hbase_table_2 select * from temp_emp;
//** HBase Hive integration using External table with composite key ***/
Same as above steps will be followed. Only changes will be there in step 4 and 5
4 Create external table with as composite key.
CREATE EXTERNAL TABLE hbase_emp(key struct<id:int,name:string>,
salary int)
ROW FORMAT DELIMITED
COLLECTION ITEMS TERMINATED BY '_'
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,salary:salary")
TBLPROPERTIES("hbase.table.name"="HBASE_FCM:emp_table");
5. Insert data into hive hbase table
insert overwrite table hbase_emp select named_struct('id',id,'name',name),salary from temp_emp;
Note:
Basically what you are doing here is that you are visualizing the composite
key as a struct and specifying that your keys in the composite key are
separated by a "_". When you view hbase table key column value will be like id_name
If we have to perform GROUP BY on any one part of key in your composite
key, you simply run a query like:
select * from hbase_table_2 GROUP BY key.id;
//******* HBASE HIVE integration with internal tables ******************//
CREATE TABLE hbase_table_2(id int, name string, salary int)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ( "hbase.columns.mapping" = ":key, name :c, salary:e")
TBLProperties("hbase.table.name" ="HBASE_FCM:emp_table2");
/**** HIVE HBase integration table with composite key **************/
CREATE TABLE hbase_emp2(key struct<id:int,name:string>,salary int)
ROW FORMAT DELIMITED
COLLECTION ITEMS TERMINATED BY '_'
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,salary:salary")
TBLPROPERTIES("hbase.table.name"="HBASE_FCM:emp_table3");