-- Walkthrough: query JSON data in Hive through a JSON SerDe.
-- Creates the table `record`, loads a local JSON file into it, and reads it back.

-- Step 1: Add the resources
-- Register the Hive contrib and metastore jars, plus the jar that holds the
-- JSON SerDe class, for the current session.
ADD JAR /usr/lib/hive-0.10.0/lib/hive-contrib-0.10.0.jar;
ADD JAR /usr/lib/hive-0.10.0/lib/hive-metastore-0.10.0.jar;
ADD JAR /home/hadoop/Documents/JSON-Serde.jar;

-- Step 2: Create the table
-- IF NOT EXISTS keeps this script re-runnable: a second run no longer aborts
-- with a "table already exists" error.
-- NOTE(review): the SERDE string must be the fully qualified name of the class
-- packaged inside JSON-Serde.jar. A jar built from the Cloudera
-- cdh-twitter-example source (path com/cloudera/hive/serde/JSONSerDe.java)
-- would need 'com.cloudera.hive.serde.JSONSerDe' here instead - confirm
-- against the jar actually in use.
CREATE TABLE IF NOT EXISTS record (
    id        INT,
    city_code ARRAY<INT>,
    email     STRING,
    contact   STRUCT<Mobile_no:STRING, Telephone_no:STRING>
)
ROW FORMAT SERDE 'org.JSONSerDe';

-- Step 3: Load the data
-- @Data looks like (contents of record_data.txt).
-- NOTE(review): presumably each JSON object must sit on its own line, since a
-- text-format Hive table reads one row per line - verify with the SerDe in use.
--   { "id": 1, "city_code": [ 1, 2, 3 ], "email": "joseph@gmail.com", "contact": { "Mobile_no": "val1", "Telephone_no": "val2" } }
--   { "id": 2, "city_code": [ 4, 5, 6 ], "email": "james@gmail.com", "contact": { "Mobile_no": "val3", "Telephone_no": "val4" } }
--   { "id": 3, "city_code": [ 7, 8, 9 ], "email": "rony@gmail.com", "contact": { "Mobile_no": "val5", "Telephone_no": "val6" } }
-- OVERWRITE replaces whatever the table currently holds, so reloading the
-- file does not duplicate rows.
LOAD DATA LOCAL INPATH '/home/hadoop/Documents/record_data.txt'
OVERWRITE INTO TABLE record;

-- Step 4: Retrieve the data
-- Show every loaded row.
SELECT * FROM record;

-- Read a single field of the nested `contact` struct with dot notation.
SELECT contact.Mobile_no
FROM record
WHERE id = 1;
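Note: The source code for the JSONSerDe class is available at https://github.com/cloudera/cdh-twitter-example/blob/master/hive-serdes/src/main/java/com/cloudera/hive/serde/JSONSerDe.java (this link is a Java source file, not a prebuilt jar, so compile and package it into a jar before adding it in Step 1).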
No comments:
Post a Comment