今天练习一下在 Hive 中使用 get_json_object 函数。首先创建一个表,用来导入 JSON 格式的数据:
-- Staging table: a single string column that holds one raw JSON document per row.
create table json01 (
    line string
);
将数据导入:
-- Load the sample file into json01; without OVERWRITE the rows are added to the table.
load data local inpath '/home/hadoop/json_test'
into table json01;
导入数据如下:
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"1"}
{"movie":"914","rate":"3","timeStamp":"978301968","uid":"1"}
{"movie":"3408","rate":"4","timeStamp":"978300275","uid":"1"}
{"movie":"2355","rate":"5","timeStamp":"978824291","uid":"1"}
{"movie":"1197","rate":"3","timeStamp":"978302268","uid":"1"}
{"movie":"1287","rate":"5","timeStamp":"978302039","uid":"1"}
{"movie":"2804","rate":"5","timeStamp":"978300719","uid":"1"}
{"movie":"594","rate":"4","timeStamp":"978302268","uid":"1"}
解析movie值,操作如下:
-- Pull the "movie" field out of every raw JSON line.
select
    get_json_object(src.line, '$.movie') as movie
from json01 src;
将解析出的值存入另一个表中:
-- Materialize the four fields parsed from each JSON line of json01 into a new table.
-- Each path argument is a plain single-quoted string of the form '$.<key>';
-- typographic quotes or a missing '$' prefix will not work.
create table json02 as
select
    get_json_object(m.line, '$.movie')     as movie,
    get_json_object(m.line, '$.rate')      as rate,
    -- Alias is backtick-quoted because TIME may be a reserved word in newer Hive
    -- releases (verify against your version); the column is still named "time".
    get_json_object(m.line, '$.timeStamp') as `time`,
    get_json_object(m.line, '$.uid')       as uid
from json01 m;
查看表内容:
-- Inspect all columns and rows of the derived table.
select *
from json02;