Skip to content

Commit b8c0252

Browse files
author
Brock Noland
committed
HIVE-6147 - Support avro data stored in HBase columns (Swarnim Kulkarni via Brock)
git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1623845 13f79535-47bb-0310-9956-ffa450edef68
1 parent afecaff commit b8c0252

35 files changed

+4406
-111
lines changed

hbase-handler/if/test/avro_test.avpr

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
{
2+
"protocol": "EmployeeAvro",
3+
"namespace": "org.apache.hadoop.hive.hbase.avro",
4+
"types": [
5+
{
6+
"type": "enum",
7+
"name": "Gender",
8+
"symbols": [
9+
"MALE",
10+
"FEMALE"
11+
]
12+
},
13+
{
14+
"type": "record",
15+
"name": "HomePhone",
16+
"fields": [
17+
{
18+
"name": "areaCode",
19+
"type": "long"
20+
},
21+
{
22+
"name": "number",
23+
"type": "long"
24+
}
25+
]
26+
},
27+
{
28+
"type": "record",
29+
"name": "OfficePhone",
30+
"fields": [
31+
{
32+
"name": "areaCode",
33+
"type": "long"
34+
},
35+
{
36+
"name": "number",
37+
"type": "long"
38+
}
39+
]
40+
},
41+
{
42+
"type": "record",
43+
"name": "Address",
44+
"fields": [
45+
{
46+
"name": "address1",
47+
"type": "string"
48+
},
49+
{
50+
"name": "address2",
51+
"type": "string"
52+
},
53+
{
54+
"name": "city",
55+
"type": "string"
56+
},
57+
{
58+
"name": "zipcode",
59+
"type": "long"
60+
},
61+
{
62+
"name": "county",
63+
"type": [
64+
"HomePhone",
65+
"OfficePhone",
66+
"string",
67+
"null"
68+
]
69+
},
70+
{
71+
"name": "aliases",
72+
"type": [
73+
{
74+
"type": "array",
75+
"items": "string"
76+
},
77+
"null"
78+
]
79+
},
80+
{
81+
"name": "metadata",
82+
"type": [
83+
"null",
84+
{
85+
"type": "map",
86+
"values": "string"
87+
}
88+
]
89+
}
90+
]
91+
},
92+
{
93+
"type": "record",
94+
"name": "ContactInfo",
95+
"fields": [
96+
{
97+
"name": "address",
98+
"type": [
99+
{
100+
"type": "array",
101+
"items": "Address"
102+
},
103+
"null"
104+
]
105+
},
106+
{
107+
"name": "homePhone",
108+
"type": "HomePhone"
109+
},
110+
{
111+
"name": "officePhone",
112+
"type": "OfficePhone"
113+
}
114+
]
115+
},
116+
{
117+
"type": "record",
118+
"name": "Employee",
119+
"fields": [
120+
{
121+
"name": "employeeName",
122+
"type": "string"
123+
},
124+
{
125+
"name": "employeeID",
126+
"type": "long"
127+
},
128+
{
129+
"name": "age",
130+
"type": "long"
131+
},
132+
{
133+
"name": "gender",
134+
"type": "Gender"
135+
},
136+
{
137+
"name": "contactInfo",
138+
"type": "ContactInfo"
139+
}
140+
]
141+
}
142+
],
143+
"messages": { }
144+
}

hbase-handler/pom.xml

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@
5757
<version>${junit.version}</version>
5858
<scope>test</scope>
5959
</dependency>
60+
<dependency>
61+
<groupId>org.apache.avro</groupId>
62+
<artifactId>avro</artifactId>
63+
<version>1.7.6</version>
64+
</dependency>
6065
</dependencies>
6166

6267
<profiles>
@@ -100,6 +105,12 @@
100105
<artifactId>hbase-hadoop-compat</artifactId>
101106
<version>${hbase.hadoop1.version}</version>
102107
</dependency>
108+
<dependency>
109+
<groupId>org.apache.hadoop</groupId>
110+
<artifactId>hadoop-test</artifactId>
111+
<version>${hadoop-20S.version}</version>
112+
<scope>test</scope>
113+
</dependency>
103114
<dependency>
104115
<groupId>org.apache.hbase</groupId>
105116
<artifactId>hbase-common</artifactId>
@@ -132,12 +143,26 @@
132143
<version>${hadoop-23.version}</version>
133144
<optional>true</optional>
134145
</dependency>
146+
<dependency>
147+
<groupId>org.apache.hadoop</groupId>
148+
<artifactId>hadoop-common</artifactId>
149+
<version>${hadoop-23.version}</version>
150+
<classifier>tests</classifier>
151+
<scope>test</scope>
152+
</dependency>
135153
<dependency>
136154
<groupId>org.apache.hadoop</groupId>
137155
<artifactId>hadoop-mapreduce-client-core</artifactId>
138156
<version>${hadoop-23.version}</version>
139157
<optional>true</optional>
140158
</dependency>
159+
<dependency>
160+
<groupId>org.apache.hadoop</groupId>
161+
<artifactId>hadoop-hdfs</artifactId>
162+
<version>${hadoop-23.version}</version>
163+
<classifier>tests</classifier>
164+
<scope>test</scope>
165+
</dependency>
141166
<dependency>
142167
<groupId>org.apache.hbase</groupId>
143168
<artifactId>hbase-hadoop2-compat</artifactId>
@@ -190,6 +215,12 @@
190215
<type>test-jar</type>
191216
<scope>test</scope>
192217
</dependency>
218+
<dependency>
219+
<groupId>com.sun.jersey</groupId>
220+
<artifactId>jersey-servlet</artifactId>
221+
<version>${jersey.version}</version>
222+
<scope>test</scope>
223+
</dependency>
193224
</dependencies>
194225
</profile>
195226
</profiles>
@@ -209,6 +240,42 @@
209240
</execution>
210241
</executions>
211242
</plugin>
243+
<plugin>
244+
<groupId>org.apache.avro</groupId>
245+
<artifactId>avro-maven-plugin</artifactId>
246+
<version>1.7.6</version>
247+
<executions>
248+
<execution>
249+
<phase>generate-test-sources</phase>
250+
<goals>
251+
<goal>protocol</goal>
252+
</goals>
253+
<configuration>
254+
<testSourceDirectory>${project.basedir}/if/test</testSourceDirectory>
255+
<testOutputDirectory>${project.basedir}/src/test</testOutputDirectory>
256+
</configuration>
257+
</execution>
258+
</executions>
259+
</plugin>
260+
<plugin>
261+
<groupId>org.codehaus.mojo</groupId>
262+
<artifactId>build-helper-maven-plugin</artifactId>
263+
<version>1.7</version>
264+
<executions>
265+
<execution>
266+
<id>add-test-sources</id>
267+
<phase>generate-test-sources</phase>
268+
<goals>
269+
<goal>add-test-source</goal>
270+
</goals>
271+
<configuration>
272+
<sources>
273+
<!-- NOTE(review): the avro-maven-plugin execution in this pom sets testOutputDirectory to ${project.basedir}/src/test, not this directory; confirm this is the intended generated-source root. -->
<source>${project.basedir}/src/gen/avro/gen-java</source>
274+
</sources>
275+
</configuration>
276+
</execution>
277+
</executions>
278+
</plugin>
212279
</plugins>
213280
</build>
214281

hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,21 @@
2323

2424
package org.apache.hadoop.hive.hbase;
2525

26-
import com.google.common.collect.Iterators;
26+
import java.util.ArrayList;
27+
import java.util.Arrays;
28+
import java.util.Iterator;
29+
import java.util.List;
30+
import java.util.Properties;
31+
32+
import org.apache.commons.lang.StringUtils;
33+
import org.apache.hadoop.conf.Configuration;
2734
import org.apache.hadoop.hive.serde.serdeConstants;
2835
import org.apache.hadoop.hive.serde2.SerDeException;
2936
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
3037
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
3138
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
3239

33-
import java.util.ArrayList;
34-
import java.util.Iterator;
35-
import java.util.List;
40+
import com.google.common.collect.Iterators;
3641

3742
public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {
3843

@@ -53,24 +58,41 @@ public int size() {
5358
return columnsMapping.length;
5459
}
5560

56-
String toTypesString() {
61+
/**
 * Builds the column-names string for this mapping.
 *
 * When {@code autogenerate} is exactly {@code "true"}, delegates to
 * {@code HBaseSerDeHelper.generateColumns} with the table properties and the column mappings and
 * returns whatever that call wrote into the builder. In every other case (including
 * {@code null}) an empty string is returned.
 *
 * @param tbl table properties handed through to the helper
 * @param autogenerate flag value; only the literal string "true" enables generation
 * @return the generated names string, or an empty string when generation is not enabled
 */
String toNamesString(Properties tbl, String autogenerate) {
62+
if (autogenerate != null && autogenerate.equals("true")) {
63+
StringBuilder sb = new StringBuilder();
64+
HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
65+
return sb.toString();
66+
}
67+
68+
return StringUtils.EMPTY; // autogenerate is null or not "true": no names are generated
69+
}
70+
71+
/**
 * Builds the column-types string for this mapping.
 *
 * When {@code autogenerate} is exactly {@code "true"}, the types are produced by
 * {@code HBaseSerDeHelper.generateColumnTypes}. Otherwise one type is emitted per column mapping,
 * separated by ":": the row key and individual columns use {@code STRING_TYPE_NAME}, and a
 * column family (a mapping with no qualifier) uses a {@code MAP_TYPE_NAME} of string to string.
 *
 * @param tbl table properties handed through to the helper
 * @param conf configuration handed through to the helper
 * @param autogenerate flag value; only the literal string "true" enables generation
 * @return the types string
 * @throws SerDeException declared by this method; presumably raised by the helper - verify
 */
String toTypesString(Properties tbl, Configuration conf, String autogenerate)
72+
throws SerDeException {
5773
StringBuilder sb = new StringBuilder();
58-
for (ColumnMapping colMap : columnsMapping) {
59-
if (sb.length() > 0) {
60-
sb.append(":");
61-
}
62-
if (colMap.hbaseRowKey) {
63-
// the row key column becomes a STRING
64-
sb.append(serdeConstants.STRING_TYPE_NAME);
65-
} else if (colMap.qualifierName == null) {
66-
// a column family become a MAP
67-
sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
68-
+ serdeConstants.STRING_TYPE_NAME + ">");
69-
} else {
70-
// an individual column becomes a STRING
71-
sb.append(serdeConstants.STRING_TYPE_NAME);
74+
75+
if (autogenerate != null && autogenerate.equals("true")) {
76+
HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
77+
} else {
78+
for (ColumnMapping colMap : columnsMapping) {
79+
if (sb.length() > 0) {
80+
sb.append(":");
81+
}
82+
if (colMap.hbaseRowKey) {
83+
// the row key column becomes a STRING
84+
sb.append(serdeConstants.STRING_TYPE_NAME);
85+
} else if (colMap.qualifierName == null) {
86+
// a column family (no qualifier) becomes a MAP of STRING to STRING
87+
sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
88+
+ serdeConstants.STRING_TYPE_NAME + ">");
89+
} else {
90+
// an individual column becomes a STRING
91+
sb.append(serdeConstants.STRING_TYPE_NAME);
92+
}
7293
}
7394
}
95+
7496
return sb.toString();
7597
}
7698

hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
package org.apache.hadoop.hive.hbase;
2020

2121
import java.util.ArrayList;
22+
import java.util.Collections;
2223
import java.util.List;
24+
import java.util.Map;
2325

2426
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
2527
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
@@ -94,4 +96,14 @@ public LazyObject<? extends ObjectInspector> toLazyObject(int fieldID, byte[] by
9496

9597
return lazyObject;
9698
}
99+
100+
/**
101+
* Returns the named parts that make up this composite key. This default implementation returns
102+
* an empty map; subclasses may override it to report the names and types of their key parts.
103+
*
104+
* @return map from each key part's name to that part's type; empty unless overridden
105+
*/
106+
public Map<String, String> getParts() {
107+
return Collections.emptyMap();
108+
}
97109
}

0 commit comments

Comments
 (0)