Skip to content

Commit 428a2dd

Browse files
author
Megan Foss
committed
Added field validation for data types, indices, width. Includes creating two setters in field config to set default value for data types and calculate/set width based on indices.
1 parent 428a512 commit 428a2dd

File tree

4 files changed

+98
-53
lines changed

4 files changed

+98
-53
lines changed

contrib/format-fixedwidth/src/main/java/org/apache/drill/exec/store/fixedwidth/FixedwidthBatchReader.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ public FixedwidthBatchReader(FixedwidthFormatConfig config, int maxRecords) {
6565
this.maxRecords = maxRecords;
6666
}
6767

68-
6968
@Override
7069
public boolean open(FileSchemaNegotiator negotiator) {
7170
split = negotiator.split();
@@ -197,4 +196,4 @@ private boolean parseLine(String line, RowSetLoader writer) throws IOException {
197196
return true;
198197
}
199198

200-
}
199+
}

contrib/format-fixedwidth/src/main/java/org/apache/drill/exec/store/fixedwidth/FixedwidthFieldConfig.java

+18-28
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.drill.exec.store.fixedwidth;
2020

21+
import com.fasterxml.jackson.annotation.JsonCreator;
2122
import com.fasterxml.jackson.annotation.JsonInclude;
2223
import com.fasterxml.jackson.annotation.JsonProperty;
2324
import com.fasterxml.jackson.annotation.JsonTypeName;
@@ -29,12 +30,12 @@
2930

3031
@JsonTypeName("fixedwidthReaderFieldDescription")
3132
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
32-
public class FixedwidthFieldConfig {
33+
public class FixedwidthFieldConfig implements Comparable<FixedwidthFieldConfig> {
3334

3435
private final String name;
3536
private final int index;
36-
private final int width;
37-
private final TypeProtos.MinorType type;
37+
private int width;
38+
private TypeProtos.MinorType type;
3839
private final String dateTimeFormat;
3940

4041
public FixedwidthFieldConfig(@JsonProperty("name") String name,
@@ -44,41 +45,17 @@ public FixedwidthFieldConfig(@JsonProperty("name") String name,
4445
this(name, index, width, type, null);
4546
}
4647

48+
@JsonCreator
4749
public FixedwidthFieldConfig(@JsonProperty("name") String name,
4850
@JsonProperty("index") int index,
4951
@JsonProperty("width") int width,
5052
@JsonProperty("type") TypeProtos.MinorType type,
5153
@JsonProperty("dateTimeFormat") String dateTimeFormat) {
52-
5354
this.name = name;
5455
this.index = index;
5556
this.width = width;
5657
this.type = type;
5758
this.dateTimeFormat = dateTimeFormat;
58-
59-
60-
// Need to verify names are different - where can we access all the names of other columns
61-
// if(name != null){
62-
// this.name = name;
63-
// } else{
64-
// throw new IllegalArgumentException("Invalid name"); //Is this the right way to throw an exception if blank? What about if not valid SQL?
65-
// }
66-
//
67-
// if (index >= 0){
68-
// this.index = index;
69-
// } else {
70-
// throw new IllegalArgumentException("Index must be 0 or greater");
71-
// }
72-
//
73-
// //Can modify this to be optional and be calculated based on start index of this field and next
74-
// this.width = width;
75-
//
76-
// if (type == null){
77-
// this.type = TypeProtos.MinorType.VARCHAR;
78-
// } else {
79-
// this.type = type;
80-
// }
81-
// this.dateTimeFormat = dateTimeFormat; // No default required, null is allowed
8259
}
8360

8461
public String getName() {return name;}
@@ -87,8 +64,16 @@ public FixedwidthFieldConfig(@JsonProperty("name") String name,
8764

8865
public int getWidth() {return width;}
8966

67+
public void setWidth(int value) {
68+
this.width = value;
69+
}
70+
9071
public TypeProtos.MinorType getType() {return type;}
9172

73+
public void setType() {
74+
this.type = TypeProtos.MinorType.VARCHAR;
75+
}
76+
9277
public String getDateTimeFormat() {return dateTimeFormat;}
9378

9479
@Override
@@ -122,4 +107,9 @@ public String toString() {
122107
.field("dateTimeFormat", dateTimeFormat)
123108
.toString();
124109
}
110+
111+
@Override
112+
public int compareTo(FixedwidthFieldConfig o) {
113+
return new Integer(this.getIndex()).compareTo(o.getIndex());
114+
}
125115
}

contrib/format-fixedwidth/src/main/java/org/apache/drill/exec/store/fixedwidth/FixedwidthFormatConfig.java

+78-22
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@
2626
import org.apache.drill.common.PlanStringBuilder;
2727
import org.apache.drill.common.exceptions.UserException;
2828
import org.apache.drill.common.logical.FormatPluginConfig;
29+
import org.apache.drill.common.types.TypeProtos;
2930
import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
3031
import org.slf4j.Logger;
3132
import org.slf4j.LoggerFactory;
3233

3334
import java.util.ArrayList;
35+
import java.util.Arrays;
3436
import java.util.Collections;
3537
import java.util.HashSet;
3638
import java.util.List;
@@ -43,11 +45,15 @@ public class FixedwidthFormatConfig implements FormatPluginConfig {
4345
private static final Logger logger = LoggerFactory.getLogger(FixedwidthFormatConfig.class);
4446
private final List<String> extensions;
4547
private final List<FixedwidthFieldConfig> fields;
48+
private final List<TypeProtos.MinorType> validDataTypes = Arrays.asList(new TypeProtos.MinorType[]{TypeProtos.MinorType.INT, TypeProtos.MinorType.VARCHAR,
49+
TypeProtos.MinorType.DATE, TypeProtos.MinorType.TIME, TypeProtos.MinorType.TIMESTAMP, TypeProtos.MinorType.FLOAT4,
50+
TypeProtos.MinorType.FLOAT8, TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARDECIMAL});
4651

4752
@JsonCreator
4853
public FixedwidthFormatConfig(@JsonProperty("extensions") List<String> extensions,
4954
@JsonProperty("fields") List<FixedwidthFieldConfig> fields) {
5055
this.extensions = extensions == null ? Collections.singletonList("fwf") : ImmutableList.copyOf(extensions);
56+
Collections.sort(fields);
5157
this.fields = fields;
5258

5359
validateFieldInput();
@@ -133,13 +139,56 @@ public List<Integer> getFieldWidths() {
133139
return result;
134140
}
135141

142+
@JsonIgnore
143+
public void setFieldWidths(int i, int value) {
144+
for (FixedwidthFieldConfig field : fields) {
145+
if (field.getIndex() == i) {
146+
field.setWidth(value);
147+
}
148+
}
149+
}
150+
151+
@JsonIgnore
152+
public List<TypeProtos.MinorType> getFieldTypes() {
153+
List<TypeProtos.MinorType> result = new ArrayList<>();
154+
if (! hasFields()) {
155+
return result;
156+
}
157+
158+
for (FixedwidthFieldConfig field : fields) {
159+
result.add(field.getType());
160+
}
161+
return result;
162+
}
163+
164+
@JsonIgnore
165+
public void setFieldTypes(int i) {
166+
for (FixedwidthFieldConfig field : fields) {
167+
if (field.getIndex() == i) {
168+
field.setType();
169+
}
170+
}
171+
}
172+
136173
@JsonIgnore
137174
public void validateFieldInput(){
138175
Set<String> uniqueNames = new HashSet<>();
139-
for (String name : this.getFieldNames()){
140-
/*if (name.length() == 0){
176+
List<Integer> fieldIndices = this.getFieldIndices();
177+
List<Integer> fieldWidths = this.getFieldWidths();
178+
List<String> fieldNames = this.getFieldNames();
179+
List<TypeProtos.MinorType> fieldTypes = this.getFieldTypes();
180+
int width = 0;
181+
int prevIndexAndWidth = -1;
141182

142-
}*/
183+
// Ensure no two fields have the same name
184+
for (String name : this.getFieldNames()){
185+
if (name.length() == 0){
186+
throw UserException
187+
.validationError()
188+
.message("Blank field name detected.")
189+
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
190+
.build(logger);
191+
}
143192
if (uniqueNames.contains(name)){
144193
throw UserException
145194
.validationError()
@@ -149,10 +198,6 @@ public void validateFieldInput(){
149198
}
150199
uniqueNames.add(name);
151200
}
152-
List<Integer> fieldIndices = this.getFieldIndices();
153-
List<Integer> fieldWidths = this.getFieldWidths();
154-
List<String> fieldNames = this.getFieldNames();
155-
int prevIndexAndWidth = -1;
156201

157202
//assuming that fieldIndices is the same size as fieldWidths, width is required
158203
for (int i = 0; i<fieldIndices.size(); i++) {
@@ -163,24 +208,35 @@ public void validateFieldInput(){
163208
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
164209
.build(logger);
165210
}
166-
/*
167-
else if (fieldWidths.get(i) == null || fieldWidths.get(i) < 1) {
211+
else if (fieldIndices.get(i) <= prevIndexAndWidth) {
212+
throw UserException
213+
.validationError()
214+
.message("Overlapping fields: " + fieldNames.get(i-1) + " and " + fieldNames.get(i))
215+
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
216+
.build(logger);
217+
}
218+
219+
if (fieldWidths.get(i) == null || fieldWidths.get(i) < 1) {
220+
// Come back to this - can we calculate this instead of throwing an error?
168221
if (i == fieldIndices.size()-1) {
169-
Integer width =
222+
throw UserException
223+
.validationError()
224+
.message("Width for field '" + fieldNames.get(i) + "' is empty.")
225+
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
226+
.build(logger);
170227
}
171-
Integer width = fieldIndices.get(i+1) - fieldIndices.get(i);
172-
fieldWidths.set(i, width);
228+
width = fieldIndices.get(i+1) - fieldIndices.get(i) - 1;
229+
setFieldWidths(fieldIndices.get(i), width);
230+
}
231+
prevIndexAndWidth = fieldIndices.get(i) + fieldWidths.get(i);
232+
233+
// Validate Field Type
234+
if (fieldTypes.get(i) == null || fieldTypes.get(i).toString().length() == 0) {
235+
setFieldTypes(fieldIndices.get(i));
236+
}
237+
else if (!validDataTypes.contains(fieldTypes.get(i))){
238+
setFieldTypes(fieldIndices.get(i)); //Should we throw an error or default to VARCHAR for data types that are not yet available in this plugin
173239
}
174-
*/
175-
else if (fieldIndices.get(i) <= prevIndexAndWidth) {
176-
throw UserException
177-
.validationError()
178-
.message("Overlapping fields: " + fieldNames.get(i-1) + " and " + fieldNames.get(i))
179-
.addContext("Plugin", FixedwidthFormatPlugin.DEFAULT_NAME)
180-
.build(logger);
181-
}
182-
prevIndexAndWidth = fieldIndices.get(i) + fieldWidths.get(i);
183240
}
184241
}
185-
186242
}

contrib/format-fixedwidth/src/test/java/org/apache/drill/exec/store/fixedwidth/TestFixedwidthRecordReader.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ public static void setup() throws Exception {
5151
FixedwidthFormatConfig formatConfig = new FixedwidthFormatConfig(Lists.newArrayList("fwf"),
5252
Lists.newArrayList(
5353
new FixedwidthFieldConfig("Number", 1, 5, TypeProtos.MinorType.VARDECIMAL),
54-
new FixedwidthFieldConfig("Letter", 7,4, TypeProtos.MinorType.VARCHAR, ""),
5554
new FixedwidthFieldConfig("Address",12, 3,TypeProtos.MinorType.INT, ""),
55+
new FixedwidthFieldConfig("Letter", 7,4, TypeProtos.MinorType.VARCHAR, ""),
5656
new FixedwidthFieldConfig("Date",16, 10,TypeProtos.MinorType.DATE, "MM-dd-yyyy"),
5757
new FixedwidthFieldConfig( "Time", 27, 8,TypeProtos.MinorType.TIME,"HH:mm:ss" ),
5858
new FixedwidthFieldConfig("DateTime", 36, 23,TypeProtos.MinorType.TIMESTAMP, "MM-dd-yyyy'T'HH:mm:ss.SSX" )

0 commit comments

Comments
 (0)