org.apache.avro.SchemaParseException: Can't redefine: list #2058

Closed
heuermh opened this issue Sep 30, 2018 · 0 comments · Fixed by #2056

heuermh commented Sep 30, 2018

...
Cause: org.apache.avro.SchemaParseException: Can't redefine: list
  at org.apache.avro.Schema$Names.put(Schema.java:1128)
  at org.apache.avro.Schema$NamedSchema.writeNameRef(Schema.java:562)
  at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:690)
  at org.apache.avro.Schema$ArraySchema.toJson(Schema.java:805)
  at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:882)
  at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:716)
  at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:701)
  at org.apache.avro.Schema$UnionSchema.toJson(Schema.java:882)
  at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:716)
  at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:701)
  at org.apache.avro.Schema.toString(Schema.java:324)
  at org.apache.avro.SchemaCompatibility.checkReaderWriterCompatibility(SchemaCompatibility.java:68)
  at org.apache.parquet.avro.AvroRecordConverter.isElementType(AvroRecordConverter.java:866)
  at org.apache.parquet.avro.AvroIndexedRecordConverter$AvroArrayConverter.<init>(AvroIndexedRecordConverter.java:333)
  at org.apache.parquet.avro.AvroIndexedRecordConverter.newConverter(AvroIndexedRecordConverter.java:172)
  at org.apache.parquet.avro.AvroIndexedRecordConverter.<init>(AvroIndexedRecordConverter.java:94)
  at org.apache.parquet.avro.AvroIndexedRecordConverter.newConverter(AvroIndexedRecordConverter.java:168)
  at org.apache.parquet.avro.AvroIndexedRecordConverter.<init>(AvroIndexedRecordConverter.java:94)
  at org.apache.parquet.avro.AvroIndexedRecordConverter.<init>(AvroIndexedRecordConverter.java:66)
  at org.apache.parquet.avro.AvroCompatRecordMaterializer.<init>(AvroCompatRecordMaterializer.java:34)
  at org.apache.parquet.avro.AvroReadSupport.newCompatMaterializer(AvroReadSupport.java:144)
  at org.apache.parquet.avro.AvroReadSupport.prepareForRead(AvroReadSupport.java:136)
  at org.apache.parquet.hadoop.InternalParquetRecordReader.initialize(InternalParquetRecordReader.java:204)
  at org.apache.parquet.hadoop.ParquetRecordReader.initializeInternalReader(ParquetRecordReader.java:182)
  at org.apache.parquet.hadoop.ParquetRecordReader.initialize(ParquetRecordReader.java:140)
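
For reference, a hedged sketch of a read that goes through the code path in this stack trace (AvroReadSupport -> AvroCompatRecordMaterializer -> AvroIndexedRecordConverter). This is not the exact command that produced the report; the ADAM load call and the SparkContext sc are assumptions, and the directory names are taken from the schema dumps below.

import org.bdgenomics.adam.rdd.ADAMContext._

// Reading the Dataset-written directory back through ADAM's Avro/Parquet
// RDD path fails while materializing records (hypothetical reproduction):
val fromDataset = sc.loadVariants("variants-from-dataset.adam")
fromDataset.rdd.count()  // org.apache.avro.SchemaParseException: Can't redefine: list

// The RDD-written directory, with the Avro-style list encoding shown in the
// second schema dump below, is expected to read back without error:
val fromRdd = sc.loadVariants("variants-from-rdd.adam")
fromRdd.rdd.count()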
$ parquet-tools schema variants-from-dataset.adam/part-00000-20884749-ccd3-4a54-8700-104ac2f709d1-c000.snappy.parquet
message spark_schema {
  optional binary contigName (UTF8);
  optional int64 start;
  optional int64 end;
  optional group names (LIST) {
    repeated group list {
      optional binary element (UTF8);
    }
  }
  optional boolean splitFromMultiAllelic;
  optional binary referenceAllele (UTF8);
  optional binary alternateAllele (UTF8);
  optional double quality;
  optional boolean filtersApplied;
  optional boolean filtersPassed;
  optional group filtersFailed (LIST) {
    repeated group list {
      optional binary element (UTF8);
    }
  }
  optional group annotation {
    optional binary ancestralAllele (UTF8);
    optional int32 alleleCount;
    optional int32 readDepth;
    optional int32 forwardReadDepth;
    optional int32 reverseReadDepth;
    optional int32 referenceReadDepth;
    optional int32 referenceForwardReadDepth;
    optional int32 referenceReverseReadDepth;
    optional float alleleFrequency;
    optional binary cigar (UTF8);
    optional boolean dbSnp;
    optional boolean hapMap2;
    optional boolean hapMap3;
    optional boolean validated;
    optional boolean thousandGenomes;
    optional boolean somatic;
    optional group transcriptEffects (LIST) {
      repeated group list {
        optional group element {
          optional binary alternateAllele (UTF8);
          optional group effects (LIST) {
            repeated group list {
              optional binary element (UTF8);
            }
          }
          optional binary geneName (UTF8);
          optional binary geneId (UTF8);
          optional binary featureType (UTF8);
          optional binary featureId (UTF8);
          optional binary biotype (UTF8);
          optional int32 rank;
          optional int32 total;
          optional binary genomicHgvs (UTF8);
          optional binary transcriptHgvs (UTF8);
          optional binary proteinHgvs (UTF8);
          optional int32 cdnaPosition;
          optional int32 cdnaLength;
          optional int32 cdsPosition;
          optional int32 cdsLength;
          optional int32 proteinPosition;
          optional int32 proteinLength;
          optional int32 distance;
          optional group messages (LIST) {
            repeated group list {
              optional binary element (UTF8);
            }
          }
        }
      }
    }
    optional group attributes (MAP) {
      repeated group key_value {
        required binary key (UTF8);
        optional binary value (UTF8);
      }
    }
  }
}
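
A hedged sketch of how a file with the spark_schema layout above typically comes about (the exact save call behind this report is not shown): writing through the Spark SQL Dataset/DataFrame writer, which by default (spark.sql.parquet.writeLegacyFormat=false) emits the standard 3-level LIST encoding with inner groups named list and element. Here variantsDs is a hypothetical Dataset of variants.

// Hypothetical Dataset save; produces the nested list/element groups above.
variantsDs.write.parquet("variants-from-dataset.adam")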
$ parquet-tools schema variants-from-rdd.adam/part-r-00000.gz.parquet
message org.bdgenomics.formats.avro.Variant {
  optional binary contigName (UTF8);
  optional int64 start;
  optional int64 end;
  required group names (LIST) {
    repeated binary array (UTF8);
  }
  optional boolean splitFromMultiAllelic;
  optional binary referenceAllele (UTF8);
  optional binary alternateAllele (UTF8);
  optional double quality;
  optional boolean filtersApplied;
  optional boolean filtersPassed;
  required group filtersFailed (LIST) {
    repeated binary array (UTF8);
  }
  optional group annotation {
    optional binary ancestralAllele (UTF8);
    optional int32 alleleCount;
    optional int32 readDepth;
    optional int32 forwardReadDepth;
    optional int32 reverseReadDepth;
    optional int32 referenceReadDepth;
    optional int32 referenceForwardReadDepth;
    optional int32 referenceReverseReadDepth;
    optional float alleleFrequency;
    optional binary cigar (UTF8);
    optional boolean dbSnp;
    optional boolean hapMap2;
    optional boolean hapMap3;
    optional boolean validated;
    optional boolean thousandGenomes;
    optional boolean somatic;
    required group transcriptEffects (LIST) {
      repeated group array {
        optional binary alternateAllele (UTF8);
        required group effects (LIST) {
          repeated binary array (UTF8);
        }
        optional binary geneName (UTF8);
        optional binary geneId (UTF8);
        optional binary featureType (UTF8);
        optional binary featureId (UTF8);
        optional binary biotype (UTF8);
        optional int32 rank;
        optional int32 total;
        optional binary genomicHgvs (UTF8);
        optional binary transcriptHgvs (UTF8);
        optional binary proteinHgvs (UTF8);
        optional int32 cdnaPosition;
        optional int32 cdnaLength;
        optional int32 cdsPosition;
        optional int32 cdsLength;
        optional int32 proteinPosition;
        optional int32 proteinLength;
        optional int32 distance;
        required group messages (LIST) {
          repeated binary array (ENUM);
        }
      }
    }
    required group attributes (MAP) {
      repeated group map (MAP_KEY_VALUE) {
        required binary key (UTF8);
        required binary value (UTF8);
      }
    }
  }
}
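Comparing the two dumps: the Dataset-written file uses Parquet's standard 3-level LIST encoding (optional groups containing repeated groups named list with element fields), while the RDD-written file uses the older 2-level Avro-style encoding (required groups with repeated ... array fields); in the RDD-written file the messages list is also encoded as ENUM rather than UTF8, and the attributes map uses MAP_KEY_VALUE with required values. A hedged workaround sketch, not from the report: the Dataset-written directory stays readable through Spark SQL itself, which understands the 3-level encoding, even though the Avro/Parquet RDD path above fails on it. The SparkSession spark is an assumption.

// Hedged workaround sketch: recover the data via the Spark SQL reader.
val df = spark.read.parquet("variants-from-dataset.adam")
df.printSchema()
df.count()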