diff --git a/src/commands/annotate_cmd.rs b/src/commands/annotate_cmd.rs index 7e4e6b1..3534706 100644 --- a/src/commands/annotate_cmd.rs +++ b/src/commands/annotate_cmd.rs @@ -34,6 +34,7 @@ pub fn annotate_main( e }) .collect(); + EchtVars::add_cmd_header(&mut header, &ipath, &opath, &include_expr, epaths); let parser = fasteval::Parser::new(); let mut slab = fasteval::Slab::new(); diff --git a/src/commands/encoder_cmd.rs b/src/commands/encoder_cmd.rs index 7081909..ef1f723 100644 --- a/src/commands/encoder_cmd.rs +++ b/src/commands/encoder_cmd.rs @@ -187,7 +187,7 @@ pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) { let mut lookups = HashMap::new(); for f in fields.iter_mut() { - let (tt, _tl) = if f.field == "FILTER" { + let (tt, tl) = if f.field == "FILTER" { (TagType::String, TagLength::Variable) } else { header @@ -216,7 +216,18 @@ pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) { "[echtvar] unsupported field type: {:?} for field {}", tt, f.field ), - } + }; + match tl { + TagLength::Fixed(value) => f.number = value.to_string(), + TagLength::AltAlleles => f.number = "A".to_string(), + TagLength::Alleles => f.number = "R".to_string(), + TagLength::Genotypes => f.number = "G".to_string(), + TagLength::Variable => f.number = ".".to_string(), + _ => panic!( + "[echtvar] unsupported field length: {:?} for field {}", + tl, f.field + ), + }; } let zfile = std::fs::File::create(&zpath).unwrap(); diff --git a/src/lib/echtvar.rs b/src/lib/echtvar.rs index cce4d7c..1e5bd00 100644 --- a/src/lib/echtvar.rs +++ b/src/lib/echtvar.rs @@ -177,8 +177,13 @@ impl EchtVars { for e in &self.fields { header.push_record( format!( - "##INFO=", + "##INFO=", e.alias, + if vec!["A", "R", "G"].iter().any(|n| n == &e.number) { + "1" + } else { + &e.number + }, if e.ftype == fields::FieldType::Integer { "Integer" } else if e.ftype == fields::FieldType::Categorical { @@ -186,12 +191,27 @@ impl EchtVars { } else { "Float" }, - format!("added by echtvar from {}", path) + if &e.description.to_string() == "added by echtvar" { + format!("added by echtvar from {}", path) + } else { + format!("added by echtvar {}", e.description.to_string()) + } ) .as_bytes(), ); } } + pub fn add_cmd_header(header: &mut bcf::header::Header, vpath: &str, opath: &str, include_expr: &Option<&str>, epaths: Vec<&str>) { + header.push_record( + format!( + "##echtvar_annoCommand=anno -i {:?} {} {} -e {:?}", + include_expr, + vpath, + opath, + epaths.join(" -e ") + ).as_bytes(), + ); + } #[inline(always)] pub fn set_position( diff --git a/src/lib/fields.rs b/src/lib/fields.rs index 7755dd9..d16b7c3 100644 --- a/src/lib/fields.rs +++ b/src/lib/fields.rs @@ -15,6 +15,8 @@ pub struct Field { pub missing_value: i32, #[serde(default = "default_missing_string")] pub missing_string: std::string::String, + #[serde(default = "default_description_string")] + pub description: std::string::String, #[serde(default)] pub zigzag: bool, @@ -23,6 +25,11 @@ pub struct Field { pub multiplier: u32, #[serde(default)] pub ftype: FieldType, + #[serde(default)] + pub number: std::string::String, + + + #[serde(default = "default_values_i", skip_serializing)] pub values_i: usize, } @@ -33,6 +40,9 @@ fn default_missing_value() -> i32 { fn default_missing_string() -> std::string::String { "MISSING".to_string() } +fn default_description_string() -> std::string::String { + "added by echtvar".to_string() +} fn default_multiplier() -> u32 { 1 } @@ -47,9 +57,11 @@ impl Default for Field { alias: "name".to_string(), missing_value: -1, missing_string: "MISSING".to_string(), + description: "added by echtvar".to_string(), zigzag: false, multiplier: 1, ftype: FieldType::Integer, + number: ".".to_string(), values_i: usize::MAX, } } diff --git a/tests/big.sh b/tests/big.sh index 332dcc9..4139bae 100644 --- a/tests/big.sh +++ b/tests/big.sh @@ -16,6 +16,10 @@ for mod in 2 3 4 5; do # check that some variants remain unannotated python3 check.py anno.vcf.gz $mod + # check custom INFO Description used from config + python3 check-string-for-issue33.py anno.vcf.gz aval 1 "added by echtvar TEST description field" + # check A/R/G converted to 1 for number + python3 check-string-for-issue33.py anno.vcf.gz external_AC 1 "added by echtvar Theoretical AC from another source" if [[ "mod" -ne "1" ]]; then @@ -24,6 +28,10 @@ for mod in 2 3 4 5; do $echtvar encode test.echtvar1 test1.hjson generated-subset1.vcf $echtvar anno generated-all.vcf -e test.echtvar0 -e test.echtvar1 anno.vcf.gz python3 check.py anno.vcf.gz 1 + # check default Description used + python3 check-string-for-issue33.py anno.vcf.gz aval1 1 "added by echtvar from test.echtvar1" + # check value . is left alone + python3 check-string-for-issue33.py anno.vcf.gz external_str . "added by echtvar from test.echtvar1" fi diff --git a/tests/check-string-for-issue33.py b/tests/check-string-for-issue33.py new file mode 100644 index 0000000..ac732d6 --- /dev/null +++ b/tests/check-string-for-issue33.py @@ -0,0 +1,29 @@ +""" +Small test to ensure expected INFO fields generated +""" +import sys +import gzip + +fpath = sys.argv[1] +field = sys.argv[2] +num = sys.argv[3] +desc = sys.argv[4] + +ok = False +for line in gzip.open(sys.argv[1], 'rt'): + if line.startswith("##INFO=