From 672c9879fc18fd9be9b2b6d08362bedeed6dd344 Mon Sep 17 00:00:00 2001 From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com> Date: Tue, 31 Oct 2023 08:32:23 -0400 Subject: [PATCH 01/13] fix: MD to RST link formatting in overview --- docs/source/overview.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/overview.rst b/docs/source/overview.rst index d3d638f..37fa9a2 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -3,7 +3,7 @@ Overview `alevin-fry`` is a suite of tools for the rapid, accurate and memory-frugal processing single-cell and single-nucleus sequencing data. It consumes RAD files generated by `salmon alevin`, and performs common operations like generating permit lists, and estimating the number of distinct molecules from each gene within each cell. The focus in `alevin-fry`` is on safety, accuracy and efficiency (in terms of both time and memory usage). -You can read the paper describing alevin fry, "Alevin-fry unlocks rapid, accurate, and memory-frugal quantification of single-cell RNA-seq data" [here](https://www.nature.com/articles/s41592-022-01408-3), and the pre-print [on bioRxiv](https://www.biorxiv.org/content/10.1101/2021.06.29.450377v1). +You can read the paper describing alevin fry, "Alevin-fry unlocks rapid, accurate, and memory-frugal quantification of single-cell RNA-seq data" `here <https://www.nature.com/articles/s41592-022-01408-3>`_, and the pre-print `on bioRxiv <https://www.biorxiv.org/content/10.1101/2021.06.29.450377v1>`_. 
Other resources for alevin-fry ============================== From 06e23f29beda57c8618e5eedef8c95585fe64072 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Tue, 5 Mar 2024 10:27:53 -0500 Subject: [PATCH 02/13] compiling with libradicl 0.8.2-pre not yet tested --- Cargo.lock | 308 ++++++++++++++++++++++++++++++++++++++-------- Cargo.toml | 4 +- src/cellfilter.rs | 89 +++++++++----- src/collate.rs | 32 +++-- src/convert.rs | 110 +++++++++-------- src/eq_class.rs | 9 +- src/io_utils.rs | 23 ++-- src/main.rs | 2 +- src/pugutils.rs | 5 +- src/quant.rs | 65 +++++++--- 10 files changed, 467 insertions(+), 180 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 26619e3..d6f18e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,14 +10,15 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] @@ -56,7 +57,7 @@ dependencies = [ "rand", "rust-htslib", "sce", - "scroll", + "scroll 0.11.0", "serde", "serde_json", "slog", @@ -147,9 +148,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.71" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" [[package]] name = "approx" @@ -203,9 +204,9 @@ dependencies = [ [[package]] name = "bio-types" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c915bf6c578d40e1e497f8c571a4514bc89c3195cec2abb8be6dd5500405c752" +checksum = 
"9d45749b87f21808051025e9bf714d14ff4627f9d8ca967eade6946ea769aa4a" dependencies = [ "derive-new", "lazy_static", @@ -215,20 +216,31 @@ dependencies = [ "thiserror", ] +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + [[package]] name = "bstr" -version = "1.5.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", - "once_cell", "regex-automata", "serde", ] @@ -267,6 +279,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + [[package]] name = "bzip2" version = "0.4.4" @@ -337,7 +355,7 @@ checksum = "2c9b4a88bb4bc35d3d6f65a21b0f0bafe9c894fa00978de242c555ec28bea1c0" dependencies = [ "anstream", "anstyle", - "bitflags", + "bitflags 1.3.2", "clap_lex", "once_cell", "strsim", @@ -353,7 +371,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.52", ] [[package]] @@ -487,12 +505,12 @@ checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" [[package]] name = "dashmap" -version = 
"5.4.0" +version = "5.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown", + "hashbrown 0.14.3", "lock_api", "once_cell", "parking_lot_core", @@ -542,6 +560,12 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.1" @@ -620,6 +644,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + [[package]] name = "heck" version = "0.4.1" @@ -701,7 +731,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +dependencies = [ + "equivalent", + "hashbrown 0.14.3", ] [[package]] @@ -788,6 +828,70 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexical-core" +version = "0.8.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.147" @@ -812,15 +916,17 @@ dependencies = [ [[package]] name = "libradicl" -version = "0.6.0" -source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#ea255cc40219192feb328d75e485886341853f0b" +version = "0.8.1" +source = 
"git+https://github.com/COMBINE-lab/libradicl?branch=develop#232b2b32bc2b8a1aebd5f11dad6c495d4f0c86c1" dependencies = [ "ahash", + "anyhow", "bio-types", "dashmap", + "noodles-bam", + "noodles-sam", "num", - "rust-htslib", - "scroll", + "scroll 0.12.0", "serde", "smallvec", "snap", @@ -890,9 +996,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memoffset" @@ -986,11 +1092,75 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "noodles-bam" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3189e8ecee801ab5c3f4ea9908c4196b429137d8d35d733f00f6681f9188be7" +dependencies = [ + "bit-vec", + "bstr", + "byteorder", + "bytes", + "indexmap 2.2.5", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + "noodles-sam", +] + +[[package]] +name = "noodles-bgzf" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8970db2e84adb1007377dd3988258d7a64e3fc4c05602ebf94e1f8cba207c030" +dependencies = [ + "byteorder", + "bytes", + "crossbeam-channel", + "flate2", +] + +[[package]] +name = "noodles-core" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7336c3be652de4e05444c9b12a32331beb5ba3316e8872d92bfdd8ef3b06c282" + +[[package]] +name = "noodles-csi" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a60dfe0919f7ecbd081a82eb1d32e8f89f9041932d035fe8309073c8c01277bf" +dependencies = [ + "bit-vec", + "byteorder", + "indexmap 2.2.5", + "noodles-bgzf", + "noodles-core", +] + +[[package]] +name = "noodles-sam" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f0d8e441368374f6e144989f823fd7c05e58cdaa3f97d22bb4d75b534327b87" +dependencies = [ + "bitflags 2.4.2", + "bstr", + "indexmap 2.2.5", + "lexical-core", + "memchr", + "noodles-bgzf", + "noodles-core", + "noodles-csi", +] + [[package]] name = "num" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ "num-bigint", "num-complex 0.4.3", @@ -1146,7 +1316,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 1.9.3", ] [[package]] @@ -1169,9 +1339,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.63" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -1184,9 +1354,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.29" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -1265,7 +1435,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -1274,7 +1444,7 @@ version = "0.3.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -1290,26 +1460,32 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.4" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", + "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.1.10" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] [[package]] name = "regex-syntax" -version = "0.7.2" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rust-htslib" @@ -1347,7 +1523,7 @@ version = "0.37.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", @@ -1408,6 +1584,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" +[[package]] +name = "scroll" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" + [[package]] name = "semver" version = 
"0.1.20" @@ -1416,22 +1598,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.164" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.164" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.52", ] [[package]] @@ -1491,9 +1673,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.10.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "snap" @@ -1516,6 +1698,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "statrs" version = "0.16.0" @@ -1561,9 +1749,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.22" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -1614,7 +1802,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", 
- "syn 2.0.22", + "syn 2.0.52", ] [[package]] @@ -1799,7 +1987,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.52", "wasm-bindgen-shared", ] @@ -1821,7 +2009,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.22", + "syn 2.0.52", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2013,3 +2201,23 @@ checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" dependencies = [ "lzma-sys", ] + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", +] diff --git a/Cargo.toml b/Cargo.toml index a69f6cf..5ec17a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,8 +37,8 @@ categories = ["command-line-utilities", "science"] [dependencies] # for local development, look in the libradicl git repository # but when published, pull the specified version -libradicl = { git = "https://github.com/COMBINE-lab/libradicl", branch = "develop", version = "0.6.0" } -anyhow = "1.0.71" +libradicl = { git = "https://github.com/COMBINE-lab/libradicl", branch = "develop", version = "0.8.0" } +anyhow = "1.0.80" arrayvec = "0.7.4" ahash = "0.8.3" bincode = "1.3.3" diff --git a/src/cellfilter.rs b/src/cellfilter.rs index 98d83b9..396c26b 100644 --- a/src/cellfilter.rs +++ b/src/cellfilter.rs @@ -7,7 +7,7 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, bail, Context}; use slog::crit; use slog::info; @@ -19,8 +19,13 @@ use 
bio_types::strand::Strand; use bstr::io::BufReadExt; use itertools::Itertools; use libradicl::exit_codes; -use libradicl::rad_types; +use libradicl::rad_types::{self, RadType, TagValue}; use libradicl::BarcodeLookupMap; +use libradicl::{ + chunk, + header::RadPrelude, + record::{AlevinFryReadRecord, AlevinFryRecordContext}, +}; use needletail::bitkmer::*; use num_format::{Locale, ToFormattedString}; use serde::Serialize; @@ -222,7 +227,7 @@ fn populate_unfiltered_barcode_map( fn process_unfiltered( mut hm: HashMap, mut unmatched_bc: Vec, - ft_vals: &rad_types::FileTags, + file_tag_map: &rad_types::TagMap, filter_meth: &CellFilterMethod, expected_ori: Strand, output_dir: &PathBuf, @@ -279,9 +284,20 @@ fn process_unfiltered( num_passing.to_formatted_string(&Locale::en) ); + let barcode_tag = file_tag_map + .get("cblen") + .expect("tag map must contain cblen"); + let barcode_len = match barcode_tag { + &TagValue::U8(x) => x as u16, + &TagValue::U16(x) => x, + &TagValue::U32(x) => x as u16, + &TagValue::U64(x) => x as u16, + _ => bail!("unexpected tag type"), + }; + // now, we create a second barcode map with just the barcodes // for cells we will keep / rescue. 
- let bcmap2 = BarcodeLookupMap::new(kept_bc, ft_vals.bclen as u32); + let bcmap2 = BarcodeLookupMap::new(kept_bc, barcode_len as u32); info!( log, "found {} cells with non-trivial number of reads by exact barcode match", @@ -382,7 +398,7 @@ fn process_unfiltered( })?; let o_path = parent.join("permit_freq.bin"); - match afutils::write_permit_list_freq(&o_path, ft_vals.bclen, &hm) { + match afutils::write_permit_list_freq(&o_path, barcode_len, &hm) { Ok(_) => {} Err(error) => { panic!("Error: {}", error); @@ -444,7 +460,7 @@ fn process_unfiltered( #[allow(clippy::unnecessary_unwrap, clippy::too_many_arguments)] fn process_filtered( hm: &HashMap, - ft_vals: &rad_types::FileTags, + file_tag_map: &rad_types::TagMap, filter_meth: &CellFilterMethod, expected_ori: Strand, output_dir: &PathBuf, @@ -460,6 +476,17 @@ fn process_filtered( freq.sort_unstable(); freq.reverse(); + let barcode_tag = file_tag_map + .get("cblen") + .expect("tag map must contain cblen"); + let barcode_len = match barcode_tag { + &TagValue::U8(x) => x as u16, + &TagValue::U16(x) => x, + &TagValue::U32(x) => x as u16, + &TagValue::U64(x) => x as u16, + _ => bail!("unexpected tag type"), + }; + // select from among supported filter methods match filter_meth { CellFilterMethod::KneeFinding => { @@ -489,7 +516,7 @@ fn process_filtered( valid_bc = permit_list_from_threshold(hm, min_freq); } CellFilterMethod::ExplicitList(valid_bc_file) => { - valid_bc = permit_list_from_file(valid_bc_file, ft_vals.bclen); + valid_bc = permit_list_from_file(valid_bc_file, barcode_len); } CellFilterMethod::ExpectCells(expected_num_cells) => { let robust_quantile = 0.99f64; @@ -509,7 +536,7 @@ fn process_filtered( // generate the map from each permitted barcode to all barcodes within // edit distance 1 of it. 
let full_permit_list = - afutils::generate_permitlist_map(&valid_bc, ft_vals.bclen as usize).unwrap(); + afutils::generate_permitlist_map(&valid_bc, barcode_len as usize).unwrap(); let s2 = ahash::RandomState::with_seeds(2u64, 7u64, 1u64, 8u64); let mut permitted_map = HashMap::with_capacity_and_hasher(valid_bc.len(), s2); @@ -532,7 +559,7 @@ fn process_filtered( })?; let o_path = parent.join("permit_freq.bin"); - match afutils::write_permit_list_freq(&o_path, ft_vals.bclen, &permitted_map) { + match afutils::write_permit_list_freq(&o_path, barcode_len, &permitted_map) { Ok(_) => {} Err(error) => { panic!("Error: {}", error); @@ -541,7 +568,7 @@ fn process_filtered( let o_path = parent.join("all_freq.bin"); - match afutils::write_permit_list_freq(&o_path, ft_vals.bclen, hm) { + match afutils::write_permit_list_freq(&o_path, barcode_len, hm) { Ok(_) => {} Err(error) => { panic!("Error: {}", error); @@ -630,7 +657,9 @@ pub fn generate_permit_list(gpl_opts: GenPermitListOpts) -> anyhow::Result let i_file = File::open(i_dir.join("map.rad")).context("could not open input rad file")?; let mut br = BufReader::new(i_file); - let hdr = rad_types::RadHeader::from_bytes(&mut br); + + let prelude = RadPrelude::from_bytes(&mut br)?; + let hdr = &prelude.hdr; info!( log, "paired : {:?}, ref_count : {}, num_chunks : {}", @@ -638,24 +667,25 @@ pub fn generate_permit_list(gpl_opts: GenPermitListOpts) -> anyhow::Result hdr.ref_count.to_formatted_string(&Locale::en), hdr.num_chunks.to_formatted_string(&Locale::en) ); + // file-level - let fl_tags = rad_types::TagSection::from_bytes(&mut br); + let fl_tags = &prelude.file_tags; info!(log, "read {:?} file-level tags", fl_tags.tags.len()); // read-level - let rl_tags = rad_types::TagSection::from_bytes(&mut br); + let rl_tags = &prelude.read_tags; info!(log, "read {:?} read-level tags", rl_tags.tags.len()); // right now, we only handle BC and UMI types of U8—U64, so validate that const BNAME: &str = "b"; const UNAME: &str = "u"; - let 
mut bct: Option = None; - let mut umit: Option = None; + let mut bct: Option = None; + let mut umit: Option = None; for rt in &rl_tags.tags { // if this is one of our tags if rt.name == BNAME || rt.name == UNAME { - if rad_types::decode_int_type_tag(rt.typeid).is_none() { + if !rt.typeid.is_int_type() { crit!( log, "currently only RAD types 1--4 are supported for 'b' and 'u' tags." @@ -671,21 +701,19 @@ pub fn generate_permit_list(gpl_opts: GenPermitListOpts) -> anyhow::Result } } } + assert!(bct.is_some(), "barcode type tag must be present."); + assert!(umit.is_some(), "umi type tag must be present."); // alignment-level - let al_tags = rad_types::TagSection::from_bytes(&mut br); + let al_tags = &prelude.aln_tags; info!(log, "read {:?} alignemnt-level tags", al_tags.tags.len()); - let ft_vals = rad_types::FileTags::from_bytes(&mut br); - info!(log, "File-level tag values {:?}", ft_vals); + let file_tag_map = prelude.file_tags.parse_tags_from_bytes(&mut br)?; + info!(log, "File-level tag values {:?}", file_tag_map); + let record_context = prelude.get_record_context::()?; let mut num_reads: usize = 0; - let bc_type = rad_types::decode_int_type_tag(bct.expect("no barcode tag description present.")) - .context("unknown barcode type id.")?; - let umi_type = rad_types::decode_int_type_tag(umit.expect("no umi tag description present")) - .context("unknown barcode type id.")?; - // if dealing with filtered type let s = ahash::RandomState::with_seeds(2u64, 7u64, 1u64, 8u64); let mut hm = HashMap::with_hasher(s); @@ -702,7 +730,8 @@ pub fn generate_permit_list(gpl_opts: GenPermitListOpts) -> anyhow::Result // the unfiltered_bc_count map must be valid in this branch if let Some(mut hmu) = unfiltered_bc_counts { for _ in 0..(hdr.num_chunks as usize) { - let c = rad_types::Chunk::from_bytes(&mut br, &bc_type, &umi_type); + let c = + chunk::Chunk::::from_bytes(&mut br, &record_context); num_orientation_compat_reads += update_barcode_hist_unfiltered( &mut hmu, &mut unmatched_bc, 
@@ -723,7 +752,7 @@ pub fn generate_permit_list(gpl_opts: GenPermitListOpts) -> anyhow::Result process_unfiltered( hmu, unmatched_bc, - &ft_vals, + &file_tag_map, &filter_meth, expected_ori, output_dir, @@ -740,7 +769,7 @@ pub fn generate_permit_list(gpl_opts: GenPermitListOpts) -> anyhow::Result } _ => { for _ in 0..(hdr.num_chunks as usize) { - let c = rad_types::Chunk::from_bytes(&mut br, &bc_type, &umi_type); + let c = chunk::Chunk::::from_bytes(&mut br, &record_context); update_barcode_hist(&mut hm, &mut max_ambiguity_read, &c, &expected_ori); num_reads += c.reads.len(); } @@ -753,7 +782,7 @@ pub fn generate_permit_list(gpl_opts: GenPermitListOpts) -> anyhow::Result ); process_filtered( &hm, - &ft_vals, + &file_tag_map, &filter_meth, expected_ori, output_dir, @@ -900,7 +929,7 @@ pub fn update_barcode_hist_unfiltered( hist: &mut HashMap, unmatched_bc: &mut Vec, max_ambiguity_read: &mut usize, - chunk: &rad_types::Chunk, + chunk: &chunk::Chunk, expected_ori: &Strand, ) -> usize { let mut num_strand_compat_reads = 0usize; @@ -964,7 +993,7 @@ pub fn update_barcode_hist_unfiltered( pub fn update_barcode_hist( hist: &mut HashMap, max_ambiguity_read: &mut usize, - chunk: &rad_types::Chunk, + chunk: &chunk::Chunk, expected_ori: &Strand, ) { match expected_ori { diff --git a/src/collate.rs b/src/collate.rs index 40be896..6a151bb 100644 --- a/src/collate.rs +++ b/src/collate.rs @@ -16,8 +16,13 @@ use crate::utils::InternalVersionInfo; use bio_types::strand::{Strand, StrandError}; use crossbeam_queue::ArrayQueue; // use dashmap::DashMap; + +use libradicl::chunk; +use libradicl::header::{RadHeader, RadPrelude}; use libradicl::rad_types; +use libradicl::record::AlevinFryReadRecord; use libradicl::schema::TempCellInfo; + use num_format::{Locale, ToFormattedString}; use scroll::{Pread, Pwrite}; use serde_json::json; @@ -365,7 +370,7 @@ where let i_file = File::open(&input_rad_path).context("couldn't open input RAD file")?; let mut br = BufReader::new(i_file); - let hdr = 
rad_types::RadHeader::from_bytes(&mut br); + let hdr = RadHeader::from_bytes(&mut br)?; // the exact position at the end of the header, // precisely sizeof(u64) bytes beyond the num_chunks field. @@ -381,17 +386,22 @@ where ); // file-level - let fl_tags = rad_types::TagSection::from_bytes(&mut br); + let fl_tags = rad_types::TagSection::from_bytes(&mut br)?; info!(log, "read {:?} file-level tags", fl_tags.tags.len()); // read-level - let rl_tags = rad_types::TagSection::from_bytes(&mut br); + let rl_tags = rad_types::TagSection::from_bytes(&mut br)?; info!(log, "read {:?} read-level tags", rl_tags.tags.len()); // alignment-level - let al_tags = rad_types::TagSection::from_bytes(&mut br); + let al_tags = rad_types::TagSection::from_bytes(&mut br)?; info!(log, "read {:?} alignemnt-level tags", al_tags.tags.len()); - let ft_vals = rad_types::FileTags::from_bytes(&mut br); - info!(log, "File-level tag values {:?}", ft_vals); + // create the prelude and rebind the variables we need + let prelude = RadPrelude::from_header_and_tag_sections(hdr, fl_tags, rl_tags, al_tags); + let hdr = &prelude.hdr; + let rl_tags = &prelude.read_tags; + + let file_tag_map = prelude.file_tags.parse_tags_from_bytes(&mut br); + info!(log, "File-level tag values {:?}", file_tag_map); let bct = rl_tags.tags[0].typeid; let umit = rl_tags.tags[1].typeid; @@ -456,10 +466,10 @@ where correct_map.len().to_formatted_string(&Locale::en) ); - let cc = rad_types::ChunkConfig { + let cc = chunk::ChunkConfig { num_chunks: hdr.num_chunks, - bc_type: bct, - umi_type: umit, + bc_type: libradicl::rad_types::encode_type_tag(bct).expect("valid barcode tag type"), + umi_type: libradicl::rad_types::encode_type_tag(umit).expect("valid umi tag type"), }; // TODO: see if we can do this without the Arc @@ -631,7 +641,7 @@ where // worker threads. 
let mut buf = vec![0u8; 65536]; for cell_num in 0..(cc.num_chunks as usize) { - let (nbytes_chunk, nrec_chunk) = rad_types::Chunk::read_header(&mut br); + let (nbytes_chunk, nrec_chunk) = chunk::Chunk::::read_header(&mut br); buf.resize(nbytes_chunk as usize, 0); buf.pwrite::(nbytes_chunk, 0)?; buf.pwrite::(nrec_chunk, 4)?; @@ -720,7 +730,7 @@ where let bc_type = rad_types::decode_int_type_tag(cc.bc_type).context("unknown barcode type id.")?; let umi_type = - rad_types::decode_int_type_tag(cc.umi_type).context("unknown barcode type id.")?; + rad_types::decode_int_type_tag(cc.umi_type).context("unknown umi type id.")?; // have access to the input directory let input_dir: PathBuf = input_dir.clone(); // the output file diff --git a/src/convert.rs b/src/convert.rs index 47fe434..a9618c8 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -7,7 +7,7 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ -use anyhow::Context; +use anyhow::bail; use indicatif::{ProgressBar, ProgressStyle}; use slog::{crit, info}; //use num_format::{Locale}; @@ -15,13 +15,19 @@ use std::fs; use std::fs::File; use std::io::{stdout, BufReader, BufWriter, Cursor, Seek, SeekFrom, Write}; // use std::sync::{Arc, Mutex}; -use libradicl::rad_types; +// +use rust_htslib::{bam, bam::record::Aux, bam::Read}; + +use libradicl::rad_types::{self, RadType, TagValue}; use libradicl::utils::MASK_LOWER_31_U32; +use libradicl::{ + chunk, + header::RadPrelude, + record::{AlevinFryReadRecord, AlevinFryRecordContext}, +}; + use needletail::bitkmer::*; use rand::Rng; -use rust_htslib::bam::HeaderView; -use rust_htslib::{bam, bam::record::Aux, bam::Read}; -use std::collections::HashMap; use std::error::Error; use std::path::Path; use std::str; @@ -78,20 +84,6 @@ pub fn cb_string_to_u64(cb_str: &[u8]) -> Result> { Ok(cb_id) } -#[allow(dead_code)] -pub fn tid_2_contig(h: &HeaderView) -> HashMap { - let mut dict: HashMap = HashMap::with_capacity(46); - for (i, t) in h - .target_names() - 
.iter() - .map(|a| str::from_utf8(a).unwrap()) - .enumerate() - { - dict.insert(i as u32, t.to_owned()); - } - dict -} - pub fn bam2rad(input_file: P1, rad_file: P2, num_threads: u32, log: &slog::Logger) where P1: AsRef, @@ -121,7 +113,6 @@ where } let hdrv = bam.header().to_owned(); - // let tid_lookup: HashMap = tid_2_contig(&hdrv); let mut data = Cursor::new(vec![]); // initialize the header (do we need this ?) // let mut hdr = libradicl::RadHeader::from_bam_header(&hdrv); @@ -193,12 +184,12 @@ where let mut umi_tag_str = "ulen"; // str - type - rad_types::write_str_bin(cb_tag_str, &rad_types::RadIntId::U16, &mut data); + libradicl::io::write_str_bin(cb_tag_str, &rad_types::RadIntId::U16, &mut data); data.write_all(&typeid.to_le_bytes()) .expect("coudn't write to output file"); // str - type - rad_types::write_str_bin(umi_tag_str, &rad_types::RadIntId::U16, &mut data); + libradicl::io::write_str_bin(umi_tag_str, &rad_types::RadIntId::U16, &mut data); data.write_all(&typeid.to_le_bytes()) .expect("coudn't write to output file"); @@ -225,10 +216,10 @@ where // type is conditional on barcode and umi length let bc_typeid = match bclen { - 1..=4 => rad_types::encode_type_tag(rad_types::RadType::U8).unwrap(), - 5..=8 => rad_types::encode_type_tag(rad_types::RadType::U16).unwrap(), - 9..=16 => rad_types::encode_type_tag(rad_types::RadType::U32).unwrap(), - 17..=32 => rad_types::encode_type_tag(rad_types::RadType::U64).unwrap(), + 1..=4 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U8)).unwrap(), + 5..=8 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U16)).unwrap(), + 9..=16 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U32)).unwrap(), + 17..=32 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U64)).unwrap(), l => { crit!(log, "cannot encode barcode of length {} > 32", l); std::process::exit(1); @@ -236,10 +227,10 @@ where }; let umi_typeid = match umilen { - 1..=4 => 
rad_types::encode_type_tag(rad_types::RadType::U8).unwrap(), - 5..=8 => rad_types::encode_type_tag(rad_types::RadType::U16).unwrap(), - 9..=16 => rad_types::encode_type_tag(rad_types::RadType::U32).unwrap(), - 17..=32 => rad_types::encode_type_tag(rad_types::RadType::U64).unwrap(), + 1..=4 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U8)).unwrap(), + 5..=8 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U16)).unwrap(), + 9..=16 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U32)).unwrap(), + 17..=32 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U64)).unwrap(), l => { crit!(log, "cannot encode umi of length {} > 32", l); std::process::exit(1); @@ -248,11 +239,11 @@ where //info!(log, "CB LEN : {}, UMI LEN : {}", bclen, umilen); - rad_types::write_str_bin(cb_tag_str, &rad_types::RadIntId::U16, &mut data); + libradicl::io::write_str_bin(cb_tag_str, &rad_types::RadIntId::U16, &mut data); data.write_all(&bc_typeid.to_le_bytes()) .expect("coudn't write to output file"); - rad_types::write_str_bin(umi_tag_str, &rad_types::RadIntId::U16, &mut data); + libradicl::io::write_str_bin(umi_tag_str, &rad_types::RadIntId::U16, &mut data); data.write_all(&umi_typeid.to_le_bytes()) .expect("coudn't write to output file"); @@ -264,7 +255,7 @@ where // reference id let refid_str = "compressed_ori_refid"; typeid = 3u8; - rad_types::write_str_bin(refid_str, &rad_types::RadIntId::U16, &mut data); + libradicl::io::write_str_bin(refid_str, &rad_types::RadIntId::U16, &mut data); data.write_all(&typeid.to_le_bytes()) .expect("coudn't write to output file"); @@ -503,7 +494,8 @@ where { let i_file = File::open(rad_file).unwrap(); let mut br = BufReader::new(i_file); - let hdr = rad_types::RadHeader::from_bytes(&mut br); + let prelude = RadPrelude::from_bytes(&mut br)?; + let hdr = &prelude.hdr; // info!( // log, // "paired : {:?}, ref_count : {}, num_chunks : {}", @@ -512,23 +504,23 @@ where // 
hdr.num_chunks.to_formatted_string(&Locale::en) // ); // file-level - let _fl_tags = rad_types::TagSection::from_bytes(&mut br); + //let _fl_tags = rad_types::TagSection::from_bytes(&mut br); // info!(log, "read {:?} file-level tags", fl_tags.tags.len()); // read-level - let rl_tags = rad_types::TagSection::from_bytes(&mut br); + let rl_tags = &prelude.read_tags; // info!(log, "read {:?} read-level tags", rl_tags.tags.len()); // right now, we only handle BC and UMI types of U8—U64, so validate that const BNAME: &str = "b"; const UNAME: &str = "u"; - let mut bct: Option = None; - let mut umit: Option = None; + let mut bct: Option = None; + let mut umit: Option = None; for rt in &rl_tags.tags { // if this is one of our tags if rt.name == BNAME || rt.name == UNAME { - if rad_types::decode_int_type_tag(rt.typeid).is_none() { + if !rt.typeid.is_int_type() { crit!( log, "currently only RAD types 1--4 are supported for 'b' and 'u' tags." @@ -544,20 +536,38 @@ where } } } + assert!(bct.is_some(), "barcode type tag was missing!"); + assert!(umit.is_some(), "umi type tag was missing!"); // alignment-level - let _al_tags = rad_types::TagSection::from_bytes(&mut br); + // let _al_tags = rad_types::TagSection::from_bytes(&mut br); // info!(log, "read {:?} alignemnt-level tags", al_tags.tags.len()); - let ft_vals = rad_types::FileTags::from_bytes(&mut br); - // info!(log, "File-level tag values {:?}", ft_vals); + let file_tag_map = prelude.file_tags.parse_tags_from_bytes(&mut br)?; + info!(log, "File-level tag map {:?}", file_tag_map); + + let barcode_tag = file_tag_map + .get("cblen") + .expect("tag map must contain cblen"); + let barcode_len = match barcode_tag { + &TagValue::U8(x) => x as u16, + &TagValue::U16(x) => x, + &TagValue::U32(x) => x as u16, + &TagValue::U64(x) => x as u16, + _ => bail!("unexpected tag type"), + }; + + let umi_tag = file_tag_map.get("ulen").expect("tag map must contain ulen"); + let umi_len = match umi_tag { + &TagValue::U8(x) => x as u16, + 
&TagValue::U16(x) => x, + &TagValue::U32(x) => x as u16, + &TagValue::U64(x) => x as u16, + _ => bail!("unexpected tag type"), + }; let mut num_reads: u64 = 0; - - let bc_type = rad_types::decode_int_type_tag(bct.expect("no barcode tag description present.")) - .context("unknown barcode type id.")?; - let umi_type = rad_types::decode_int_type_tag(umit.expect("no umi tag description present")) - .context("unknown barcode type id.")?; + let record_context = prelude.get_record_context::()?; let stdout = stdout(); // get the global stdout entity let stdout_l = stdout.lock(); @@ -576,10 +586,10 @@ where let mut id = 0usize; for _ in 0..(hdr.num_chunks as usize) { - let c = rad_types::Chunk::from_bytes(&mut br, &bc_type, &umi_type); + let c = chunk::Chunk::::from_bytes(&mut br, &record_context); for read in c.reads.iter() { - let bc_mer: BitKmer = (read.bc, ft_vals.bclen as u8); - let umi_mer: BitKmer = (read.umi, ft_vals.umilen as u8); + let bc_mer: BitKmer = (read.bc, barcode_len as u8); + let umi_mer: BitKmer = (read.umi, umi_len as u8); // let umi = str::from_utf8(&umi_).unwrap(); let num_entries = read.refs.len(); diff --git a/src/eq_class.rs b/src/eq_class.rs index aebaf64..de8078c 100644 --- a/src/eq_class.rs +++ b/src/eq_class.rs @@ -2,7 +2,8 @@ use std::collections::HashMap; use std::hash::{BuildHasher, Hasher}; use std::io::BufRead; -use libradicl::rad_types; +use libradicl::chunk; +use libradicl::record::AlevinFryReadRecord; /** * Single-cell equivalence class @@ -276,7 +277,7 @@ impl EqMap { } #[allow(dead_code)] - fn init_from_small_chunk(&mut self, cell_chunk: &mut rad_types::Chunk) { + fn init_from_small_chunk(&mut self, cell_chunk: &mut chunk::Chunk) { //let rand_state = ahash::RandomState::with_seeds(2u64, 7u64, 1u64, 8u64); let mut hasher = self.eqid_map.hasher().build_hasher(); //let mut hasher = rand_state.build_hasher(); @@ -345,7 +346,7 @@ impl EqMap { pub fn init_from_chunk_gene_level( &mut self, - cell_chunk: &mut rad_types::Chunk, + cell_chunk: 
&mut chunk::Chunk, tid_to_gid: &[u32], ) { self.eqid_map.clear(); @@ -435,7 +436,7 @@ impl EqMap { } } - pub fn init_from_chunk(&mut self, cell_chunk: &mut rad_types::Chunk) { + pub fn init_from_chunk(&mut self, cell_chunk: &mut chunk::Chunk) { /* if cell_chunk.reads.len() < 10 { self.init_from_small_chunk(cell_chunk); diff --git a/src/io_utils.rs b/src/io_utils.rs index ac92042..7c246af 100644 --- a/src/io_utils.rs +++ b/src/io_utils.rs @@ -4,7 +4,9 @@ use crossbeam_queue::ArrayQueue; use indicatif::ProgressBar; use scroll::Pwrite; -use libradicl::rad_types; +use libradicl::chunk; +use libradicl::record::{AlevinFryReadRecord, AlevinFryRecordContext}; + use std::collections::HashSet; use std::io::Read; use std::sync::Arc; @@ -69,7 +71,7 @@ pub(crate) fn fill_work_queue( // and we are just filling up the buffer with the last cell, and there will be no more // headers left to read, so skip this if chunk_num < num_chunks { - let (nc, nr) = rad_types::Chunk::read_header(&mut br); + let (nc, nr) = chunk::Chunk::::read_header(&mut br); nbytes_chunk = nc; nrec_chunk = nr; } @@ -109,18 +111,13 @@ pub(crate) fn fill_work_queue( /// any cell whose barcode is not in `keep_set`. 
pub(crate) fn fill_work_queue_filtered( keep_set: HashSet, - rl_tags: &rad_types::TagSection, + record_context: &AlevinFryRecordContext, q: Arc>, mut br: T, num_chunks: usize, pbar: &ProgressBar, ) -> anyhow::Result<()> { - let bct = rl_tags.tags[0].typeid; - let umit = rl_tags.tags[1].typeid; - let bc_type = rad_types::decode_int_type_tag(bct).context("unsupported barcode type id.")?; - let umi_type = rad_types::decode_int_type_tag(umit).context("unsupported umi type id.")?; - - const BUFSIZE: usize = 524208; + const BUFSIZE: usize = 524_208; // the buffer that will hold our records let mut buf = vec![0u8; BUFSIZE]; // the number of bytes currently packed into the chunk @@ -164,8 +161,10 @@ pub(crate) fn fill_work_queue_filtered( br.read_exact(&mut buf[(boffset + 8)..(boffset + nbytes_chunk as usize)]) .context("failed to read from queue.")?; // get the barcode for this chunk - let (bc, _umi) = - rad_types::Chunk::peek_record(&buf[boffset + 8..], &bc_type, &umi_type); + let (bc, _umi) = chunk::Chunk::::peek_record( + &buf[boffset + 8..], + &record_context, + ); if keep_set.contains(&bc) { cells_in_chunk += 1; cbytes += nbytes_chunk; @@ -181,7 +180,7 @@ pub(crate) fn fill_work_queue_filtered( // and we are just filling up the buffer with the last cell, and there will be no more // headers left to read, so skip this if chunk_num < num_chunks { - let (nc, nr) = rad_types::Chunk::read_header(&mut br); + let (nc, nr) = chunk::Chunk::::read_header(&mut br); nbytes_chunk = nc; nrec_chunk = nr; } diff --git a/src/main.rs b/src/main.rs index 33fd704..6a0de68 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. * * This file is part of alevin-fry * (see https://github.com/COMBINE-lab/alevin-fry). 
diff --git a/src/pugutils.rs b/src/pugutils.rs index 2007ba1..571db33 100644 --- a/src/pugutils.rs +++ b/src/pugutils.rs @@ -20,7 +20,8 @@ use petgraph::prelude::*; use petgraph::unionfind::*; use petgraph::visit::NodeIndexable; -use libradicl::rad_types; +use libradicl::chunk; +use libradicl::record::AlevinFryReadRecord; use slog::{crit, info, warn}; @@ -626,7 +627,7 @@ fn resolve_num_molecules_crlike_from_vec( } pub fn get_num_molecules_cell_ranger_like_small( - cell_chunk: &mut rad_types::Chunk, + cell_chunk: &mut chunk::Chunk, tid_to_gid: &[u32], _num_genes: usize, gene_eqclass_hash: &mut HashMap, u32, ahash::RandomState>, diff --git a/src/quant.rs b/src/quant.rs index 835dd4f..f37ba74 100644 --- a/src/quant.rs +++ b/src/quant.rs @@ -7,7 +7,7 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ -use anyhow::Context; +use anyhow::{bail, Context}; use crossbeam_queue::ArrayQueue; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; @@ -43,7 +43,13 @@ use crate::io_utils; use crate::prog_opts::QuantOpts; use crate::pugutils; use crate::utils as afutils; -use libradicl::rad_types; + +use libradicl::rad_types::TagValue; +use libradicl::{ + chunk, + header::RadPrelude, + record::{AlevinFryReadRecord, AlevinFryRecordContext}, +}; type BufferedGzFile = BufWriter>; @@ -344,7 +350,10 @@ pub fn quantify(quant_opts: QuantOpts) -> anyhow::Result<()> { // with these options pub fn do_quantify(mut br: T, quant_opts: QuantOpts) -> anyhow::Result<()> { let parent = std::path::Path::new(quant_opts.input_dir); - let hdr = rad_types::RadHeader::from_bytes(&mut br); + + let prelude = RadPrelude::from_bytes(&mut br)?; + let hdr = &prelude.hdr; + //let hdr = rad_types::RadHeader::from_bytes(&mut br); let init_uniform = quant_opts.init_uniform; let summary_stat = quant_opts.summary_stat; @@ -461,25 +470,35 @@ pub fn do_quantify(mut br: T, quant_opts: QuantOpts) -> anyhow::Result< Arc::new(bincode::deserialize_from(&bc_unmapped_file).unwrap()); // 
file-level - let fl_tags = rad_types::TagSection::from_bytes(&mut br); + let fl_tags = &prelude.file_tags; info!(log, "read {:?} file-level tags", fl_tags.tags.len()); // read-level - let rl_tags = rad_types::TagSection::from_bytes(&mut br); + let rl_tags = &prelude.read_tags; info!(log, "read {:?} read-level tags", rl_tags.tags.len()); // alignment-level - let al_tags = rad_types::TagSection::from_bytes(&mut br); + let al_tags = &prelude.aln_tags; info!(log, "read {:?} alignemnt-level tags", al_tags.tags.len()); - let ft_vals = rad_types::FileTags::from_bytes(&mut br); - info!(log, "File-level tag values {:?}", ft_vals); + let file_tag_map = prelude.file_tags.parse_tags_from_bytes(&mut br)?; + info!(log, "File-level tag values {:?}", file_tag_map); + + let barcode_tag = file_tag_map + .get("cblen") + .expect("tag map must contain cblen"); + let barcode_len = match barcode_tag { + &TagValue::U8(x) => x as u16, + &TagValue::U16(x) => x, + &TagValue::U32(x) => x as u16, + &TagValue::U64(x) => x as u16, + _ => bail!("unexpected tag type"), + }; - let bct = rl_tags.tags[0].typeid; - let umit = rl_tags.tags[1].typeid; + let record_context = prelude.get_record_context::()?; // if we have a filter list, extract it here let mut retained_bc: Option> = None; if let Some(fname) = filter_list { - match afutils::read_filter_list(fname, ft_vals.bclen) { + match afutils::read_filter_list(fname, barcode_len) { Ok(fset) => { // the number of cells we expect to // actually process @@ -534,9 +553,12 @@ pub fn do_quantify(mut br: T, quant_opts: QuantOpts) -> anyhow::Result< let tid_to_gid_shared = std::sync::Arc::new(tid_to_gid); // the number of reference sequences let ref_count = hdr.ref_count as u32; + + // TODO -- maybe delete March 5, 2024 // the types for the barcodes and umis - let bc_type = rad_types::decode_int_type_tag(bct).expect("unsupported barcode type id."); - let umi_type = rad_types::decode_int_type_tag(umit).expect("unsupported umi type id."); + // let bc_type = 
rad_types::decode_int_type_tag(bct).expect("unsupported barcode type id."); + // let umi_type = rad_types::decode_int_type_tag(umit).expect("unsupported umi type id."); + // the number of genes (different than the number of reference sequences, which are transcripts) let num_genes = gene_name_to_id.len(); @@ -636,15 +658,19 @@ pub fn do_quantify(mut br: T, quant_opts: QuantOpts) -> anyhow::Result< let tid_to_gid = tid_to_gid_shared.clone(); // and the atomic counter of remaining work let cells_remaining = cells_to_process.clone(); + + let record_context = record_context.clone(); + // TODO -- maybe delete March 5, 2024 // they will need to know the bc and umi type - let bc_type = bc_type; - let umi_type = umi_type; + // let bc_type = bc_type; + // let umi_type = umi_type; + // and the file writer let bcout = bc_writer.clone(); // global gene-level eqc map let eqid_map_lockc = eqid_map_lock.clone(); // and will need to know the barcode length - let bclen = ft_vals.bclen; + let bclen = barcode_len; let alt_res_cells = alt_res_cells.clone(); let empty_resolved_cells = empty_resolved_cells.clone(); let unmapped_count = bc_unmapped_map.clone(); @@ -738,7 +764,10 @@ pub fn do_quantify(mut br: T, quant_opts: QuantOpts) -> anyhow::Result< BufReader::new(&buf[byte_offset..(byte_offset + nbytes as usize)]); byte_offset += nbytes as usize; - let mut c = rad_types::Chunk::from_bytes(&mut nbr, &bc_type, &umi_type); + let mut c = chunk::Chunk::::from_bytes( + &mut nbr, + &record_context, + ); if c.reads.is_empty() { warn!(log, "Discovered empty chunk; should not happen! 
cell_num = {}, nbytes = {}, nrec = {}", cell_num, nbytes, nrec); } @@ -1198,7 +1227,7 @@ pub fn do_quantify(mut br: T, quant_opts: QuantOpts) -> anyhow::Result< // we have a retained set io_utils::fill_work_queue_filtered( ret_bc, - &rl_tags, + &record_context, q, br, hdr.num_chunks as usize, From 12d088c836b4a75ac50b72aeb915888f900768d5 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Tue, 5 Mar 2024 14:44:36 -0500 Subject: [PATCH 03/13] feat: working with libradicl-0.8.2-pre --- Cargo.lock | 2 +- src/cellfilter.rs | 20 ++++---------------- src/collate.rs | 2 +- src/convert.rs | 21 ++++----------------- src/io_utils.rs | 2 +- src/main.rs | 2 +- src/pugutils.rs | 4 ++-- src/quant.rs | 11 ++--------- src/utils.rs | 6 ++---- 9 files changed, 18 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d6f18e1..5f1f288 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -917,7 +917,7 @@ dependencies = [ [[package]] name = "libradicl" version = "0.8.1" -source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#232b2b32bc2b8a1aebd5f11dad6c495d4f0c86c1" +source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#58b0c8877cfc6ea32a85148946648f4d23917b62" dependencies = [ "ahash", "anyhow", diff --git a/src/cellfilter.rs b/src/cellfilter.rs index 396c26b..c191c17 100644 --- a/src/cellfilter.rs +++ b/src/cellfilter.rs @@ -7,7 +7,7 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ -use anyhow::{anyhow, bail, Context}; +use anyhow::{anyhow, Context}; use slog::crit; use slog::info; @@ -19,7 +19,7 @@ use bio_types::strand::Strand; use bstr::io::BufReadExt; use itertools::Itertools; use libradicl::exit_codes; -use libradicl::rad_types::{self, RadType, TagValue}; +use libradicl::rad_types::{self, RadType}; use libradicl::BarcodeLookupMap; use libradicl::{ chunk, @@ -287,13 +287,7 @@ fn process_unfiltered( let barcode_tag = file_tag_map .get("cblen") .expect("tag map must contain cblen"); - let barcode_len = match 
barcode_tag { - &TagValue::U8(x) => x as u16, - &TagValue::U16(x) => x, - &TagValue::U32(x) => x as u16, - &TagValue::U64(x) => x as u16, - _ => bail!("unexpected tag type"), - }; + let barcode_len: u16 = barcode_tag.try_into()?; // now, we create a second barcode map with just the barcodes // for cells we will keep / rescue. @@ -479,13 +473,7 @@ fn process_filtered( let barcode_tag = file_tag_map .get("cblen") .expect("tag map must contain cblen"); - let barcode_len = match barcode_tag { - &TagValue::U8(x) => x as u16, - &TagValue::U16(x) => x, - &TagValue::U32(x) => x as u16, - &TagValue::U64(x) => x as u16, - _ => bail!("unexpected tag type"), - }; + let barcode_len: u16 = barcode_tag.try_into()?; // select from among supported filter methods match filter_meth { diff --git a/src/collate.rs b/src/collate.rs index 6a151bb..10fc3d6 100644 --- a/src/collate.rs +++ b/src/collate.rs @@ -123,7 +123,7 @@ where let freq_hm: HashMap = bincode::deserialize_from(rdr).context("couldn't deserialize barcode to frequency map.")?; let total_to_collate = freq_hm.values().sum(); - let mut tsv_map = Vec::from_iter(freq_hm.into_iter()); + let mut tsv_map = Vec::from_iter(freq_hm); // sort this so that we deal with largest cells (by # of reads) first // sort in _descending_ order by count. 
diff --git a/src/convert.rs b/src/convert.rs index a9618c8..a84a297 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -7,7 +7,6 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ -use anyhow::bail; use indicatif::{ProgressBar, ProgressStyle}; use slog::{crit, info}; //use num_format::{Locale}; @@ -18,7 +17,7 @@ use std::io::{stdout, BufReader, BufWriter, Cursor, Seek, SeekFrom, Write}; // use rust_htslib::{bam, bam::record::Aux, bam::Read}; -use libradicl::rad_types::{self, RadType, TagValue}; +use libradicl::rad_types::{self, RadType}; use libradicl::utils::MASK_LOWER_31_U32; use libradicl::{ chunk, @@ -48,7 +47,7 @@ use std::str; #[allow(dead_code)] fn get_random_nucl() -> &'static str { - let nucl = vec!["A", "T", "G", "C"]; + let nucl = ["A", "T", "G", "C"]; let mut rng = rand::thread_rng(); let idx = rng.gen_range(0..4); nucl[idx] @@ -549,22 +548,10 @@ where let barcode_tag = file_tag_map .get("cblen") .expect("tag map must contain cblen"); - let barcode_len = match barcode_tag { - &TagValue::U8(x) => x as u16, - &TagValue::U16(x) => x, - &TagValue::U32(x) => x as u16, - &TagValue::U64(x) => x as u16, - _ => bail!("unexpected tag type"), - }; + let barcode_len: u16 = barcode_tag.try_into()?; let umi_tag = file_tag_map.get("ulen").expect("tag map must contain ulen"); - let umi_len = match umi_tag { - &TagValue::U8(x) => x as u16, - &TagValue::U16(x) => x, - &TagValue::U32(x) => x as u16, - &TagValue::U64(x) => x as u16, - _ => bail!("unexpected tag type"), - }; + let umi_len: u16 = umi_tag.try_into()?; let mut num_reads: u64 = 0; let record_context = prelude.get_record_context::()?; diff --git a/src/io_utils.rs b/src/io_utils.rs index 7c246af..d789e47 100644 --- a/src/io_utils.rs +++ b/src/io_utils.rs @@ -163,7 +163,7 @@ pub(crate) fn fill_work_queue_filtered( // get the barcode for this chunk let (bc, _umi) = chunk::Chunk::::peek_record( &buf[boffset + 8..], - &record_context, + record_context, ); if keep_set.contains(&bc) { 
cells_in_chunk += 1; diff --git a/src/main.rs b/src/main.rs index 6a0de68..6bc8a0b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -344,7 +344,7 @@ fn main() -> anyhow::Result<()> { .build(); match generate_permit_list(gpl_opts) { - Ok(nc) if nc == 0 => { + Ok(0) => { warn!(log, "found 0 corrected barcodes; please check the input."); } Err(e) => return Err(e), diff --git a/src/pugutils.rs b/src/pugutils.rs index 571db33..388289a 100644 --- a/src/pugutils.rs +++ b/src/pugutils.rs @@ -295,7 +295,7 @@ where let mut components = get_map(); for (i, v) in labels.iter().enumerate() { - let ve = components.entry(*v as u32).or_insert_with(Vec::new); + let ve = components.entry(*v as u32).or_default(); ve.push(i as u32); } components @@ -816,7 +816,7 @@ fn get_num_molecules_large_component( let vert = g.from_index(*vertex_id as usize); // add the corresponding (UMI, frequency) pair to the map // for this eq_id - let umis = tmp_map.entry(vert.0).or_insert_with(Vec::new); + let umis = tmp_map.entry(vert.0).or_default(); umis.push(eq_map.eqc_info[vert.0 as usize].umis[vert.1 as usize]); } diff --git a/src/quant.rs b/src/quant.rs index f37ba74..d0b2017 100644 --- a/src/quant.rs +++ b/src/quant.rs @@ -7,7 +7,7 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ -use anyhow::{bail, Context}; +use anyhow::Context; use crossbeam_queue::ArrayQueue; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; @@ -44,7 +44,6 @@ use crate::prog_opts::QuantOpts; use crate::pugutils; use crate::utils as afutils; -use libradicl::rad_types::TagValue; use libradicl::{ chunk, header::RadPrelude, @@ -485,13 +484,7 @@ pub fn do_quantify(mut br: T, quant_opts: QuantOpts) -> anyhow::Result< let barcode_tag = file_tag_map .get("cblen") .expect("tag map must contain cblen"); - let barcode_len = match barcode_tag { - &TagValue::U8(x) => x as u16, - &TagValue::U16(x) => x, - &TagValue::U32(x) => x as u16, - &TagValue::U64(x) => x as u16, - _ => bail!("unexpected tag 
type"), - }; + let barcode_len: u16 = barcode_tag.try_into()?; let record_context = prelude.get_record_context::()?; diff --git a/src/utils.rs b/src/utils.rs index 0b42881..67c8cb9 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -561,8 +561,7 @@ pub fn get_all_snps(bc: u64, bc_length: usize) -> Vec { "barcode length greater than 32 not supported" ); - let mut snps: Vec = Vec::new(); - snps.reserve(3 * bc_length); + let mut snps: Vec = Vec::with_capacity(3 * bc_length); for nt_index in 1..=bc_length { // clearing the two relevant bits based on nucleotide position @@ -590,8 +589,7 @@ pub fn get_all_indels(bc: u64, bc_length: usize) -> Vec { "barcode length greater than 32 not supported" ); - let mut indels: Vec = Vec::new(); - indels.reserve(8 * (bc_length - 1)); + let mut indels: Vec = Vec::with_capacity(8 * (bc_length - 1)); for nt_index in 1..bc_length { let mut bit_mask = 1 << (2 * nt_index); From 4114afcab6ffd80318a101eab8c5883267000387 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Tue, 5 Mar 2024 15:22:43 -0500 Subject: [PATCH 04/13] upgrade deps --- Cargo.lock | 724 +++++++++++++++++++++++------------------------------ Cargo.toml | 38 +-- 2 files changed, 334 insertions(+), 428 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f1f288..71cb847 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,9 +23,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] @@ -99,51 +99,50 @@ dependencies = [ [[package]] name = "anstream" -version = "0.3.2" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +checksum = 
"d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", - "is-terminal", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.1" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -176,17 +175,6 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", 
-] - [[package]] name = "autocfg" version = "1.1.0" @@ -247,37 +235,36 @@ dependencies = [ [[package]] name = "buffer-redux" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2886ea01509598caac116942abd33ab5a88fa32acdf7e4abfa0fc489ca520c9" +checksum = "4c9f8ddd22e0a12391d1e7ada69ec3b0da1914f1cec39c5cf977143c5b2854f5" dependencies = [ "memchr", - "safemem", ] [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" [[package]] name = "bytecount" -version = "0.6.3" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" +checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "bytemuck" -version = "1.13.1" +version = "1.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -308,11 +295,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.79" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "a0ba8f7aaa012f30d5b2861462f6708eccd49c3c39863fe083a308035f63d723" dependencies = [ "jobserver", + "libc", ] 
[[package]] @@ -323,50 +311,46 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.26" +version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", - "time 0.1.45", "wasm-bindgen", - "winapi", + "windows-targets 0.52.4", ] [[package]] name = "clap" -version = "4.3.9" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bba77a07e4489fb41bd90e8d4201c3eb246b3c2c9ea2ba0bddd6c1d1df87db7d" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" dependencies = [ "clap_builder", "clap_derive", - "once_cell", ] [[package]] name = "clap_builder" -version = "4.3.9" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9b4a88bb4bc35d3d6f65a21b0f0bafe9c894fa00978de242c555ec28bea1c0" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" dependencies = [ "anstream", "anstyle", - "bitflags 1.3.2", "clap_lex", - "once_cell", "strsim", "terminal_size", ] [[package]] name = "clap_derive" -version = "4.3.2" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" dependencies = [ "heck", "proc-macro2", @@ -376,9 +360,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.5.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = 
"cmake" @@ -397,90 +381,80 @@ checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" dependencies = [ - "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-queue" -version = "0.3.8" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" dependencies = [ - "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -490,9 +464,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] @@ -510,12 +484,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.3", + "hashbrown", "lock_api", "once_cell", "parking_lot_core", ] +[[package]] +name = "deranged" +version = "0.3.11" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive-new" version = "0.5.9" @@ -550,9 +533,9 @@ dependencies = [ [[package]] name = "either" -version = "1.8.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "encode_unicode" @@ -568,23 +551,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.48.0", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] @@ -595,9 +567,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.26" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", @@ -605,9 +577,9 @@ dependencies = [ [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -623,13 +595,13 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -638,12 +610,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.3" @@ -658,18 +624,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.1" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hts-sys" @@ -687,16 +644,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = 
"e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -710,9 +667,9 @@ dependencies = [ [[package]] name = "idna" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -724,16 +681,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9007da9cacbd3e6343da136e98b0d2df013f553d35bdec8b518f07bea768e19c" -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.2.5" @@ -741,14 +688,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ "equivalent", - "hashbrown 0.14.3", + "hashbrown", ] [[package]] name = "indicatif" -version = "0.17.5" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" dependencies = [ "console", "instant", @@ -766,27 +713,15 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "io-lifetimes" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi 0.3.1", - "libc", - "windows-sys 0.48.0", -] - 
[[package]] name = "is-terminal" -version = "0.4.7" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ - "hermit-abi 0.3.1", - "io-lifetimes", - "rustix", - "windows-sys 0.48.0", + "hermit-abi", + "libc", + "windows-sys 0.52.0", ] [[package]] @@ -800,24 +735,24 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.6" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] @@ -894,21 +829,21 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.147" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = 
"4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libmimalloc-sys" -version = "0.1.33" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4ac0e912c8ef1b735e92369695618dc5b1819f5a7bf3f167301a3ba1cea515e" +checksum = "3979b5c37ece694f1f5e51e7ecc871fdb0f517ed04ee45f88d15d6d553cb9664" dependencies = [ "cc", "libc", @@ -932,11 +867,22 @@ dependencies = [ "snap", ] +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.2", + "libc", + "redox_syscall", +] + [[package]] name = "libz-sys" -version = "1.1.9" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" dependencies = [ "cc", "cmake", @@ -953,15 +899,15 @@ checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee" [[package]] name = "linux-raw-sys" -version = "0.3.8" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -969,9 +915,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.19" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lzma-sys" @@ -986,9 +932,9 @@ dependencies = [ [[package]] name = "matrixmultiply" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" dependencies = [ "autocfg", "rawpointer", @@ -1000,29 +946,20 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] - [[package]] name = "mimalloc" -version = "0.1.37" +version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2894987a3459f3ffb755608bd82188f8ed00d0ae077f1edea29c068d639d98" +checksum = "fa01922b5ea280a911e323e4d2fd24b7fe5cc4042e0d2cda3c40775cdc4bdc9c" dependencies = [ "libmimalloc-sys", ] [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -1036,7 +973,7 @@ dependencies = [ "approx 0.5.1", "matrixmultiply", "nalgebra-macros", - "num-complex 0.4.3", + "num-complex 0.4.5", "num-rational", "num-traits", "rand", @@ -1063,7 +1000,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" dependencies = [ "matrixmultiply", - 
"num-complex 0.4.3", + "num-complex 0.4.5", "num-integer", "num-traits", "rawpointer", @@ -1102,7 +1039,7 @@ dependencies = [ "bstr", "byteorder", "bytes", - "indexmap 2.2.5", + "indexmap", "noodles-bgzf", "noodles-core", "noodles-csi", @@ -1135,7 +1072,7 @@ checksum = "a60dfe0919f7ecbd081a82eb1d32e8f89f9041932d035fe8309073c8c01277bf" dependencies = [ "bit-vec", "byteorder", - "indexmap 2.2.5", + "indexmap", "noodles-bgzf", "noodles-core", ] @@ -1148,7 +1085,7 @@ checksum = "1f0d8e441368374f6e144989f823fd7c05e58cdaa3f97d22bb4d75b534327b87" dependencies = [ "bitflags 2.4.2", "bstr", - "indexmap 2.2.5", + "indexmap", "lexical-core", "memchr", "noodles-bgzf", @@ -1163,7 +1100,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ "num-bigint", - "num-complex 0.4.3", + "num-complex 0.4.5", "num-integer", "num-iter", "num-rational", @@ -1172,9 +1109,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" dependencies = [ "autocfg", "num-integer", @@ -1193,13 +1130,19 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-format" version = "0.4.4" @@ -1212,19 +1155,18 @@ dependencies = [ [[package]] name 
= "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" dependencies = [ "autocfg", "num-integer", @@ -1245,9 +1187,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", @@ -1259,16 +1201,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.1", - "libc", -] - -[[package]] -name = "num_threads" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" -dependencies = [ + "hermit-abi", "libc", ] @@ -1280,56 +1213,62 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", + "redox_syscall", "smallvec", - "windows-targets 0.48.1", + "windows-targets 0.48.5", ] [[package]] name = "paste" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 1.9.3", + "indexmap", ] [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "portable-atomic" -version = "1.3.3" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"767eb9f07d4a5ebcb39bbf2d452058a93c011373abf6832e24194a1c3f004794" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" @@ -1409,9 +1348,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.7.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" dependencies = [ "either", "rayon-core", @@ -1419,42 +1358,31 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", ] [[package]] name = "redox_syscall" -version = "0.3.5" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] [[package]] name = "redox_users" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" dependencies = [ "getrandom", - "redox_syscall 0.2.16", + "libredox", "thiserror", ] @@ 
-1519,45 +1447,38 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.20" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.2", "errno", - "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "rustversion" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.13" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "safe_arch" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62a7484307bd40f8f7ccbacccac730108f2cae119a3b11c74485b48aa9ea650f" +checksum = "f398075ce1e6a179b46f51bd88d0598b92b00d3551f1a2d4ac49e771b56ac354" dependencies = [ "bytemuck", ] -[[package]] -name = "safemem" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" - [[package]] name = "sce" version = "0.2.0" @@ -1574,9 +1495,9 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scroll" @@ 
-1618,9 +1539,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.99" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -1634,7 +1555,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" dependencies = [ "approx 0.5.1", - "num-complex 0.4.3", + "num-complex 0.4.5", "num-traits", "paste", "wide", @@ -1648,9 +1569,9 @@ checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06" [[package]] name = "slog-async" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe" +checksum = "72c8038f898a2c79507940990f05386455b3a317d8f18d4caea7cbc3d5096b84" dependencies = [ "crossbeam-channel", "slog", @@ -1660,15 +1581,15 @@ dependencies = [ [[package]] name = "slog-term" -version = "2.9.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +checksum = "b6e022d0b998abfe5c3782c1f03551a596269450ccd677ea51c56f8b214610e8" dependencies = [ - "atty", + "is-terminal", "slog", "term", "thread_local", - "time 0.3.22", + "time", ] [[package]] @@ -1679,9 +1600,9 @@ checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "snap" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sprs" @@ -1691,7 +1612,7 @@ checksum = 
"88bab60b0a18fb9b3e0c26e92796b3c3a278bf5fa4880f5ad5cc3bdfb843d0b1" dependencies = [ "alga", "ndarray", - "num-complex 0.4.3", + "num-complex 0.4.5", "num-traits", "num_cpus", "rayon", @@ -1719,21 +1640,21 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] @@ -1777,9 +1698,9 @@ dependencies = [ [[package]] name = "terminal_size" -version = "0.2.6" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" dependencies = [ "rustix", "windows-sys 0.48.0", @@ -1787,18 +1708,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ 
"proc-macro2", "quote", @@ -1807,9 +1728,9 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" dependencies = [ "cfg-if", "once_cell", @@ -1817,24 +1738,14 @@ dependencies = [ [[package]] name = "time" -version = "0.1.45" +version = "0.3.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" dependencies = [ + "deranged", "itoa", - "libc", - "num_threads", + "num-conv", + "powerfmt", "serde", "time-core", "time-macros", @@ -1842,16 +1753,17 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.9" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" dependencies = [ + "num-conv", "time-core", ] @@ -1894,42 +1806,42 @@ dependencies = [ [[package]] name = "typenum" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "url" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -1954,12 +1866,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1968,9 +1874,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1978,9 +1884,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", @@ -1993,9 +1899,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2003,9 +1909,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", @@ -2016,15 +1922,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.92" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "wide" -version = "0.7.10" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40018623e2dba2602a9790faba8d33f2ebdebf4b86561b83928db735f8784728" +checksum = "89beec544f246e679fc25490e3f8e08003bc4bf612068f325120dad4cea02c1c" dependencies = [ "bytemuck", "safe_arch", @@ -2053,145 +1959,145 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.48.1", + "windows-targets 0.52.4", ] [[package]] name = "windows-sys" -version = "0.45.0" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets 0.42.2", + "windows-targets 0.48.5", ] [[package]] name = "windows-sys" -version = "0.48.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.48.1", + "windows-targets 0.52.4", ] [[package]] name = "windows-targets" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] name = "windows-targets" -version = "0.48.1" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" +checksum = 
"bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" -version = "0.42.2" +version = "0.48.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "xz2" diff --git a/Cargo.toml b/Cargo.toml index 5ec17a4..25d4d47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,35 +40,35 @@ categories = ["command-line-utilities", "science"] libradicl = { git = "https://github.com/COMBINE-lab/libradicl", 
branch = "develop", version = "0.8.0" } anyhow = "1.0.80" arrayvec = "0.7.4" -ahash = "0.8.3" +ahash = "0.8.11" bincode = "1.3.3" -bstr = "1.5.0" -crossbeam-channel = "0.5.8" -crossbeam-queue = "0.3.8" +bstr = "1.9.1" +crossbeam-channel = "0.5.12" +crossbeam-queue = "0.3.11" # derive_builder = "0.11.2" typed-builder = "0.14.0" -indicatif = "0.17.5" +indicatif = "0.17.8" needletail = "0.5.1" -petgraph = "0.6.3" -flate2 = "1.0.26" +petgraph = "0.6.4" +flate2 = "1.0.28" scroll = "0.11.0" -serde = { version = "1.0.164", features = ["derive"] } -serde_json = "1.0.99" +serde = { version = "1.0.197", features = ["derive"] } +serde_json = "1.0.114" sprs = "0.11.1" slog = "2.7.0" -slog-term = "2.9.0" -slog-async = "2.7.0" -smallvec = "1.10.0" -snap = "1.1.0" +slog-term = "2.9.1" +slog-async = "2.8.0" +smallvec = "1.13.1" +snap = "1.1.1" rand = "0.8.5" -chrono = "0.4.26" -csv = "1.2.2" -mimalloc = { version = "0.1.37", default-features = false } +chrono = "0.4.34" +csv = "1.3.0" +mimalloc = { version = "0.1.39", default-features = false } num-format = "0.4.4" num_cpus = "1.16.0" -bio-types = { version = "1.0.0", default-features = true, features = ["serde"] } +bio-types = { version = "1.0.1", default-features = true, features = ["serde"] } itertools = "0.11.0" -thiserror = "1.0.40" +thiserror = "1.0.57" statrs = "0.16.0" rust-htslib = { version = "0.44.1", default-features = false, features = [ "bzip2", @@ -78,7 +78,7 @@ sce = { git = "https://github.com/parazodiac/SingleCellExperiment", branch = "de # no shenanigans; clap makes breaking "fixes" too often to allow variability # in the version different from what we tested with -clap = { version = "=4.3.9", features = ["derive", "wrap_help", "cargo", "help", "usage", "string", "error-context"] } +clap = { version = "=4.5.1", features = ["derive", "wrap_help", "cargo", "help", "usage", "string", "error-context"] } [profile.release] #debug = true From c2842f866dead7e8ef059625d4e5e79e43d8b8f9 Mon Sep 17 00:00:00 2001 From: Rob 
Patro Date: Tue, 5 Mar 2024 15:54:16 -0500 Subject: [PATCH 05/13] update all deps --- Cargo.lock | 34 +++++++++++++++++++--------------- Cargo.toml | 8 ++++---- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 71cb847..70c2dd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,7 +57,7 @@ dependencies = [ "rand", "rust-htslib", "sce", - "scroll 0.11.0", + "scroll", "serde", "serde_json", "slog", @@ -726,9 +726,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -861,7 +861,7 @@ dependencies = [ "noodles-bam", "noodles-sam", "num", - "scroll 0.12.0", + "scroll", "serde", "smallvec", "snap", @@ -1417,9 +1417,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rust-htslib" -version = "0.44.1" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7eb0f29fce64a4e22578905efef3d72389058016023279a58b282eb5c0c467" +checksum = "aec6f9ca4601beb4ae75ff8c99144dd15de5a873f6adf058da299962c760968e" dependencies = [ "bio-types", "byteorder", @@ -1429,6 +1429,7 @@ dependencies = [ "ieee754", "lazy_static", "libc", + "libz-sys", "linear-map", "newtype_derive", "regex", @@ -1499,12 +1500,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "scroll" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" - [[package]] name = "scroll" version = "0.12.0" @@ -1795,13 +1790,22 @@ dependencies = [ [[package]] name = 
"typed-builder" -version = "0.14.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cba322cb9b7bc6ca048de49e83918223f35e7a86311267013afff257004870" +checksum = "444d8748011b93cb168770e8092458cb0f8854f931ff82fdf6ddfbd72a9c933e" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "563b3b88238ec95680aef36bdece66896eaa7ce3c0f1b4f39d38fb2435261352" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 25d4d47..240d9b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,12 +46,12 @@ bstr = "1.9.1" crossbeam-channel = "0.5.12" crossbeam-queue = "0.3.11" # derive_builder = "0.11.2" -typed-builder = "0.14.0" +typed-builder = "0.18.1" indicatif = "0.17.8" needletail = "0.5.1" petgraph = "0.6.4" flate2 = "1.0.28" -scroll = "0.11.0" +scroll = "0.12.0" serde = { version = "1.0.197", features = ["derive"] } serde_json = "1.0.114" sprs = "0.11.1" @@ -67,10 +67,10 @@ mimalloc = { version = "0.1.39", default-features = false } num-format = "0.4.4" num_cpus = "1.16.0" bio-types = { version = "1.0.1", default-features = true, features = ["serde"] } -itertools = "0.11.0" +itertools = "0.12.1" thiserror = "1.0.57" statrs = "0.16.0" -rust-htslib = { version = "0.44.1", default-features = false, features = [ +rust-htslib = { version = "0.46.0", default-features = false, features = [ "bzip2", "lzma", ] } From e74dde2eb26159efc674697d0de397d0d661a074 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Wed, 6 Mar 2024 09:15:32 -0500 Subject: [PATCH 06/13] new convert compiles --- Cargo.lock | 113 ++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 + src/convert.rs | 149 ++++++++++++++++++++++++++++++++++++++++--------- src/main.rs | 2 +- 4 files changed, 238 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
70c2dd5..6b1aef1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -51,6 +51,8 @@ dependencies = [ "libradicl", "mimalloc", "needletail", + "noodles", + "noodles-util", "num-format", "num_cpus", "petgraph", @@ -222,6 +224,15 @@ version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bstr" version = "1.9.1" @@ -450,6 +461,16 @@ version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "csv" version = "1.3.0" @@ -510,6 +531,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -593,6 +624,16 @@ dependencies = [ "quick-error", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.12" @@ -852,7 +893,7 @@ dependencies = [ [[package]] name = "libradicl" version = "0.8.1" -source = 
"git+https://github.com/COMBINE-lab/libradicl?branch=develop#58b0c8877cfc6ea32a85148946648f4d23917b62" +source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#a9df1fa6815cb84d8a350bc72b4212bae47e3d5e" dependencies = [ "ahash", "anyhow", @@ -940,6 +981,16 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.1" @@ -1029,6 +1080,17 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "noodles" +version = "0.65.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38db1833ba39368f7a855c5b9a8412729a61dd86b2563e1a3bdb02aecbe92a3c" +dependencies = [ + "noodles-bam", + "noodles-bgzf", + "noodles-sam", +] + [[package]] name = "noodles-bam" version = "0.56.0" @@ -1064,6 +1126,27 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7336c3be652de4e05444c9b12a32331beb5ba3316e8872d92bfdd8ef3b06c282" +[[package]] +name = "noodles-cram" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34a70ebb5bc7ff2d07ce96c0568691e57be421c121103ebef10cf02a63d7d400" +dependencies = [ + "bitflags 2.4.2", + "bstr", + "byteorder", + "bytes", + "bzip2", + "flate2", + "indexmap", + "md-5", + "noodles-bam", + "noodles-core", + "noodles-fasta", + "noodles-sam", + "xz2", +] + [[package]] name = "noodles-csi" version = "0.30.0" @@ -1077,6 +1160,18 @@ dependencies = [ "noodles-core", ] +[[package]] +name = "noodles-fasta" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e9e953e4e90e6c96e6a384598ebf2ab6d2f5add259ff05a194cf635e892c980" +dependencies = [ + "bytes", + "memchr", + "noodles-bgzf", + "noodles-core", +] + [[package]] name = "noodles-sam" 
version = "0.53.0" @@ -1093,6 +1188,22 @@ dependencies = [ "noodles-csi", ] +[[package]] +name = "noodles-util" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e800d2619f75de004aef8beb29f604e667f5bf5f714ff05cce3730f5c8cc4958" +dependencies = [ + "flate2", + "noodles-bam", + "noodles-bgzf", + "noodles-core", + "noodles-cram", + "noodles-csi", + "noodles-fasta", + "noodles-sam", +] + [[package]] name = "num" version = "0.4.1" diff --git a/Cargo.toml b/Cargo.toml index 240d9b4..6a26ccb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,6 +79,8 @@ sce = { git = "https://github.com/parazodiac/SingleCellExperiment", branch = "de # no shenanigans; clap makes breaking "fixes" too often to allow variability # in the version different from what we tested with clap = { version = "=4.5.1", features = ["derive", "wrap_help", "cargo", "help", "usage", "string", "error-context"] } +noodles = { version = "0.65.0", features = ["bam", "bgzf", "sam"] } +noodles-util = { version = "0.37.0", features = ["alignment"] } [profile.release] #debug = true diff --git a/src/convert.rs b/src/convert.rs index a84a297..aff85ba 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -7,8 +7,10 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ +use anyhow::bail; use indicatif::{ProgressBar, ProgressStyle}; use slog::{crit, info}; + //use num_format::{Locale}; use std::fs; use std::fs::File; @@ -17,6 +19,11 @@ use std::io::{stdout, BufReader, BufWriter, Cursor, Seek, SeekFrom, Write}; // use rust_htslib::{bam, bam::record::Aux, bam::Read}; +use noodles::bam as nbam; +use noodles::{bgzf, sam}; +use noodles_util::alignment; +use sam::alignment::record::data::field::{tag::Tag as SamTag, value::Value as SamTagValue}; + use libradicl::rad_types::{self, RadType}; use libradicl::utils::MASK_LOWER_31_U32; use libradicl::{ @@ -83,11 +90,19 @@ pub fn cb_string_to_u64(cb_str: &[u8]) -> Result> { Ok(cb_id) } -pub fn bam2rad(input_file: 
P1, rad_file: P2, num_threads: u32, log: &slog::Logger) +pub fn bam2rad( + input_file: P1, + rad_file: P2, + num_threads: u32, + log: &slog::Logger, +) -> anyhow::Result<()> where P1: AsRef, P2: AsRef, { + const CR: SamTag = SamTag::new(b'C', b'R'); + const UR: SamTag = SamTag::new(b'U', b'R'); + let oname = Path::new(rad_file.as_ref()); let parent = oname.parent().unwrap(); std::fs::create_dir_all(parent).unwrap(); @@ -105,13 +120,50 @@ where bam_bytes }; + // noodles reading + let file = File::open(&input_file)?; + + let mut reader: Box> = + match input_file.as_ref().extension().and_then(|ext| ext.to_str()) { + Some("bam") | Some("BAM") => { + let decomp_threads = std::num::NonZeroUsize::new(if num_threads > 1 { + (num_threads - 1) as usize + } else { + 1_usize + }) + .expect("invalid nonzero usize"); + let decoder: Box = Box::new( + bgzf::MultithreadedReader::with_worker_count(decomp_threads, file), + ); + Box::new(nbam::io::Reader::from(decoder)) + } + Some("sam") | Some("SAM") => { + let inner: Box = Box::new(BufReader::new(file)); + Box::new(sam::io::Reader::from(inner)) + } + _ => { + bail!("unsupported input file format, must end with bam/BAM or sam/SAM"); + } + }; + + /* + let header = reader.read_header()?; + + for result in reader.records(&header) { + let record = result?; + } + */ + if num_threads > 1 { bam.set_threads((num_threads as usize) - 1).unwrap(); } else { bam.set_threads(1).unwrap(); } - let hdrv = bam.header().to_owned(); + //let hdrv = bam.header().to_owned(); + + let hdrv = reader.read_alignment_header()?; + let mut data = Cursor::new(vec![]); // initialize the header (do we need this ?) 
// let mut hdr = libradicl::RadHeader::from_bam_header(&hdrv); @@ -135,15 +187,15 @@ where let is_paired = 0u8; data.write_all(&is_paired.to_le_bytes()) .expect("couldn't write to output file"); - let ref_count = hdrv.target_count() as u64; + let ref_count = hdrv.reference_sequences().len() as u64; data.write_all(&ref_count.to_le_bytes()) .expect("couldn't write to output file"); // create longest buffer - for t in hdrv.target_names().iter() { - let name_size = t.len() as u16; + for (k, v) in hdrv.reference_sequences().iter() { + let name_size = k.len() as u16; data.write_all(&name_size.to_le_bytes()) .expect("coudn't write to output file"); - data.write_all(t).expect("coudn't write to output file"); + data.write_all(k).expect("coudn't write to output file"); } let initial_num_chunks = 0u64; data.write_all(&initial_num_chunks.to_le_bytes()) @@ -152,7 +204,7 @@ where // test the header { - info!(log, "ref count: {:?} ", hdrv.target_count(),); + info!(log, "ref count: {:?} ", hdrv.reference_sequences().len(),); } // keep a pointer to header pos @@ -161,15 +213,22 @@ where // check header position info!(log, "end header pos: {:?}", end_header_pos,); + let mut record_it = reader.alignment_records(&hdrv).peekable(); + // ### start of tags // get the first record for creating flags - let mut rec = bam::Record::new(); - let first_record_exists = bam.read(&mut rec).is_some(); + let rec_res = record_it.peek(); + let first_record_exists = rec_res.is_some(); if !first_record_exists { crit!(log, "bam file had no records!"); std::process::exit(1); } + let rec = match rec_res.unwrap() { + Ok(x) => x.as_ref(), + Err(e) => bail!("{}", e), + }; + use bstr::BStr; // Tags we will have // write the tag meta-information section { @@ -193,14 +252,29 @@ where .expect("coudn't write to output file"); // read-level - let bc_string_in: &str = if let Ok(Aux::String(bcs)) = rec.aux(b"CR") { - bcs + let flag_data = rec.data(); + let bc_string_in: &str = if let Some(Ok(bcs)) = 
flag_data.get(&CR) { + match bcs { + SamTagValue::String(bstr) => { + str::from_utf8(>::as_ref(bstr).as_ref())? + } + _ => { + bail!("cannot convert non-string (Z) tag into barcode string."); + } + } } else { panic!("Input record missing CR tag!") }; - let umi_string_in: &str = if let Ok(Aux::String(umis)) = rec.aux(b"UR") { - umis + let umi_string_in: &str = if let Some(Ok(umis)) = flag_data.get(&UR) { + match umis { + SamTagValue::String(bstr) => { + str::from_utf8(>::as_ref(bstr).as_ref())? + } + _ => { + bail!("cannot convert non-string (Z) tag into umi string."); + } + } } else { panic!("Input record missing UR tag!") }; @@ -307,19 +381,26 @@ where let mut first_pass = true; //for r in bam.records(){ loop { - if !first_pass { - let next_record_exists = bam.read(&mut rec).is_some(); - if !next_record_exists { - break; - } + let rec_res = record_it.next(); + if rec_res.is_none() { + break; } first_pass = false; + // the iterator returns a result, so + // make sure it's an Ok variant here. + let rec = match rec_res.unwrap() { + Ok(x) => x, + Err(e) => bail!("{}", e), + }; - // let rec = r.unwrap(); - let is_reverse = rec.is_reverse(); - let qname_str = str::from_utf8(rec.qname()).unwrap().to_owned(); + let flags = rec.flags()?; + + let is_reverse = flags.is_reverse_complemented(); + let qname_str = str::from_utf8(rec.name().expect("valid name").as_bytes()) + .unwrap() + .to_owned(); let qname = qname_str; - let mut tid = rec.tid() as u32; + let mut tid = rec.reference_sequence_id(&hdrv).unwrap().unwrap() as u32; if qname == old_qname { if !is_reverse { tid |= MASK_LOWER_31_U32; @@ -376,14 +457,29 @@ where // if this is a new read update the old variables { - let bc_string_in: &str = if let Ok(Aux::String(bcs)) = rec.aux(b"CR") { - bcs + let flag_data = rec.data(); + let bc_string_in: &str = if let Some(Ok(bcs)) = flag_data.get(&CR) { + match bcs { + SamTagValue::String(bstr) => { + str::from_utf8(>::as_ref(bstr).as_ref())? 
+ } + _ => { + bail!("cannot convert non-string (Z) tag into umi string."); + } + } } else { panic!("Input record missing CR tag!") }; - let umi_string_in: &str = if let Ok(Aux::String(umis)) = rec.aux(b"UR") { - umis + let umi_string_in: &str = if let Some(Ok(umis)) = flag_data.get(&UR) { + match umis { + SamTagValue::String(bstr) => { + str::from_utf8(>::as_ref(bstr).as_ref())? + } + _ => { + bail!("cannot convert non-string (Z) tag into umi string."); + } + } } else { panic!("Input record missing UR tag!") }; @@ -479,6 +575,7 @@ where .expect("couldn't write to output file."); info!(log, "finished writing to {:?}.", rad_file.as_ref()); + Ok(()) } pub fn view

(rad_file: P, print_header: bool, log: &slog::Logger) diff --git a/src/main.rs b/src/main.rs index 6bc8a0b..2f57c31 100644 --- a/src/main.rs +++ b/src/main.rs @@ -358,7 +358,7 @@ fn main() -> anyhow::Result<()> { let input_file: &PathBuf = t.get_one("bam").unwrap(); let rad_file: &PathBuf = t.get_one("output").unwrap(); let num_threads: u32 = *t.get_one("threads").unwrap(); - alevin_fry::convert::bam2rad(input_file, rad_file, num_threads, &log) + alevin_fry::convert::bam2rad(input_file, rad_file, num_threads, &log)? } // convert a rad file to a textual representation and write to stdout From 423f9bc63267c7ec0362f13d526af9ebf2dfa2ef Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Wed, 6 Mar 2024 12:05:44 -0500 Subject: [PATCH 07/13] clippy is happy --- src/convert.rs | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/convert.rs b/src/convert.rs index aff85ba..cc62021 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -17,11 +17,9 @@ use std::fs::File; use std::io::{stdout, BufReader, BufWriter, Cursor, Seek, SeekFrom, Write}; // use std::sync::{Arc, Mutex}; // -use rust_htslib::{bam, bam::record::Aux, bam::Read}; use noodles::bam as nbam; use noodles::{bgzf, sam}; -use noodles_util::alignment; use sam::alignment::record::data::field::{tag::Tag as SamTag, value::Value as SamTagValue}; use libradicl::rad_types::{self, RadType}; @@ -112,7 +110,6 @@ where } let ofile = File::create(rad_file.as_ref()).unwrap(); - let mut bam = bam::Reader::from_path(&input_file).unwrap(); let bam_bytes = fs::metadata(&input_file).unwrap().len(); info! 
{ log, @@ -123,6 +120,7 @@ where // noodles reading let file = File::open(&input_file)?; + // example from https://github.com/zaeleus/noodles/issues/227 let mut reader: Box> = match input_file.as_ref().extension().and_then(|ext| ext.to_str()) { Some("bam") | Some("BAM") => { @@ -152,18 +150,10 @@ where for result in reader.records(&header) { let record = result?; } + let hdrv = bam.header().to_owned(); */ - if num_threads > 1 { - bam.set_threads((num_threads as usize) - 1).unwrap(); - } else { - bam.set_threads(1).unwrap(); - } - - //let hdrv = bam.header().to_owned(); - let hdrv = reader.read_alignment_header()?; - let mut data = Cursor::new(vec![]); // initialize the header (do we need this ?) // let mut hdr = libradicl::RadHeader::from_bam_header(&hdrv); @@ -177,8 +167,8 @@ where // file writer // let owriter = Arc::new(Mutex::new(BufWriter::with_capacity(1048576, ofile))); - let mut owriter = BufWriter::with_capacity(1048576, ofile); // intermediate buffer + let mut owriter = BufWriter::with_capacity(1048576, ofile); // write the header { @@ -191,7 +181,7 @@ where data.write_all(&ref_count.to_le_bytes()) .expect("couldn't write to output file"); // create longest buffer - for (k, v) in hdrv.reference_sequences().iter() { + for (k, _v) in hdrv.reference_sequences().iter() { let name_size = k.len() as u16; data.write_all(&name_size.to_le_bytes()) .expect("coudn't write to output file"); @@ -256,7 +246,7 @@ where let bc_string_in: &str = if let Some(Ok(bcs)) = flag_data.get(&CR) { match bcs { SamTagValue::String(bstr) => { - str::from_utf8(>::as_ref(bstr).as_ref())? + str::from_utf8(>::as_ref(bstr))? } _ => { bail!("cannot convert non-string (Z) tag into barcode string."); @@ -269,7 +259,7 @@ where let umi_string_in: &str = if let Some(Ok(umis)) = flag_data.get(&UR) { match umis { SamTagValue::String(bstr) => { - str::from_utf8(>::as_ref(bstr).as_ref())? + str::from_utf8(>::as_ref(bstr))? 
} _ => { bail!("cannot convert non-string (Z) tag into umi string."); @@ -378,14 +368,12 @@ where let mut bc = 0u64; let mut umi = 0u64; let mut tid_list = Vec::::new(); - let mut first_pass = true; //for r in bam.records(){ loop { let rec_res = record_it.next(); if rec_res.is_none() { break; } - first_pass = false; // the iterator returns a result, so // make sure it's an Ok variant here. let rec = match rec_res.unwrap() { @@ -461,7 +449,7 @@ where let bc_string_in: &str = if let Some(Ok(bcs)) = flag_data.get(&CR) { match bcs { SamTagValue::String(bstr) => { - str::from_utf8(>::as_ref(bstr).as_ref())? + str::from_utf8(>::as_ref(bstr))? } _ => { bail!("cannot convert non-string (Z) tag into umi string."); @@ -474,7 +462,7 @@ where let umi_string_in: &str = if let Some(Ok(umis)) = flag_data.get(&UR) { match umis { SamTagValue::String(bstr) => { - str::from_utf8(>::as_ref(bstr).as_ref())? + str::from_utf8(>::as_ref(bstr))? } _ => { bail!("cannot convert non-string (Z) tag into umi string."); From 327bcc3ddbf5d369dc6376c7dbb74c4eae3d4ba1 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Thu, 7 Mar 2024 17:21:09 -0500 Subject: [PATCH 08/13] nicer convert --- Cargo.lock | 5 +- Cargo.toml | 2 +- src/convert.rs | 280 +++++++++++++++++++++---------------------------- 3 files changed, 123 insertions(+), 164 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6b1aef1..9301943 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -892,12 +892,13 @@ dependencies = [ [[package]] name = "libradicl" -version = "0.8.1" -source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#a9df1fa6815cb84d8a350bc72b4212bae47e3d5e" +version = "0.8.2" +source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#ae81af9e0a695fbb56b568197a1110ba687d4e1c" dependencies = [ "ahash", "anyhow", "bio-types", + "byteorder", "dashmap", "noodles-bam", "noodles-sam", diff --git a/Cargo.toml b/Cargo.toml index 6a26ccb..db0c9a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ categories 
= ["command-line-utilities", "science"] [dependencies] # for local development, look in the libradicl git repository # but when published, pull the specified version -libradicl = { git = "https://github.com/COMBINE-lab/libradicl", branch = "develop", version = "0.8.0" } +libradicl = { git = "https://github.com/COMBINE-lab/libradicl", branch = "develop", version = "0.8.2" } anyhow = "1.0.80" arrayvec = "0.7.4" ahash = "0.8.11" diff --git a/src/convert.rs b/src/convert.rs index cc62021..a1aa67a 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -22,11 +22,11 @@ use noodles::bam as nbam; use noodles::{bgzf, sam}; use sam::alignment::record::data::field::{tag::Tag as SamTag, value::Value as SamTagValue}; -use libradicl::rad_types::{self, RadType}; +use libradicl::rad_types::{self, RadIntId, RadType, TagDesc, TagSection, TagSectionLabel}; use libradicl::utils::MASK_LOWER_31_U32; use libradicl::{ chunk, - header::RadPrelude, + header::{RadHeader, RadPrelude}, record::{AlevinFryReadRecord, AlevinFryRecordContext}, }; @@ -88,6 +88,55 @@ pub fn cb_string_to_u64(cb_str: &[u8]) -> Result> { Ok(cb_id) } +#[inline] +fn write_barcode(barcode_t: &RadType, barcode: u64, w: &mut W) -> anyhow::Result<()> { + match barcode_t { + RadType::Int(int_t) => match int_t { + RadIntId::U8 => { + let v = barcode as u8; + w.write_all(&v.to_le_bytes())?; + } + RadIntId::U16 => { + let v = barcode as u16; + w.write_all(&v.to_le_bytes())?; + } + RadIntId::U32 => { + let v = barcode as u32; + w.write_all(&v.to_le_bytes())?; + } + RadIntId::U64 => { + let v = barcode; + w.write_all(&v.to_le_bytes())?; + } + }, + _ => bail!("invalid type!"), + } + Ok(()) +} + +#[inline] +fn write_list( + tid_list: &[u32], + bct: &RadType, + bc: u64, + umit: &RadType, + umi: u64, + w: &mut W, +) -> anyhow::Result<()> { + assert!(!tid_list.is_empty(), "Trying to write empty tid_list"); + let na = tid_list.len() as u32; + w.write_all(&na.to_le_bytes()).unwrap(); + //bc + write_barcode(bct, bc, w)?; + //umi + 
write_barcode(umit, umi, w)?; + //write tid list + for t in tid_list.iter() { + w.write_all(&t.to_le_bytes()).unwrap(); + } + Ok(()) +} + pub fn bam2rad( input_file: P1, rad_file: P2, @@ -110,6 +159,7 @@ where } let ofile = File::create(rad_file.as_ref()).unwrap(); + // number of bytes in the input BAM file let bam_bytes = fs::metadata(&input_file).unwrap().len(); info! { log, @@ -117,10 +167,10 @@ where bam_bytes }; - // noodles reading + // reading input BAM using Noodles + // example from https://github.com/zaeleus/noodles/issues/227 let file = File::open(&input_file)?; - // example from https://github.com/zaeleus/noodles/issues/227 let mut reader: Box> = match input_file.as_ref().extension().and_then(|ext| ext.to_str()) { Some("bam") | Some("BAM") => { @@ -130,6 +180,12 @@ where 1_usize }) .expect("invalid nonzero usize"); + + println!( + "parsing BAM file using {:?} decompression threads", + decomp_threads + ); + let decoder: Box = Box::new( bgzf::MultithreadedReader::with_worker_count(decomp_threads, file), ); @@ -144,52 +200,20 @@ where } }; - /* - let header = reader.read_header()?; - - for result in reader.records(&header) { - let record = result?; - } - let hdrv = bam.header().to_owned(); - */ - let hdrv = reader.read_alignment_header()?; let mut data = Cursor::new(vec![]); - // initialize the header (do we need this ?) 
- // let mut hdr = libradicl::RadHeader::from_bam_header(&hdrv); - // number of chunks would be decided when we know - // let end_header_pos2 = hdr.get_size(); - // info!( - // log, - // "end header pos {:?}", - // end_header_pos2, - // ); - // file writer - // let owriter = Arc::new(Mutex::new(BufWriter::with_capacity(1048576, ofile))); // intermediate buffer - let mut owriter = BufWriter::with_capacity(1048576, ofile); + let mut owriter = BufWriter::with_capacity(1_048_576, ofile); // write the header { - // NOTE: This is hard-coded for unpaired single-cell data - // consider if we should generalize this - let is_paired = 0u8; - data.write_all(&is_paired.to_le_bytes()) - .expect("couldn't write to output file"); - let ref_count = hdrv.reference_sequences().len() as u64; - data.write_all(&ref_count.to_le_bytes()) - .expect("couldn't write to output file"); - // create longest buffer - for (k, _v) in hdrv.reference_sequences().iter() { - let name_size = k.len() as u16; - data.write_all(&name_size.to_le_bytes()) - .expect("coudn't write to output file"); - data.write_all(k).expect("coudn't write to output file"); - } - let initial_num_chunks = 0u64; - data.write_all(&initial_num_chunks.to_le_bytes()) - .expect("coudn't write to output file"); + // NOTE: The is_paired flag is not present in the + // SAM file and so isn't meaningful as written here. + // Also, the num_chunks will be 0 in this header + // currently. + let rad_header = RadHeader::from_bam_header(&hdrv); + rad_header.write(&mut data)?; } // test the header @@ -197,7 +221,8 @@ where info!(log, "ref count: {:?} ", hdrv.reference_sequences().len(),); } - // keep a pointer to header pos + // keep a pointer to header pos we need this to fill in the correct + // number of chunks later on. 
let end_header_pos = data.stream_position().unwrap() - std::mem::size_of::() as u64; // check header position @@ -218,36 +243,31 @@ where Err(e) => bail!("{}", e), }; + let bc_typeid: RadType; + let umi_typeid: RadType; + use bstr::BStr; // Tags we will have // write the tag meta-information section { // file-level - let mut num_tags = 2u16; - data.write_all(&num_tags.to_le_bytes()) - .expect("coudn't write to output file"); - // type-id - let mut typeid = 2u8; - let mut cb_tag_str = "cblen"; - let mut umi_tag_str = "ulen"; - - // str - type - libradicl::io::write_str_bin(cb_tag_str, &rad_types::RadIntId::U16, &mut data); - data.write_all(&typeid.to_le_bytes()) - .expect("coudn't write to output file"); - - // str - type - libradicl::io::write_str_bin(umi_tag_str, &rad_types::RadIntId::U16, &mut data); - data.write_all(&typeid.to_le_bytes()) - .expect("coudn't write to output file"); + let mut file_tags = TagSection::new_with_label(TagSectionLabel::FileTags); + file_tags.add_tag_desc(TagDesc { + name: "cblen".to_owned(), + typeid: RadType::Int(RadIntId::U16), + }); + file_tags.add_tag_desc(TagDesc { + name: "ulen".to_owned(), + typeid: RadType::Int(RadIntId::U16), + }); + + file_tags.write(&mut data)?; // read-level let flag_data = rec.data(); let bc_string_in: &str = if let Some(Ok(bcs)) = flag_data.get(&CR) { match bcs { - SamTagValue::String(bstr) => { - str::from_utf8(>::as_ref(bstr))? - } + SamTagValue::String(bstr) => str::from_utf8(>::as_ref(bstr))?, _ => { bail!("cannot convert non-string (Z) tag into barcode string."); } @@ -258,9 +278,7 @@ where let umi_string_in: &str = if let Some(Ok(umis)) = flag_data.get(&UR) { match umis { - SamTagValue::String(bstr) => { - str::from_utf8(>::as_ref(bstr))? 
- } + SamTagValue::String(bstr) => str::from_utf8(>::as_ref(bstr))?, _ => { bail!("cannot convert non-string (Z) tag into umi string."); } @@ -268,60 +286,53 @@ where } else { panic!("Input record missing UR tag!") }; - let bclen = bc_string_in.len() as u16; let umilen = umi_string_in.len() as u16; - data.write_all(&num_tags.to_le_bytes()) - .expect("coudn't write to output file"); - cb_tag_str = "b"; - umi_tag_str = "u"; - // type is conditional on barcode and umi length - let bc_typeid = match bclen { - 1..=4 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U8)).unwrap(), - 5..=8 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U16)).unwrap(), - 9..=16 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U32)).unwrap(), - 17..=32 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U64)).unwrap(), + bc_typeid = match bclen { + 1..=4 => RadType::Int(rad_types::RadIntId::U8), + 5..=8 => RadType::Int(rad_types::RadIntId::U16), + 9..=16 => RadType::Int(rad_types::RadIntId::U32), + 17..=32 => RadType::Int(rad_types::RadIntId::U64), l => { crit!(log, "cannot encode barcode of length {} > 32", l); std::process::exit(1); } }; - let umi_typeid = match umilen { - 1..=4 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U8)).unwrap(), - 5..=8 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U16)).unwrap(), - 9..=16 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U32)).unwrap(), - 17..=32 => rad_types::encode_type_tag(RadType::Int(rad_types::RadIntId::U64)).unwrap(), + umi_typeid = match umilen { + 1..=4 => RadType::Int(rad_types::RadIntId::U8), + 5..=8 => RadType::Int(rad_types::RadIntId::U16), + 9..=16 => RadType::Int(rad_types::RadIntId::U32), + 17..=32 => RadType::Int(rad_types::RadIntId::U64), l => { crit!(log, "cannot encode umi of length {} > 32", l); std::process::exit(1); } }; - //info!(log, "CB LEN : {}, UMI LEN : {}", bclen, umilen); - - 
libradicl::io::write_str_bin(cb_tag_str, &rad_types::RadIntId::U16, &mut data); - data.write_all(&bc_typeid.to_le_bytes()) - .expect("coudn't write to output file"); - - libradicl::io::write_str_bin(umi_tag_str, &rad_types::RadIntId::U16, &mut data); - data.write_all(&umi_typeid.to_le_bytes()) - .expect("coudn't write to output file"); + let mut read_tags = TagSection::new_with_label(TagSectionLabel::ReadTags); + read_tags.add_tag_desc(TagDesc { + name: "b".to_owned(), + typeid: bc_typeid, + }); + read_tags.add_tag_desc(TagDesc { + name: "u".to_owned(), + typeid: umi_typeid, + }); + read_tags.write(&mut data)?; // alignment-level - num_tags = 1u16; - data.write_all(&num_tags.to_le_bytes()) - .expect("couldn't write to output file"); - - // reference id - let refid_str = "compressed_ori_refid"; - typeid = 3u8; - libradicl::io::write_str_bin(refid_str, &rad_types::RadIntId::U16, &mut data); - data.write_all(&typeid.to_le_bytes()) - .expect("coudn't write to output file"); - + let mut aln_tags = TagSection::new_with_label(TagSectionLabel::AlignmentTags); + aln_tags.add_tag_desc(TagDesc { + name: "compressed_ori_refid".to_owned(), + typeid: RadType::Int(RadIntId::U32), + }); + aln_tags.write(&mut data)?; + + // done with tag descriptions + // now write the values associated with the file-level tags data.write_all(&bclen.to_le_bytes()) .expect("coudn't write to output file"); data.write_all(&umilen.to_le_bytes()) @@ -336,7 +347,7 @@ where // let initial_cond : bool = false ; // allocate data - let buf_limit = 10000u32; + let buf_limit = 10_000u32; data = Cursor::new(Vec::::with_capacity((buf_limit * 24) as usize)); data.write_all(&local_nrec.to_le_bytes()).unwrap(); data.write_all(&local_nrec.to_le_bytes()).unwrap(); @@ -397,21 +408,12 @@ where // local_nrec += 1; continue; } + // if this is new read and we need to write info // for the last read, _unless_ this is the very // first read, in which case we shall continue if !tid_list.is_empty() { - 
assert!(!tid_list.is_empty(), "Trying to write empty tid_list"); - let na = tid_list.len(); - data.write_all(&(na as u32).to_le_bytes()).unwrap(); - //bc - data.write_all(&(bc as u32).to_le_bytes()).unwrap(); - //umi - data.write_all(&(umi as u32).to_le_bytes()).unwrap(); - //write tid list - for t in tid_list.iter() { - data.write_all(&t.to_le_bytes()).unwrap(); - } + write_list(&tid_list, &bc_typeid, bc, &umi_typeid, umi, &mut data)?; } // dump if we reach the buf_limit @@ -434,14 +436,7 @@ where data = Cursor::new(Vec::::with_capacity((buf_limit * 24) as usize)); data.write_all(&local_nrec.to_le_bytes()).unwrap(); data.write_all(&local_nrec.to_le_bytes()).unwrap(); - - // for debugging - // if num_output_chunks > expected_bar_length-1 { - // break; - // } } - // let tname = tid_lookup.get(&(rec.tid() as u32)).unwrap(); - // let qname_string = str::from_utf8(rec.qname()).unwrap(); // if this is a new read update the old variables { @@ -493,36 +488,13 @@ where tid_list.push(tid); local_nrec += 1; } - // println!("{:?}\t{:?}\t{:?}\t{:?}\t{:?}\t{:?}", - // qname_string, - // tname, - // bc_string, - // umi_string, - // num_output_chunks, - // local_nrec, - // ); - //na TODO: make is independed of 16-10 length criterion - // for debugging - // if num_output_chunks > expected_bar_length-1 { - // break; - // } } if local_nrec > 0 { // println!("In the residual writing part"); // first fill the buffer with the last remaining read if !tid_list.is_empty() { - assert!(!tid_list.is_empty(), "Trying to write empty tid_list"); - let na = tid_list.len(); - data.write_all(&(na as u32).to_le_bytes()).unwrap(); - //bc - data.write_all(&(bc as u32).to_le_bytes()).unwrap(); - //umi - data.write_all(&(umi as u32).to_le_bytes()).unwrap(); - //write tid list - for t in tid_list.iter() { - data.write_all(&t.to_le_bytes()).unwrap(); - } + write_list(&tid_list, &bc_typeid, bc, &umi_typeid, umi, &mut data)?; } data.set_position(0); @@ -530,7 +502,7 @@ where let nrec = local_nrec; 
data.write_all(&nbytes.to_le_bytes()).unwrap(); data.write_all(&nrec.to_le_bytes()).unwrap(); - // owriter.lock().unwrap().write_all(data.get_ref()).unwrap(); + owriter.write_all(data.get_ref()).unwrap(); num_output_chunks += 1; } @@ -540,20 +512,6 @@ where println!(); info!(log, "{:?} chunks written", num_output_chunks,); - // owriter.lock().unwrap().flush(); - // owriter - // .lock() - // .unwrap() - // .get_ref() - // .seek(SeekFrom::Start( - // end_header_pos, - // )) - // .expect("couldn't seek in output file"); - // owriter - // .lock() - // .unwrap() - // .write_all(&num_output_chunks.to_le_bytes()) - // .expect("couldn't write to output file."); owriter.flush().expect("File buffer could not be flushed"); owriter .seek(SeekFrom::Start(end_header_pos)) From 3b09832f50ce057a2fd4ab14117b7578110c73f4 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Fri, 8 Mar 2024 00:57:18 -0500 Subject: [PATCH 09/13] update clap --- Cargo.lock | 222 ++--------------------------------------------------- Cargo.toml | 8 +- 2 files changed, 9 insertions(+), 221 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9301943..52d932a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,7 +57,6 @@ dependencies = [ "num_cpus", "petgraph", "rand", - "rust-htslib", "sce", "scroll", "serde", @@ -309,10 +308,6 @@ name = "cc" version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0ba8f7aaa012f30d5b2861462f6708eccd49c3c39863fe083a308035f63d723" -dependencies = [ - "jobserver", - "libc", -] [[package]] name = "cfg-if" @@ -322,9 +317,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.34" +version = "0.4.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" +checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" dependencies = [ "android-tzdata", "iana-time-zone", @@ 
-336,9 +331,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.1" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" +checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" dependencies = [ "clap_builder", "clap_derive", @@ -346,9 +341,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.1" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstream", "anstyle", @@ -375,15 +370,6 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" -[[package]] -name = "cmake" -version = "0.1.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" -dependencies = [ - "cc", -] - [[package]] name = "colorchoice" version = "1.0.0" @@ -492,12 +478,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "custom_derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" - [[package]] name = "dashmap" version = "5.5.3" @@ -606,24 +586,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "fs-utils" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fc7a9dc005c944c98a935e7fd626faf5bf7e5a609f94bc13e42fc4a02e52593" 
-dependencies = [ - "quick-error", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -645,12 +607,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - [[package]] name = "hashbrown" version = "0.14.3" @@ -669,20 +625,6 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" -[[package]] -name = "hts-sys" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deebfb779c734d542e7f14c298597914b9b5425e4089aef482eacb5cab941915" -dependencies = [ - "bzip2-sys", - "cc", - "fs-utils", - "glob", - "libz-sys", - "lzma-sys", -] - [[package]] name = "iana-time-zone" version = "0.1.60" @@ -706,22 +648,6 @@ dependencies = [ "cc", ] -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "ieee754" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9007da9cacbd3e6343da136e98b0d2df013f553d35bdec8b518f07bea768e19c" - [[package]] name = "indexmap" version = "2.2.5" @@ -780,15 +706,6 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" -[[package]] -name = "jobserver" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" -dependencies = [ - "libc", -] - [[package]] name = "js-sys" version = "0.3.69" @@ -893,12 +810,11 @@ dependencies = [ [[package]] name = "libradicl" 
version = "0.8.2" -source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#ae81af9e0a695fbb56b568197a1110ba687d4e1c" +source = "git+https://github.com/COMBINE-lab/libradicl?branch=develop#d10845e2a96f4eaf0a14f8aee136bdc0c8330acb" dependencies = [ "ahash", "anyhow", "bio-types", - "byteorder", "dashmap", "noodles-bam", "noodles-sam", @@ -920,25 +836,6 @@ dependencies = [ "redox_syscall", ] -[[package]] -name = "libz-sys" -version = "1.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" -dependencies = [ - "cc", - "cmake", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "linear-map" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee" - [[package]] name = "linux-raw-sys" version = "0.4.13" @@ -1072,15 +969,6 @@ dependencies = [ "xz2", ] -[[package]] -name = "newtype_derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec" -dependencies = [ - "rustc_version", -] - [[package]] name = "noodles" version = "0.65.0" @@ -1348,12 +1236,6 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - [[package]] name = "petgraph" version = "0.6.4" @@ -1397,12 +1279,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = 
"quote" version = "1.0.35" @@ -1527,37 +1403,6 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" -[[package]] -name = "rust-htslib" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aec6f9ca4601beb4ae75ff8c99144dd15de5a873f6adf058da299962c760968e" -dependencies = [ - "bio-types", - "byteorder", - "custom_derive", - "derive-new", - "hts-sys", - "ieee754", - "lazy_static", - "libc", - "libz-sys", - "linear-map", - "newtype_derive", - "regex", - "thiserror", - "url", -] - -[[package]] -name = "rustc_version" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" -dependencies = [ - "semver", -] - [[package]] name = "rustix" version = "0.38.31" @@ -1618,12 +1463,6 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" -[[package]] -name = "semver" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" - [[package]] name = "serde" version = "1.0.197" @@ -1874,21 +1713,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "trait-set" version = "0.3.0" @@ -1926,56 +1750,24 @@ version = "1.17.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-width" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" -[[package]] -name = "url" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - [[package]] name = "utf8parse" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "version_check" version = "0.9.4" diff --git a/Cargo.toml b/Cargo.toml index db0c9a3..3251377 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,7 +61,7 @@ slog-async = "2.8.0" smallvec = "1.13.1" snap = "1.1.1" rand = "0.8.5" -chrono = "0.4.34" +chrono = "0.4.35" csv = "1.3.0" mimalloc = { version = "0.1.39", default-features = false } num-format = 
"0.4.4" @@ -70,15 +70,11 @@ bio-types = { version = "1.0.1", default-features = true, features = ["serde"] } itertools = "0.12.1" thiserror = "1.0.57" statrs = "0.16.0" -rust-htslib = { version = "0.46.0", default-features = false, features = [ - "bzip2", - "lzma", -] } sce = { git = "https://github.com/parazodiac/SingleCellExperiment", branch = "dev", version = "0.2.0" } # no shenanigans; clap makes breaking "fixes" too often to allow variability # in the version different from what we tested with -clap = { version = "=4.5.1", features = ["derive", "wrap_help", "cargo", "help", "usage", "string", "error-context"] } +clap = { version = "=4.5.2", features = ["derive", "wrap_help", "cargo", "help", "usage", "string", "error-context"] } noodles = { version = "0.65.0", features = ["bam", "bgzf", "sam"] } noodles-util = { version = "0.37.0", features = ["alignment"] } From 49d21c9c49e4fdcd298f736d6134632c2904a609 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Fri, 8 Mar 2024 00:57:41 -0500 Subject: [PATCH 10/13] update cargo lock --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52d932a..09b2060 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -254,9 +254,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.3" +version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" [[package]] name = "bytecount" @@ -305,9 +305,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.89" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0ba8f7aaa012f30d5b2861462f6708eccd49c3c39863fe083a308035f63d723" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" [[package]] name = "cfg-if" From 3fa0c86239798b44e3c5a5f4363947a1d513d208 Mon Sep 17 00:00:00 
2001 From: Rob Patro Date: Fri, 8 Mar 2024 12:17:55 -0500 Subject: [PATCH 11/13] tweak progress bar --- src/convert.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/convert.rs b/src/convert.rs index a1aa67a..5861f40 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -352,6 +352,10 @@ where data.write_all(&local_nrec.to_le_bytes()).unwrap(); data.write_all(&local_nrec.to_le_bytes()).unwrap(); + // empiricaly derived factor of size of bam vs rad + // encoding for records. + let approx_bam_to_rad_factor = 6.258_f64; + // calculate number of records // let mut total_number_of_records = 0u64; // for r in bam.records(){ @@ -366,8 +370,8 @@ where .expect("ProgressStyle template was invalid.") .progress_chars("╢▌▌░╟"); - let expected_bar_length = bam_bytes / ((buf_limit as u64) * 24); - // let expected_bar_length = 50u64 ;// bam_bytes / ((buf_limit as u64) * 24); + let expected_bar_length = + bam_bytes / (((buf_limit as f64) * 24_f64) * approx_bam_to_rad_factor).round() as u64; let pbar_inner = ProgressBar::new(expected_bar_length); pbar_inner.set_style(sty); From 031fa2e76c332da563dbd4f13c22b5e25cabe5a7 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Fri, 8 Mar 2024 14:39:41 -0500 Subject: [PATCH 12/13] cleanup reheader --- scripts/bsd-3.tmpl | 6 ++++++ scripts/reheader.sh | 10 ++++++++++ src/cellfilter.rs | 4 ++-- src/cmd_parse_utils.rs | 8 ++++++++ src/collate.rs | 4 ++-- src/constants.rs | 4 ++-- src/convert.rs | 26 +++----------------------- src/em.rs | 4 ++-- src/eq_class.rs | 8 ++++++++ src/infer.rs | 4 ++-- src/io_utils.rs | 9 +++++++++ src/lib.rs | 4 ++-- src/main.rs | 4 ++-- src/prog_opts.rs | 9 +++++++++ src/pugutils.rs | 4 ++-- src/quant.rs | 4 ++-- src/utils.rs | 8 ++++++++ 17 files changed, 79 insertions(+), 41 deletions(-) create mode 100644 scripts/bsd-3.tmpl create mode 100755 scripts/reheader.sh diff --git a/scripts/bsd-3.tmpl b/scripts/bsd-3.tmpl new file mode 100644 index 0000000..2828051 --- /dev/null +++ 
b/scripts/bsd-3.tmpl @@ -0,0 +1,6 @@ +Copyright (c) ${years} ${owner}. + +This file is part of ${projectname} +(see ${projecturl}). + +License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause diff --git a/scripts/reheader.sh b/scripts/reheader.sh new file mode 100755 index 0000000..5c99dcd --- /dev/null +++ b/scripts/reheader.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +if ! command -v licenseheaders &> /dev/null +then + echo "licenseheaders could not be found; please install this program to use the script (pip install licenseheaders)." + exit 1 +fi + + +licenseheaders -d ../src -y 2020-2024 -t bsd-3.tmpl -o COMBINE-lab -n alevin-fry -u https://www.github.com/COMBINE-lab/alevin-fry -D -E rs diff --git a/src/cellfilter.rs b/src/cellfilter.rs index c191c17..a9131fd 100644 --- a/src/cellfilter.rs +++ b/src/cellfilter.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). * * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/cmd_parse_utils.rs b/src/cmd_parse_utils.rs index dd66ce2..c947ac5 100644 --- a/src/cmd_parse_utils.rs +++ b/src/cmd_parse_utils.rs @@ -1,3 +1,11 @@ +/* + * Copyright (c) 2020-2024 COMBINE-lab. + * + * This file is part of alevin-fry + * (see https://www.github.com/COMBINE-lab/alevin-fry). + * + * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause + */ use crate::quant::{ResolutionStrategy, SplicedAmbiguityModel}; use clap; use std::path::{Path, PathBuf}; diff --git a/src/collate.rs b/src/collate.rs index 10fc3d6..d296fd1 100644 --- a/src/collate.rs +++ b/src/collate.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. 
* * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). * * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/constants.rs b/src/constants.rs index 9443bf6..9a4ad7a 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). * * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/convert.rs b/src/convert.rs index 5861f40..4247f7d 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). 
* * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ @@ -542,19 +542,7 @@ where let mut br = BufReader::new(i_file); let prelude = RadPrelude::from_bytes(&mut br)?; let hdr = &prelude.hdr; - // info!( - // log, - // "paired : {:?}, ref_count : {}, num_chunks : {}", - // hdr.is_paired != 0, - // hdr.ref_count.to_formatted_string(&Locale::en), - // hdr.num_chunks.to_formatted_string(&Locale::en) - // ); - // file-level - //let _fl_tags = rad_types::TagSection::from_bytes(&mut br); - // info!(log, "read {:?} file-level tags", fl_tags.tags.len()); - // read-level let rl_tags = &prelude.read_tags; - // info!(log, "read {:?} read-level tags", rl_tags.tags.len()); // right now, we only handle BC and UMI types of U8—U64, so validate that const BNAME: &str = "b"; @@ -585,10 +573,6 @@ where assert!(bct.is_some(), "barcode type tag was missing!"); assert!(umit.is_some(), "umi type tag was missing!"); - // alignment-level - // let _al_tags = rad_types::TagSection::from_bytes(&mut br); - // info!(log, "read {:?} alignemnt-level tags", al_tags.tags.len()); - let file_tag_map = prelude.file_tags.parse_tags_from_bytes(&mut br)?; info!(log, "File-level tag map {:?}", file_tag_map); @@ -633,7 +617,7 @@ where handle, "ID:{}\tHI:{}\tNH:{}\tCB:{}\tUMI:{}\tDIR:{:?}\t{}", id, - i, + i + 1, num_entries, unsafe { std::str::from_utf8_unchecked(&bitmer_to_bytes(bc_mer)[..]) }, unsafe { std::str::from_utf8_unchecked(&bitmer_to_bytes(umi_mer)[..]) }, @@ -649,10 +633,6 @@ where return Ok(num_reads); } }; - - // writeln!(handle,"{:?}\t{:?}\t{:?}\t{:?}", - // bc,umi,read.dirs[i], - // str::from_utf8(&tid_),); } id += 1; } diff --git a/src/em.rs b/src/em.rs index 35c239d..526dc48 100644 --- a/src/em.rs +++ b/src/em.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). 
+ * (see https://www.github.com/COMBINE-lab/alevin-fry). * * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/eq_class.rs b/src/eq_class.rs index de8078c..1438277 100644 --- a/src/eq_class.rs +++ b/src/eq_class.rs @@ -1,3 +1,11 @@ +/* + * Copyright (c) 2020-2024 COMBINE-lab. + * + * This file is part of alevin-fry + * (see https://www.github.com/COMBINE-lab/alevin-fry). + * + * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause + */ use std::collections::HashMap; use std::hash::{BuildHasher, Hasher}; use std::io::BufRead; diff --git a/src/infer.rs b/src/infer.rs index abd70ec..37a7174 100644 --- a/src/infer.rs +++ b/src/infer.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). * * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/io_utils.rs b/src/io_utils.rs index d789e47..5c6a1a3 100644 --- a/src/io_utils.rs +++ b/src/io_utils.rs @@ -1,3 +1,12 @@ +/* + * Copyright (c) 2020-2024 COMBINE-lab. + * + * This file is part of alevin-fry + * (see https://www.github.com/COMBINE-lab/alevin-fry). + * + * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause + */ + /// some (hopefully) generally useful I/O related utilities use anyhow::Context; use crossbeam_queue::ArrayQueue; diff --git a/src/lib.rs b/src/lib.rs index 2d07e1b..aab2b10 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). 
* * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/main.rs b/src/main.rs index 2f57c31..12fd613 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2024 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). * * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/prog_opts.rs b/src/prog_opts.rs index 760effa..a57a857 100644 --- a/src/prog_opts.rs +++ b/src/prog_opts.rs @@ -1,3 +1,12 @@ +/* + * Copyright (c) 2020-2024 COMBINE-lab. + * + * This file is part of alevin-fry + * (see https://www.github.com/COMBINE-lab/alevin-fry). + * + * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause + */ + //use derive_builder::Builder; use bio_types::strand::Strand; use serde::Serialize; diff --git a/src/pugutils.rs b/src/pugutils.rs index 388289a..f9880af 100644 --- a/src/pugutils.rs +++ b/src/pugutils.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). * * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/quant.rs b/src/quant.rs index d0b2017..3d2222f 100644 --- a/src/quant.rs +++ b/src/quant.rs @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2022 Rob Patro, Avi Srivastava, Hirak Sarkar, Dongze He, Mohsen Zakeri. + * Copyright (c) 2020-2024 COMBINE-lab. * * This file is part of alevin-fry - * (see https://github.com/COMBINE-lab/alevin-fry). + * (see https://www.github.com/COMBINE-lab/alevin-fry). 
* * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ diff --git a/src/utils.rs b/src/utils.rs index 67c8cb9..0e54c74 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,11 @@ +/* + * Copyright (c) 2020-2024 COMBINE-lab. + * + * This file is part of alevin-fry + * (see https://www.github.com/COMBINE-lab/alevin-fry). + * + * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause + */ use crate::constants as afconst; use crate::eq_class::IndexedEqList; use anyhow::{anyhow, Context}; From 3c27c5d426788143fa867acb3d7fa287d5f9d076 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Fri, 8 Mar 2024 15:16:18 -0500 Subject: [PATCH 13/13] enable cargo dist --- Cargo.toml | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3251377..eeefa95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "alevin-fry" -version = "0.8.2" +version = "0.9.0" authors = [ "Avi Srivastava ", "Hirak Sarkar ", @@ -32,8 +32,6 @@ keywords = [ ] categories = ["command-line-utilities", "science"] -[workspace] - [dependencies] # for local development, look in the libradicl git repository # but when published, pull the specified version @@ -45,7 +43,6 @@ bincode = "1.3.3" bstr = "1.9.1" crossbeam-channel = "0.5.12" crossbeam-queue = "0.3.11" -# derive_builder = "0.11.2" typed-builder = "0.18.1" indicatif = "0.17.8" needletail = "0.5.1" @@ -75,6 +72,7 @@ sce = { git = "https://github.com/parazodiac/SingleCellExperiment", branch = "de # no shenanigans; clap makes breaking "fixes" too often to allow variability # in the version different from what we tested with clap = { version = "=4.5.2", features = ["derive", "wrap_help", "cargo", "help", "usage", "string", "error-context"] } + noodles = { version = "0.65.0", features = ["bam", "bgzf", "sam"] } noodles-util = { version = "0.37.0", features = ["alignment"] } @@ -83,3 +81,24 @@ noodles-util = { version = "0.37.0", 
features = ["alignment"] } lto = "thin" #codegen-units=1 opt-level = 3 + +# The profile that 'cargo dist' will build with +[profile.dist] +inherits = "release" +lto = "thin" + +# Config for 'cargo dist' +[workspace.metadata.dist] +# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) +cargo-dist-version = "0.11.1" +# CI backends to support +ci = ["github"] +# The installers to generate for each app +installers = ["shell"] +# Target platforms to build apps for (Rust target-triple syntax) +targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu"] +# Which actions to run on pull requests +pr-run-mode = "plan" + +[workspace.metadata.dist.github-custom-runners] +aarch64-apple-darwin = "macos-14"