diff --git a/Cargo.lock b/Cargo.lock index cbc9b92c..1cbfaf1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "ahash" version = "0.8.11" @@ -63,9 +74,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.14" +version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ "anstyle", "anstyle-parse", @@ -78,33 +89,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = 
"3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", "windows-sys 0.52.0", @@ -116,6 +127,15 @@ version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arc-swap" version = "1.7.1" @@ -130,9 +150,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "assert_cmd" -version = "2.0.14" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" +checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" dependencies = [ "anstyle", "bstr", @@ -252,9 +272,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" dependencies = [ "memchr", "regex-automata", @@ -273,6 +293,12 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.6.1" 
@@ -285,6 +311,27 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cassowary" version = "0.3.0" @@ -302,9 +349,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" +checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" +dependencies = [ + "jobserver", + "libc", +] [[package]] name = "cfg-if" @@ -357,11 +408,21 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "clap" -version = "4.5.9" +version = "4.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3" dependencies = [ "clap_builder", "clap_derive", @@ -369,9 +430,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.9" +version = "4.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa" dependencies = [ "anstream", "anstyle", @@ -381,9 +442,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.8" +version = "4.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e" dependencies = [ "heck", "proc-macro2", @@ -393,9 +454,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "clru" @@ -405,9 +466,9 @@ checksum = "cbd0f76e066e64fdc5631e3bb46381254deab9ef1158292f27c8c57e3bf3fe59" [[package]] name = "colorchoice" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" [[package]] name = "compact_str" @@ -483,6 +544,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.4.2" @@ -557,7 +633,7 @@ dependencies = [ "bitflags 2.6.0", "crossterm_winapi", "libc", - "mio", + "mio 0.8.11", "parking_lot", "serde", 
"signal-hook", @@ -590,6 +666,12 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" +[[package]] +name = "deflate64" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b" + [[package]] name = "deranged" version = "0.3.11" @@ -599,6 +681,17 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "difflib" version = "0.4.0" @@ -637,6 +730,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -920,9 +1024,9 @@ dependencies = [ [[package]] name = "gix-actor" -version = "0.31.4" +version = "0.31.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b8ee65074b2bbb91d9d97c15d172ea75043aefebf9869b5b329149dc76501c" +checksum = "a0e454357e34b833cc3a00b6efbbd3dd4d18b24b9fb0c023876ec2645e8aa3f2" dependencies = [ "bstr", "gix-date", @@ -934,9 +1038,9 @@ dependencies = [ [[package]] name = "gix-attributes" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eefb48f42eac136a4a0023f49a54ec31be1c7a9589ed762c45dcb9b953f7ecc8" +checksum = "e37ce99c7e81288c28b703641b6d5d119aacc45c1a6b247156e6249afa486257" dependencies = [ "bstr", "gix-glob", @@ -969,9 +1073,9 @@ dependencies = [ [[package]] name = "gix-command" 
-version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c22e086314095c43ffe5cdc5c0922d5439da4fd726f3b0438c56147c34dc225" +checksum = "0d76867867da891cbe32021ad454e8cae90242f6afb06762e4dd0d357afd1d7b" dependencies = [ "bstr", "gix-path", @@ -981,9 +1085,9 @@ dependencies = [ [[package]] name = "gix-commitgraph" -version = "0.24.2" +version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b102311085da4af18823413b5176d7c500fb2272eaf391cfa8635d8bcb12c4" +checksum = "133b06f67f565836ec0c473e2116a60fb74f80b6435e21d88013ac0e3c60fc78" dependencies = [ "bstr", "gix-chunk", @@ -1016,9 +1120,9 @@ dependencies = [ [[package]] name = "gix-config-value" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbd06203b1a9b33a78c88252a625031b094d9e1b647260070c25b09910c0a804" +checksum = "b328997d74dd15dc71b2773b162cb4af9a25c424105e4876e6d0686ab41c383e" dependencies = [ "bitflags 2.6.0", "bstr", @@ -1029,9 +1133,9 @@ dependencies = [ [[package]] name = "gix-credentials" -version = "0.24.2" +version = "0.24.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c70146183bd3c7119329a3c7392d1aa0e0adbe48d727f4df31828fe6d8fdaa1" +checksum = "198588f532e4d1202e04e6c3f50e4d7c060dffc66801c6f53cc246f1d234739e" dependencies = [ "bstr", "gix-command", @@ -1058,9 +1162,9 @@ dependencies = [ [[package]] name = "gix-diff" -version = "0.44.0" +version = "0.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b9bd8b2d07b6675a840b56a6c177d322d45fa082672b0dad8f063b25baf0a4" +checksum = "1996d5c8a305b59709467d80617c9fde48d9d75fd1f4179ea970912630886c9d" dependencies = [ "bstr", "gix-hash", @@ -1110,9 +1214,9 @@ dependencies = [ [[package]] name = "gix-filter" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"00ce6ea5ac8fca7adbc63c48a1b9e0492c222c386aa15f513405f1003f2f4ab2" +checksum = "e6547738da28275f4dff4e9f3a0f28509f53f94dd6bd822733c91cb306bca61a" dependencies = [ "bstr", "encoding_rs", @@ -1131,9 +1235,9 @@ dependencies = [ [[package]] name = "gix-fs" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3338ff92a2164f5209f185ec0cd316f571a72676bb01d27e22f2867ba69f77a" +checksum = "6adf99c27cdf17b1c4d77680c917e0d94d8783d4e1c73d3be0d1d63107163d7a" dependencies = [ "fastrand", "gix-features", @@ -1142,9 +1246,9 @@ dependencies = [ [[package]] name = "gix-glob" -version = "0.16.3" +version = "0.16.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a29ad0990cf02c48a7aac76ed0dbddeb5a0d070034b83675cc3bbf937eace4" +checksum = "fa7df15afa265cc8abe92813cd354d522f1ac06b29ec6dfa163ad320575cb447" dependencies = [ "bitflags 2.6.0", "bstr", @@ -1175,9 +1279,9 @@ dependencies = [ [[package]] name = "gix-ignore" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "640dbeb4f5829f9fc14d31f654a34a0350e43a24e32d551ad130d99bf01f63f1" +checksum = "5e6afb8f98e314d4e1adc822449389ada863c174b5707cedd327d67b84dba527" dependencies = [ "bstr", "gix-glob", @@ -1188,9 +1292,9 @@ dependencies = [ [[package]] name = "gix-index" -version = "0.33.0" +version = "0.33.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d8c5a5f1c58edcbc5692b174cda2703aba82ed17d7176ff4c1752eb48b1b167" +checksum = "9a9a44eb55bd84bb48f8a44980e951968ced21e171b22d115d1cdcef82a7d73f" dependencies = [ "bitflags 2.6.0", "bstr", @@ -1238,9 +1342,9 @@ dependencies = [ [[package]] name = "gix-negotiate" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d57dec54544d155a495e01de947da024471e1825d7d3f2724301c07a310d6184" +checksum = 
"9ec879fb6307bb63519ba89be0024c6f61b4b9d61f1a91fd2ce572d89fe9c224" dependencies = [ "bitflags 2.6.0", "gix-commitgraph", @@ -1273,9 +1377,9 @@ dependencies = [ [[package]] name = "gix-odb" -version = "0.61.0" +version = "0.61.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e92b9790e2c919166865d0825b26cc440a387c175bed1b43a2fa99c0e9d45e98" +checksum = "20d384fe541d93d8a3bb7d5d5ef210780d6df4f50c4e684ccba32665a5e3bc9b" dependencies = [ "arc-swap", "gix-date", @@ -1293,9 +1397,9 @@ dependencies = [ [[package]] name = "gix-pack" -version = "0.51.0" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a8da51212dbff944713edb2141ed7e002eea326b8992070374ce13a6cb610b3" +checksum = "3e0594491fffe55df94ba1c111a6566b7f56b3f8d2e1efc750e77d572f5f5229" dependencies = [ "clru", "gix-chunk", @@ -1351,9 +1455,9 @@ dependencies = [ [[package]] name = "gix-pathspec" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76cab098dc10ba2d89f634f66bf196dea4d7db4bf10b75c7a9c201c55a2ee19" +checksum = "d307d1b8f84dc8386c4aa20ce0cf09242033840e15469a3ecba92f10cfb5c046" dependencies = [ "bitflags 2.6.0", "bstr", @@ -1366,9 +1470,9 @@ dependencies = [ [[package]] name = "gix-prompt" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fddabbc7c51c241600ab3c4623b19fa53bde7c1a2f637f61043ed5fcadf000cc" +checksum = "7e0595d2be4b6d6a71a099e989bdd610882b882da35fb8503d91d6f81aa0936f" dependencies = [ "gix-command", "gix-config-value", @@ -1379,9 +1483,9 @@ dependencies = [ [[package]] name = "gix-protocol" -version = "0.45.1" +version = "0.45.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c140d4c6d209048826bad78f021a01b612830f89da356efeb31afe8957f8bee" +checksum = "bad8da8e89f24177bd77947092199bb13dcc318bbd73530ba8a05e6d6adaaa9d" dependencies = [ "bstr", 
"gix-credentials", @@ -1430,9 +1534,9 @@ dependencies = [ [[package]] name = "gix-refspec" -version = "0.23.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde848865834a54fe4d9b4573f15d0e9a68eaf3d061b42d3ed52b4b8acf880b2" +checksum = "6868f8cd2e62555d1f7c78b784bece43ace40dd2a462daf3b588d5416e603f37" dependencies = [ "bstr", "gix-hash", @@ -1444,25 +1548,23 @@ dependencies = [ [[package]] name = "gix-revision" -version = "0.27.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e08f8107ed1f93a83bcfbb4c38084c7cb3f6cd849793f1d5eec235f9b13b2b" +checksum = "01b13e43c2118c4b0537ddac7d0821ae0dfa90b7b8dbf20c711e153fb749adce" dependencies = [ "bstr", "gix-date", "gix-hash", - "gix-hashtable", "gix-object", "gix-revwalk", - "gix-trace", "thiserror", ] [[package]] name = "gix-revwalk" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4181db9cfcd6d1d0fd258e91569dbb61f94cb788b441b5294dd7f1167a3e788f" +checksum = "1b030ccaab71af141f537e0225f19b9e74f25fefdba0372246b844491cab43e0" dependencies = [ "gix-commitgraph", "gix-date", @@ -1475,9 +1577,9 @@ dependencies = [ [[package]] name = "gix-sec" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fddc27984a643b20dd03e97790555804f98cf07404e0e552c0ad8133266a79a1" +checksum = "1547d26fa5693a7f34f05b4a3b59a90890972922172653bcb891ab3f09f436df" dependencies = [ "bitflags 2.6.0", "gix-path", @@ -1502,9 +1604,9 @@ dependencies = [ [[package]] name = "gix-tempfile" -version = "14.0.0" +version = "14.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3b0e276cd08eb2a22e9f286a4f13a222a01be2defafa8621367515375644b99" +checksum = "006acf5a613e0b5cf095d8e4b3f48c12a60d9062aa2b2dd105afaf8344a5600c" dependencies = [ "gix-fs", "libc", @@ -1521,9 +1623,9 @@ checksum = 
"f924267408915fddcd558e3f37295cc7d6a3e50f8bd8b606cee0808c3915157e" [[package]] name = "gix-transport" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb0ffa5f869977f5b9566399154055902f05d7e85c787d5eacf551acdd0c4adf" +checksum = "27c02b83763ffe95bcc27ce5821b2b7f843315a009c06f1cd59c9b66c508c058" dependencies = [ "base64 0.22.1", "bstr", @@ -1540,9 +1642,9 @@ dependencies = [ [[package]] name = "gix-traverse" -version = "0.39.1" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f20cb69b63eb3e4827939f42c05b7756e3488ef49c25c412a876691d568ee2a0" +checksum = "e499a18c511e71cf4a20413b743b9f5bcf64b3d9e81e9c3c6cd399eae55a8840" dependencies = [ "bitflags 2.6.0", "gix-commitgraph", @@ -1557,9 +1659,9 @@ dependencies = [ [[package]] name = "gix-url" -version = "0.27.3" +version = "0.27.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0db829ebdca6180fbe32be7aed393591df6db4a72dbbc0b8369162390954d1cf" +checksum = "e2eb9b35bba92ea8f0b5ab406fad3cf6b87f7929aa677ff10aa042c6da621156" dependencies = [ "bstr", "gix-features", @@ -1591,9 +1693,9 @@ dependencies = [ [[package]] name = "gix-worktree" -version = "0.34.0" +version = "0.34.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f6b7de83839274022aff92157d7505f23debf739d257984a300a35972ca94e" +checksum = "26f7326ebe0b9172220694ea69d344c536009a9b98fb0f9de092c440f3efe7a6" dependencies = [ "bstr", "gix-attributes", @@ -1610,9 +1712,9 @@ dependencies = [ [[package]] name = "gix-worktree-state" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e64b2835892ce553b15aef7f6f7bb1e39e146fdf71eb99609b86710a7786cf34" +checksum = "39ed6205b5f51067a485b11843babcf3304c0799e265a06eb0dde7f69cd85cd8" dependencies = [ "bstr", "gix-features", @@ -1683,6 +1785,12 @@ version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "hex" version = "0.4.3" @@ -1879,6 +1987,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + [[package]] name = "io-close" version = "0.3.7" @@ -1903,9 +2020,9 @@ checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" [[package]] name = "is_terminal_polyfill" -version = "1.70.0" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "iso8601" @@ -2002,6 +2119,15 @@ dependencies = [ "serde", ] +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.69" @@ -2066,9 +2192,9 @@ dependencies = [ [[package]] name = "kstring" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e703acfd696000db3f6d1238e23b3d1f889192e1e439969c44e8423bb7a5655e" +checksum = "558bf9508a558512042d3095138b1f7b8fe90c5467d94f9f1da28b3731c5dbd1" dependencies = [ "static_assertions", ] @@ -2117,6 +2243,12 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "lockfree-object-pool" +version = "0.1.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e" + [[package]] name = "log" version = "0.4.22" @@ -2132,6 +2264,16 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lzma-rs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e" +dependencies = [ + "byteorder", + "crc", +] + [[package]] name = "markdown" version = "1.0.0-alpha.18" @@ -2179,9 +2321,9 @@ dependencies = [ [[package]] name = "memo-map" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374c335b2df19e62d4cb323103473cbc6510980253119180de862d89184f6a83" +checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" [[package]] name = "miette" @@ -2237,9 +2379,9 @@ dependencies = [ [[package]] name = "minijinja-contrib" -version = "2.0.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "237307aea18e4cc6b3ba0d190ee46f06ff82944a99cc895f04e4c94ccbf121d9" +checksum = "6853ef2340c668281c5ea86b04da2ebb2fc9e98a7185a887591de4cac945d5b5" dependencies = [ "minijinja", "serde", @@ -2272,6 +2414,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "mio" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +dependencies = [ + "hermit-abi", + "libc", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "nom" version = "7.1.3" @@ -2378,9 +2532,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.1" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" dependencies = [ 
"memchr", ] @@ -2391,21 +2545,6 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "opentelemetry" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900d57987be3f2aeb70d385fff9b27fb74c5723cc9a52d904d4f9c807a0667bf" -dependencies = [ - "futures-core", - "futures-sink", - "js-sys", - "once_cell", - "pin-project-lite", - "thiserror", - "urlencoding", -] - [[package]] name = "opentelemetry" version = "0.23.0" @@ -2478,9 +2617,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "ordered-float" -version = "4.2.1" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ff2cf528c6c03d9ed653d6c4ce1dc0582dc4af309790ad92f07c1cd551b0be" +checksum = "4a91171844676f8c7990ce64959210cd2eaef32c2612c50f9fae9f8aaa6065a6" dependencies = [ "num-traits", "rand 0.8.5", @@ -2538,6 +2677,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + [[package]] name = "pem" version = "3.0.4" @@ -2624,6 +2773,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + [[package]] name = "pori" version = "0.0.0" @@ -2647,9 +2802,9 @@ checksum = 
"5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "predicates" -version = "3.1.0" +version = "3.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" +checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" dependencies = [ "anstyle", "difflib", @@ -2658,15 +2813,15 @@ dependencies = [ [[package]] name = "predicates-core" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" +checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931" [[package]] name = "predicates-tree" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" dependencies = [ "predicates-core", "termtree", @@ -2917,9 +3072,9 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "regorus" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b74c40edcecf90b1f39ecc7bd445e31d361b570fdd483c892542da6fbf117195" +checksum = "2f12c2c388b069c2ca5a56667039fbc2bf161e1a1be2abe1304fa4899841e8bc" dependencies = [ "anyhow", "chrono", @@ -3044,9 +3199,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.11" +version = "0.23.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" +checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" dependencies = [ "log", "once_cell", @@ -3203,20 +3358,21 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.121" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "4ab380d7d9f22ef3f21ad3e6c1ebe8e4fc7a2000ccba2e4d71fc96f15b2cb609" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] [[package]] name = "serde_spanned" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +checksum = "eb5b1b31579f3811bf615c144393417496f152e12ac8b7663bf664f4a815306d" dependencies = [ "serde", ] @@ -3292,12 +3448,12 @@ dependencies = [ [[package]] name = "signal-hook-mio" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" +checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" dependencies = [ "libc", - "mio", + "mio 0.8.11", "signal-hook", ] @@ -3310,6 +3466,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "similar" version = "2.6.0" @@ -3480,6 +3642,17 @@ dependencies = [ "libc", ] +[[package]] +name = "tar" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempdir" version = "0.3.7" @@ -3599,17 +3772,17 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.1" +version = "1.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" +checksum = 
"daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" dependencies = [ "backtrace", "bytes", "libc", - "mio", + "mio 1.0.1", "pin-project-lite", "socket2", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -3638,9 +3811,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.15" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac2caab0bf757388c6c0ae23b3293fdb463fee59434529014f85e3263b995c28" +checksum = "81967dd0dd2c1ab0bc3468bd7caecc32b8a4aa47d0c8c695d8c2b2108168d62c" dependencies = [ "serde", "serde_spanned", @@ -3650,18 +3823,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "f8fb9f64314842840f1d940ac544da178732128f1c78c21772e876579e0da1db" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.16" +version = "0.22.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "278f3d518e152219c994ce877758516bca5e118eaed6996192a774fb9fbf0788" +checksum = "8d9f8729f5aea9562aac1cc0441f5d6de3cff1ee0c5d67293eeca5eb36ee7c16" dependencies = [ "indexmap", "serde", @@ -3891,9 +4064,9 @@ dependencies = [ [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wait-timeout" @@ -4045,12 +4218,20 @@ name = "weaver_cache" version = "0.7.0" dependencies = [ "dirs", + "flate2", "gix", "miette", + "once_cell", + "regex", "serde", + "tar", "tempdir", "thiserror", + "ureq", + "url", + "walkdir", "weaver_common", + "zip", ] [[package]] @@ -4120,7 +4301,7 @@ dependencies = [ "miette", "minijinja", 
"minijinja-contrib", - "opentelemetry 0.22.0", + "opentelemetry 0.23.0", "opentelemetry-stdout", "opentelemetry_sdk", "rayon", @@ -4413,9 +4594,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.15" +version = "0.6.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557404e450152cd6795bb558bca69e43c585055f4606e3bcae5894fc6dac9ba0" +checksum = "b480ae9340fc261e6be3e95a1ba86d54ae3f9171132a73ce8d4bbaf68339507c" dependencies = [ "memchr", ] @@ -4430,6 +4611,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + [[package]] name = "xtask" version = "0.1.0" @@ -4463,3 +4655,88 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zip" +version = "2.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b895748a3ebcb69b9d38dcfdf21760859a4b0d0b0015277640c2ef4c69640e6f" +dependencies = [ + "aes", + "arbitrary", + "bzip2", + "constant_time_eq", + "crc32fast", + "crossbeam-utils", + "deflate64", + "displaydoc", + "flate2", + "hmac", + "indexmap", + "lzma-rs", + "memchr", + "pbkdf2", + "rand 0.8.5", + "sha1", + "thiserror", + "time", + "zeroize", + "zopfli", + "zstd", +] + +[[package]] +name = "zopfli" +version = "0.8.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5019f391bac5cf252e93bbcc53d039ffd62c7bfb7c150414d61369afe57e946" +dependencies = [ + "bumpalo", + "crc32fast", + "lockfree-object-pool", + "log", + "once_cell", + "simd-adler32", +] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.12+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index e48adcad..6c36f354 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ serde = { version = "1.0.204", features = ["derive"] } serde_yaml = "0.9.32" serde_json = { version = "1.0.116"} thiserror = "1.0.63" +url = "2.5.2" ureq = "2.10.0" regex = "1.10.5" rayon = "1.10.0" @@ -53,6 +54,8 @@ include_dir = "0.7.4" tempdir = "0.3.7" schemars = "0.8.21" dirs = "5.0.1" +once_cell = "1.19.0" +opentelemetry = { version = "0.23.0", features = ["trace", "metrics", "logs", "otel_unstable"] } # Features definition ========================================================= [features] @@ -108,7 +111,7 @@ inherits = "release" lto = "thin" [workspace.lints.rust] -rust_2018_idioms = "warn" +rust_2018_idioms = { level = "warn", priority = -1 } unused_lifetimes = "warn" # unreachable_pub = "warn" semicolon_in_expressions_from_macros = "warn" diff --git a/crates/weaver_cache/Cargo.toml b/crates/weaver_cache/Cargo.toml index 7acd11a3..29bd57de 100644 --- a/crates/weaver_cache/Cargo.toml 
+++ b/crates/weaver_cache/Cargo.toml @@ -23,8 +23,17 @@ gix = { version = "0.63.0", default-features = false, features = [ "worktree-mutation", "blocking-http-transport-reqwest-rust-tls", ] } +flate2 = "1.0.30" +tar = "0.4.41" +zip = "2.1.5" thiserror.workspace = true serde.workspace = true miette.workspace = true +once_cell.workspace = true +regex.workspace = true +url.workspace = true +ureq.workspace = true +[dev-dependencies] +walkdir.workspace = true diff --git a/crates/weaver_cache/src/lib.rs b/crates/weaver_cache/src/lib.rs index 2d605192..724be3dc 100644 --- a/crates/weaver_cache/src/lib.rs +++ b/crates/weaver_cache/src/lib.rs @@ -1,18 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 -//! A cache system for OTel Weaver. -//! -//! Semantic conventions, schemas and other assets are cached -//! locally to avoid fetching them from the network every time. +//! A Semantic Convention Repository abstraction for OTel Weaver. use std::default::Default; -use std::fs::create_dir_all; +use std::fs::{create_dir_all, File}; +use std::io; use std::num::NonZeroU32; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; -use std::sync::Mutex; -use crate::Error::GitError; use gix::clone::PrepareFetch; use gix::create::Kind; use gix::remote::fetch::Shallow; @@ -20,8 +16,20 @@ use gix::{create, open, progress}; use miette::Diagnostic; use serde::Serialize; use tempdir::TempDir; +use url::Url; + use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; +use crate::registry_path::RegistryPath; +use crate::Error::{GitError, InvalidRegistryArchive, UnsupportedRegistryArchive}; + +pub mod registry_path; + +/// The extension for a tar gz archive. +const TAR_GZ_EXT: &str = ".tar.gz"; +/// The extension for a zip archive. +const ZIP_EXT: &str = ".zip"; + /// An error that can occur while creating or using a cache. 
#[derive(thiserror::Error, Debug, Clone, Serialize, Diagnostic)] #[non_exhaustive] @@ -54,6 +62,31 @@ pub enum Error { /// The error message message: String, }, + + /// An invalid registry path. + #[error("The registry path `{path}` is invalid: {error}")] + InvalidRegistryPath { + /// The registry path + path: String, + /// The error message + error: String, + }, + + /// An invalid registry archive. + #[error("This archive `{archive}` is not supported. Supported formats are: .tar.gz, .zip")] + UnsupportedRegistryArchive { + /// The registry archive path + archive: String, + }, + + /// An invalid registry archive. + #[error("The registry archive `{archive}` is invalid: {error}")] + InvalidRegistryArchive { + /// The registry archive path + archive: String, + /// The error message + error: String, + }, } impl From for DiagnosticMessages { @@ -62,78 +95,78 @@ impl From for DiagnosticMessages { } } -/// A cache system for OTel Weaver. +/// A semantic convention registry repository that can be: +/// - A simple wrapper around a local directory +/// - Initialized from a Git repository +/// - Initialized from a Git archive #[derive(Default)] -pub struct Cache { +pub struct RegistryRepo { + // A unique identifier for the registry (e.g. main, baseline, etc.) + id: String, + registry_path: String, path: PathBuf, - git_repo_dirs: Mutex>, -} - -/// A git repo that is cloned into a tempdir. -struct GitRepo { - /// Need to allow dead code because we need to keep the tempdir live - /// for the lifetime of the GitRepo. + // Need to keep the tempdir live for the lifetime of the RegistryRepo. #[allow(dead_code)] - temp_dir: TempDir, - path: PathBuf, + tmp_dir: Option, } -impl Cache { - /// Creates the `.otel-weaver/cache` directory in the home directory. - /// This directory is used to store the semantic conventions, schemas - /// and other assets that are fetched from the network. 
- pub fn try_new() -> Result { - let home = dirs::home_dir().ok_or(Error::HomeDirNotFound)?; - let cache_path = home.join(".otel-weaver/cache"); - - create_dir_all(cache_path.as_path()).map_err(|e| Error::CacheDirNotCreated { - message: e.to_string(), - })?; - - Ok(Self { - path: cache_path, - ..Default::default() - }) - } - - /// The given repo_url is cloned into the cache and the path to the repo is returned. - /// The optional path parameter is relative to the root of the repo. - /// The intent is to allow the caller to specify a subdirectory of the repo and - /// use a sparse checkout once `gitoxide` supports it. In the meantime, the - /// path is checked to exist in the repo and an error is returned if it doesn't. - /// If the path exists in the repo, the returned pathbuf is the path to the - /// subdirectory in the git repo directory. - pub fn git_repo(&self, repo_url: String, path: Option) -> Result { - // Checks if a tempdir already exists for this repo - if let Some(git_repo_dir) = self - .git_repo_dirs - .lock() - .expect("git_repo_dirs lock failed") - .get(&repo_url) - { - if let Some(subdir) = path { - return Ok(git_repo_dir.path.join(subdir)); - } else { - return Ok(git_repo_dir.path.clone()); +impl RegistryRepo { + /// Creates a new `RegistryRepo` from a `RegistryPath` object that + /// specifies the location of the registry. + pub fn try_new(id: &str, registry_path: &RegistryPath) -> Result { + let registry_path_repr = registry_path.to_string(); + match registry_path { + RegistryPath::LocalFolder { path } => Ok(Self { + id: id.to_owned(), + registry_path: registry_path_repr, + path: path.into(), + tmp_dir: None, + }), + RegistryPath::GitRepo { + url, sub_folder, .. + } => Self::try_from_git_url(id, url, sub_folder, registry_path_repr), + RegistryPath::LocalArchive { path, sub_folder } => { + // Create a temporary directory for the repo that will be deleted + // when the RegistryRepo goes out of scope. 
+ let tmp_dir = Self::create_tmp_repo()?; + Self::try_from_local_archive( + id, + path, + sub_folder.as_ref(), + tmp_dir, + registry_path_repr, + ) + } + RegistryPath::RemoteArchive { url, sub_folder } => { + // Create a temporary directory for the repo that will be deleted + // when the RegistryRepo goes out of scope. + let tmp_dir = Self::create_tmp_repo()?; + Self::try_from_remote_archive( + id, + url, + sub_folder.as_ref(), + tmp_dir, + registry_path_repr, + ) } } + } - // Otherwise creates a tempdir for the repo and keeps track of it - // in the git_repo_dirs hashmap. - let git_repo_dir = TempDir::new_in(self.path.as_path(), "git-repo").map_err(|e| { - Error::GitRepoNotCreated { - repo_url: repo_url.clone(), - message: e.to_string(), - } - })?; - let git_repo_pathbuf = git_repo_dir.path().to_path_buf(); - let git_repo_path = git_repo_pathbuf.as_path(); + /// Creates a new `RegistryRepo` from a Git URL. + fn try_from_git_url( + id: &str, + url: &str, + sub_folder: &Option, + registry_path: String, + ) -> Result { + let tmp_dir = Self::create_tmp_repo()?; + let tmp_path = tmp_dir.path().to_path_buf(); - // Clones the repo into the tempdir. + // Clones the repo into the temporary directory. // Use shallow clone to save time and space. let mut fetch = PrepareFetch::new( - repo_url.as_str(), - git_repo_path, + url, + tmp_path.clone(), Kind::WithWorktree, create::Options { destination_must_be_empty: true, @@ -142,7 +175,7 @@ impl Cache { open::Options::isolated(), ) .map_err(|e| GitError { - repo_url: repo_url.clone(), + repo_url: url.to_owned(), message: e.to_string(), })? 
.with_shallow(Shallow::DepthAtRemote( @@ -152,47 +185,346 @@ impl Cache { let (mut prepare, _outcome) = fetch .fetch_then_checkout(progress::Discard, &AtomicBool::new(false)) .map_err(|e| GitError { - repo_url: repo_url.clone(), + repo_url: url.to_owned(), message: e.to_string(), })?; let (_repo, _outcome) = prepare .main_worktree(progress::Discard, &AtomicBool::new(false)) .map_err(|e| GitError { - repo_url: repo_url.clone(), + repo_url: url.to_owned(), message: e.to_string(), })?; - // Determines the path to the repo. - let git_repo_path = if let Some(path) = &path { + // Determines the final path to the repo taking into account the sub_folder. + let path = if let Some(sub_folder) = sub_folder { + let path_to_repo = tmp_path.join(sub_folder); + // Checks the existence of the path in the repo. // If the path doesn't exist, returns an error. - if !git_repo_path.join(path).exists() { + if !path_to_repo.exists() { return Err(GitError { - repo_url: repo_url.clone(), - message: format!("Path `{}` not found in repo", path), + repo_url: url.to_owned(), + message: format!("Path `{}` not found in repo", sub_folder), }); } - git_repo_path.join(path) + path_to_repo + } else { + tmp_path + }; + + Ok(Self { + id: id.to_owned(), + registry_path, + path, + tmp_dir: Some(tmp_dir), + }) + } + + /// Create a new `RegistryRepo` from a local archive. + /// The archive can be in `.tar.gz` or `.zip` format. + /// The sub_folder is used to filter the entries inside the archive to unpack. + /// The temporary directory is created in the `.weaver/semconv_registry_cache`. + /// The temporary directory is deleted when the `RegistryRepo` goes out of scope. + /// + /// Arguments: + /// - `id`: The unique identifier for the registry. + /// - `archive_filename`: The path to the archive file. + /// - `sub_folder`: The sub-folder to unpack inside the archive. + /// - `target_dir`: The temporary target directory where the archive will be unpacked. 
+ /// - `registry_path`: The registry path representation (for debug purposes). + fn try_from_local_archive( + id: &str, + archive_filename: &str, + sub_folder: Option<&String>, + target_dir: TempDir, + registry_path: String, + ) -> Result { + let archive_path = Path::new(archive_filename); + if !archive_path.exists() { + return Err(InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: "This archive file doesn't exist".to_owned(), + }); + } + let archive_file = File::open(archive_path).map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + let target_path_buf = target_dir.path().to_path_buf(); + + // Process the supported formats (i.e.: `.tar.gz`, and `.zip`) + if archive_filename.ends_with(TAR_GZ_EXT) { + Self::unpack_tar_gz(archive_filename, archive_file, &target_path_buf, sub_folder)?; + } else if archive_filename.ends_with(ZIP_EXT) { + Self::unpack_zip(archive_filename, archive_file, &target_path_buf, sub_folder)?; } else { - git_repo_path.to_path_buf() + return Err(UnsupportedRegistryArchive { + archive: archive_filename.to_owned(), + }); }; - // Adds the repo to the git_repo_dirs hashmap. - _ = self - .git_repo_dirs - .lock() - .expect("git_repo_dirs lock failed") - .insert( - repo_url.clone(), - GitRepo { - temp_dir: git_repo_dir, - path: git_repo_path.clone(), - }, - ); - - Ok(git_repo_path) + Ok(Self { + id: id.to_owned(), + registry_path, + path: target_path_buf, + tmp_dir: Some(target_dir), + }) + } + + /// Returns the unique identifier for the registry. + #[must_use] + pub fn id(&self) -> &str { + &self.id + } + + /// Unpacks a tar.gz archive into the specified target directory. + /// + /// This first directory in the archive is skipped as it is the directory corresponding to the + /// archive itself. The sub_folder is used to filter the entries to unpack. The sub_folder + /// directory is also skipped in the folder hierarchy to only unpack the content of the + /// sub-folder. 
+ fn unpack_tar_gz( + archive_filename: &str, + archive_file: File, + target_path: &Path, + sub_folder: Option<&String>, + ) -> Result<(), Error> { + let tar_file = flate2::read::GzDecoder::new(archive_file); + let mut archive = tar::Archive::new(tar_file); + + for entry in archive.entries().map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })? { + let mut entry = entry.map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + + let path = entry.path().map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + + if let Some(valid_entry_path) = Self::path_to_unpack(&path, sub_folder, target_path) { + Self::create_parent_dirs(&valid_entry_path, archive_filename)?; + // Unpack returns an Unpacked type containing the file descriptor to the + // unpacked file. The file descriptor is ignored as we don't have any use for it. + _ = entry + .unpack(valid_entry_path) + .map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + } + } + Ok(()) + } + + /// Unpacks a zip archive into the specified target directory. + /// + /// This first directory in the archive is skipped as it is the directory corresponding to the + /// archive itself. The sub_folder is used to filter the entries to unpack. The sub_folder + /// directory is also skipped in the folder hierarchy to only unpack the content of the + /// sub-folder. 
+ fn unpack_zip( + archive_filename: &str, + archive_file: File, + tmp_path: &Path, + sub_folder: Option<&String>, + ) -> Result<(), Error> { + let mut archive = + zip::ZipArchive::new(archive_file).map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + + for i in 0..archive.len() { + let mut entry = archive.by_index(i).map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + + if let Some(path) = entry.enclosed_name() { + if let Some(valid_entry_path) = Self::path_to_unpack(&path, sub_folder, tmp_path) { + Self::create_parent_dirs(&valid_entry_path, archive_filename)?; + + if entry.is_dir() { + create_dir_all(&valid_entry_path).map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + } else { + let mut outfile = File::create(&valid_entry_path).map_err(|e| { + InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + } + })?; + // Copy the content of the entry to the output file. + // `io::copy` returns the number of bytes copied, but it is ignored here + // as the function will return an error if the copy fails. + _ = io::copy(&mut entry, &mut outfile).map_err(|e| { + InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + } + })?; + } + } + } + } + Ok(()) + } + + /// Returns the corrected path to unpack from an entry in the archive knowing: + /// - the top-level directory in the archive corresponds to the initial directory archived + /// - the sub-folder in the archive to unpack + fn path_to_unpack( + entry_path: &Path, + sub_folder: Option<&String>, + target_path: &Path, + ) -> Option { + let mut components = entry_path.components(); + + // Skip the first component, i.e. the top-level directory in the archive that + // corresponds to the initial directory archived. 
+ _ = components.next(); + + // If a sub-folder is specified, skip entries not in the sub-folder. + if let Some(sub_folder) = sub_folder { + if !sub_folder.trim().is_empty() { + // Skip any entry that is not in the sub-folder. + // If the entry is in the sub-folder, the sub-folder component is skipped. + let component = components.next(); + if let Some(component) = component { + if component.as_os_str() != sub_folder.as_str() { + return None; // Skip entries not in the sub-folder + } + } + } + } + Some(target_path.join(components.collect::())) + } + + /// Creates parent directories for the given path. + fn create_parent_dirs(new_path: &Path, archive_filename: &str) -> Result<(), Error> { + if let Some(parent) = new_path.parent() { + create_dir_all(parent).map_err(|e| InvalidRegistryArchive { + archive: archive_filename.to_owned(), + error: e.to_string(), + })?; + } + Ok(()) + } + + /// Create a new `RegistryRepo` from a remote archive. + /// + /// The archive can be in `.tar.gz` or `.zip` format. + /// The sub_folder is used to filter the entries inside the archive to unpack. + /// The temporary directory is created in the `.weaver/semconv_registry_cache`. + /// The temporary directory is deleted when the `RegistryRepo` goes out of scope. + /// + /// Arguments: + /// - `id`: The unique identifier for the registry. + /// - `url`: The URL of the archive. + /// - `sub_folder`: The sub-folder to unpack inside the archive. + /// - `target_dir`: The temporary target directory where the archive will be unpacked. + /// - `registry_path`: The registry path representation (for debug purposes). 
+ fn try_from_remote_archive( + id: &str, + url: &str, + sub_folder: Option<&String>, + target_dir: TempDir, + registry_path: String, + ) -> Result { + let tmp_path = target_dir.path().to_path_buf(); + + // Download the archive from the URL + let response = ureq::get(url).call().map_err(|e| InvalidRegistryArchive { + archive: url.to_owned(), + error: e.to_string(), + })?; + if response.status() != 200 { + return Err(InvalidRegistryArchive { + archive: url.to_owned(), + error: format!("HTTP status code: {}", response.status()), + }); + } + + // Parse the URL to get the file name + let parsed_url = Url::parse(url).map_err(|e| InvalidRegistryArchive { + archive: url.to_owned(), + error: e.to_string(), + })?; + let file_name = parsed_url + .path_segments() + .and_then(|segments| segments.last()) + .and_then(|name| if name.is_empty() { None } else { Some(name) }) + .ok_or("Failed to extract file name from URL") + .map_err(|e| InvalidRegistryArchive { + archive: url.to_owned(), + error: e.to_owned(), + })?; + + // Create the full path to the save file + let save_path = tmp_path.join(file_name); + + // Open a file in write mode + let mut file = File::create(save_path.clone()).map_err(|e| InvalidRegistryArchive { + archive: url.to_owned(), + error: e.to_string(), + })?; + + // Write the response body to the file. + // The number of bytes written is ignored as the `try_from_local_archive` function + // will handle the archive extraction and return an error if the archive is invalid. + _ = io::copy(&mut response.into_reader(), &mut file).map_err(|e| { + InvalidRegistryArchive { + archive: url.to_owned(), + error: e.to_string(), + } + })?; + + Self::try_from_local_archive( + id, + save_path.to_str().unwrap_or_default(), + sub_folder, + target_dir, + registry_path, + ) + } + + /// Returns the local path to the semconv registry. + #[must_use] + pub fn path(&self) -> &Path { + self.path.as_path() + } + + /// Returns the registry path textual representation. 
+ #[must_use] + pub fn registry_path_repr(&self) -> &str { + &self.registry_path + } + + /// Creates a temporary directory for the registry repository and returns the path. + /// The temporary directory is created in the `.weaver/semconv_registry_cache`. + fn create_tmp_repo() -> Result { + let home = dirs::home_dir().ok_or(Error::HomeDirNotFound)?; + let cache_path = home.join(".weaver/semconv_registry_cache"); + + create_dir_all(cache_path.as_path()).map_err(|e| Error::CacheDirNotCreated { + message: e.to_string(), + })?; + + let tmp_dir = TempDir::new_in(cache_path.as_path(), "repo").map_err(|e| { + Error::CacheDirNotCreated { + message: e.to_string(), + } + })?; + Ok(tmp_dir) } } @@ -200,17 +532,91 @@ impl Cache { mod tests { use super::*; - /// Marked as ignore because we don't want to clone the repo every - /// time we run the tests in CI. + fn count_yaml_files(repo_path: &Path) -> usize { + let count = walkdir::WalkDir::new(repo_path) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.path().extension().map_or(false, |ext| ext == "yaml")) + .count(); + count + } + #[test] - #[ignore] - fn test_cache() { - let cache = Cache::try_new().unwrap(); - let result = cache.git_repo( - "https://github.com/open-telemetry/semantic-conventions.git".into(), - Some("model".into()), + fn test_semconv_registry_local_repo() { + // A RegistryRepo created from a local folder. + let registry_path = RegistryPath::LocalFolder { + path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), + }; + let repo = RegistryRepo::try_new("main", ®istry_path).unwrap(); + let repo_path = repo.path().to_path_buf(); + assert!(repo_path.exists()); + assert!( + count_yaml_files(&repo_path) > 0, + "There should be at least one `.yaml` file in the repo" + ); + // Simulate a RegistryRepo going out of scope. + drop(repo); + // The local folder should not be deleted. 
+ assert!(repo_path.exists()); + } + + fn check_archive(registry_path: RegistryPath, file_to_check: Option<&str>) { + let repo = RegistryRepo::try_new("main", ®istry_path).unwrap(); + let repo_path = repo.path().to_path_buf(); + // At this point, the repo should be cloned into a temporary directory. + assert!(repo_path.exists()); + assert!( + count_yaml_files(&repo_path) > 0, + "There should be at least one `.yaml` file in the repo" ); - assert!(result.is_ok()); - assert!(result.unwrap().exists()); + if let Some(file_to_check) = file_to_check { + let file_path = repo_path.join(file_to_check); + assert!(file_path.exists()); + } + // Simulate a RegistryRepo going out of scope. + drop(repo); + // The temporary directory should be deleted automatically. + assert!(!repo_path.exists()); + } + + #[test] + fn test_semconv_registry_git_repo() { + let registry_path = RegistryPath::GitRepo { + // This git repo is expected to be available. + url: "https://github.com/open-telemetry/semantic-conventions.git".to_owned(), + sub_folder: Some("model".to_owned()), + refspec: None, + }; + check_archive(registry_path, None); + } + + #[test] + fn test_semconv_registry_local_tar_gz_archive() { + let registry_path = "../../test_data/semantic-conventions-1.26.0.tar.gz[model]" + .parse::() + .unwrap(); + check_archive(registry_path, Some("general.yaml")); + } + + #[test] + fn test_semconv_registry_local_zip_archive() { + let registry_path = "../../test_data/semantic-conventions-1.26.0.zip[model]" + .parse::() + .unwrap(); + check_archive(registry_path, Some("general.yaml")); + } + + #[test] + fn test_semconv_registry_remote_tar_gz_archive() { + let registry_path = "https://github.com/open-telemetry/semantic-conventions/archive/refs/tags/v1.26.0.tar.gz[model]" + .parse::().unwrap(); + check_archive(registry_path, Some("general.yaml")); + } + + #[test] + fn test_semconv_registry_remote_zip_archive() { + let registry_path = 
"https://github.com/open-telemetry/semantic-conventions/archive/refs/tags/v1.26.0.zip[model]" + .parse::().unwrap(); + check_archive(registry_path, Some("general.yaml")); } } diff --git a/crates/weaver_cache/src/registry_path.rs b/crates/weaver_cache/src/registry_path.rs new file mode 100644 index 00000000..7d6a574d --- /dev/null +++ b/crates/weaver_cache/src/registry_path.rs @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! The representation of a semantic convention registry path/location. + +use std::fmt::Display; +use std::str::FromStr; + +use once_cell::sync::Lazy; +use regex::Regex; + +use crate::Error; + +/// Regex to parse a registry path supporting the following formats: +/// - source +/// - source@tag +/// - source\[sub_folder\] +/// - source@tag\[sub_folder\] +static REGISTRY_REGEX: Lazy = Lazy::new(|| { + Regex::new(r"^(?P.+?)(?:@(?P.+?))?(?:\[(?P.+?)])?$") + .expect("Invalid regex") +}); + +/// Path to a semantic convention registry. +/// The path can be a local directory or a Git URL. +#[derive(Debug, Clone)] +pub enum RegistryPath { + /// Local folder path pointing to a semantic convention registry. + LocalFolder { + /// Path to a local folder + path: String, + }, + /// Local archive path containing a semantic convention registry. + LocalArchive { + /// Path to a local archive + path: String, + /// Sub-folder within the archive containing the semantic convention registry + sub_folder: Option, + }, + /// Remote archive containing a semantic convention registry. + RemoteArchive { + /// URL of the remote archive + url: String, + /// Sub-folder within the archive containing the semantic convention registry + sub_folder: Option, + }, + /// Git repository containing a semantic convention registry. 
+ GitRepo { + /// URL of the Git repository + url: String, + /// Specific Tag, Branch, or Commit of the Git repository (NOT YET SUPPORTED) + refspec: Option, + /// Sub-folder within the repository containing the semantic convention registry + sub_folder: Option, + }, +} + +/// Implement the `FromStr` trait for `RegistryPath`, so that it can be used as +/// a command-line argument. +impl FromStr for RegistryPath { + type Err = Error; + + /// Parse a string into a `RegistryPath`. + fn from_str(s: &str) -> Result { + let captures = REGISTRY_REGEX + .captures(s) + .ok_or(Error::InvalidRegistryPath { + path: s.to_owned(), + error: "Invalid registry path".to_owned(), + })?; + let source = captures + .name("source") + .ok_or(Error::InvalidRegistryPath { + path: s.to_owned(), + error: "Invalid registry path. No local path or URL found".to_owned(), + })? + .as_str(); + let refspec = captures.name("refspec").map(|m| m.as_str().to_owned()); + let sub_folder = captures.name("sub_folder").map(|m| m.as_str().to_owned()); + + if source.starts_with("http://") || source.starts_with("https://") { + if source.ends_with(".zip") || source.ends_with(".tar.gz") { + Ok(Self::RemoteArchive { + url: source.to_owned(), + sub_folder, + }) + } else { + Ok(Self::GitRepo { + url: source.to_owned(), + refspec, + sub_folder, + }) + } + } else if source.ends_with(".zip") || source.ends_with(".tar.gz") { + Ok(Self::LocalArchive { + path: source.to_owned(), + sub_folder, + }) + } else { + Ok(Self::LocalFolder { + path: source.to_owned(), + }) + } + } +} + +/// Implement the `Display` trait for `RegistryPath`, so that it can be printed +/// to the console. +impl Display for RegistryPath { + /// Format the `RegistryPath` as a string. 
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RegistryPath::LocalFolder { path } => write!(f, "{}", path), + RegistryPath::LocalArchive { path, sub_folder } => { + if let Some(sub_folder) = sub_folder { + write!(f, "{}[{}]", path, sub_folder) + } else { + write!(f, "{}", path) + } + } + RegistryPath::RemoteArchive { url, sub_folder } => { + if let Some(sub_folder) = sub_folder { + write!(f, "{}[{}]", url, sub_folder) + } else { + write!(f, "{}", url) + } + } + RegistryPath::GitRepo { + url, + refspec, + sub_folder, + } => match (refspec, sub_folder) { + (Some(refspec), Some(folder)) => write!(f, "{}@{}[{}]", url, refspec, folder), + (Some(refspec), None) => write!(f, "{}@{}", url, refspec), + (None, Some(folder)) => write!(f, "{}[{}]", url, folder), + (None, None) => write!(f, "{}", url), + }, + } + } +} + +#[cfg(test)] +mod tests { + use crate::registry_path::RegistryPath; + + #[test] + fn test_registry_path() { + // Local folder + let registry_path_str = "path/to/registry"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::LocalFolder { path } = ®istry_path { + assert_eq!(path, registry_path_str); + } else { + panic!("Expected LocalFolder, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Local archive (zip) + let registry_path_str = "http://example.com/registry.zip"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::RemoteArchive { url, sub_folder } = ®istry_path { + assert_eq!(url, registry_path_str); + assert_eq!(*sub_folder, None); + } else { + panic!("Expected RemoteArchive, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Local archive with sub-folder (zip) + let registry_path_str = "http://example.com/registry.zip[model]"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::RemoteArchive { url, 
sub_folder } = ®istry_path { + assert_eq!(url, "http://example.com/registry.zip"); + assert_eq!(*sub_folder, Some("model".to_owned())); + } else { + panic!("Expected RemoteArchive, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Local archive (tar.gz) + let registry_path_str = "http://example.com/registry.tar.gz"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::RemoteArchive { url, sub_folder } = ®istry_path { + assert_eq!(url, registry_path_str); + assert_eq!(*sub_folder, None); + } else { + panic!("Expected RemoteArchive, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Local archive with sub-folder (tar.gz) + let registry_path_str = "http://example.com/registry.tar.gz[model]"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::RemoteArchive { url, sub_folder } = ®istry_path { + assert_eq!(url, "http://example.com/registry.tar.gz"); + assert_eq!(*sub_folder, Some("model".to_owned())); + } else { + panic!("Expected RemoteArchive, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Git repository + let registry_path_str = "http://example.com/registry.git"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::GitRepo { + url, + refspec, + sub_folder, + } = ®istry_path + { + assert_eq!(url, registry_path_str); + assert_eq!(*refspec, None); + assert_eq!(*sub_folder, None); + } else { + panic!("Expected GitRepo, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Git repository with sub-folder + let registry_path_str = "http://example.com/registry.git[model]"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::GitRepo { + url, + refspec, + sub_folder, + } = ®istry_path + { + assert_eq!(url, "http://example.com/registry.git"); + 
assert_eq!(*refspec, None); + assert_eq!(*sub_folder, Some("model".to_owned())); + } else { + panic!("Expected GitRepo, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Git repository with tag + let registry_path_str = "http://example.com/registry.git@v1.0.0"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::GitRepo { + url, + refspec, + sub_folder, + } = ®istry_path + { + assert_eq!(url, "http://example.com/registry.git"); + assert_eq!(*refspec, Some("v1.0.0".to_owned())); + assert_eq!(*sub_folder, None); + } else { + panic!("Expected GitRepo, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + + // Git repository with tag and sub-folder + let registry_path_str = "http://example.com/registry.git@v1.0.0[model]"; + let registry_path: RegistryPath = registry_path_str.parse().unwrap(); + if let RegistryPath::GitRepo { + url, + refspec, + sub_folder, + } = ®istry_path + { + assert_eq!(url, "http://example.com/registry.git"); + assert_eq!(*refspec, Some("v1.0.0".to_owned())); + assert_eq!(*sub_folder, Some("model".to_owned())); + } else { + panic!("Expected GitRepo, got something else"); + } + assert_eq!(registry_path.to_string(), registry_path_str); + } +} diff --git a/crates/weaver_checker/README.md b/crates/weaver_checker/README.md index f208597d..0f0735d7 100644 --- a/crates/weaver_checker/README.md +++ b/crates/weaver_checker/README.md @@ -71,9 +71,9 @@ The following diagram illustrates the policy verification process: ## Policy Stages Policies can be applied at two different stages of the resolution process. 1) To apply policies before the resolution process, simply group the policies -into a package named `before_resolution`. + into a package named `before_resolution`. 
2) To apply them after the resolution process, the `after_resolution` package -should be used + should be used The example below presents a set of violation detection rules that will apply before the validation process. diff --git a/crates/weaver_checker/src/lib.rs b/crates/weaver_checker/src/lib.rs index 94ea90ab..b946ba3f 100644 --- a/crates/weaver_checker/src/lib.rs +++ b/crates/weaver_checker/src/lib.rs @@ -21,6 +21,9 @@ use crate::Error::CompoundError; pub mod violation; +/// Default semconv rules/functions for the semantic convention registry. +pub const SEMCONV_REGO: &str = include_str!("../../../defaults/rego/semconv.rego"); + /// An error that can occur while evaluating policies. #[derive(thiserror::Error, Debug, Serialize, Diagnostic, Clone)] #[must_use] @@ -126,6 +129,8 @@ pub enum PolicyStage { BeforeResolution, /// Policies that are evaluated after resolution. AfterResolution, + /// Policies that are evaluated between two registries after the resolution phase. + ComparisonAfterResolution, } impl Display for PolicyStage { @@ -138,6 +143,9 @@ impl Display for PolicyStage { PolicyStage::AfterResolution => { write!(f, "after_resolution") } + PolicyStage::ComparisonAfterResolution => { + write!(f, "comparison_after_resolution") + } } } } @@ -169,13 +177,48 @@ impl Engine { self.coverage_enabled = true; } + /// Adds a rego policy (content) to the policy engine. + /// + /// # Arguments + /// + /// * `path` - The path to the policy (used for error messages). + /// * `rego` - The content of the rego policy. + /// + /// # Returns + /// + /// The policy package name. + pub fn add_policy(&mut self, path: &str, rego: &str) -> Result { + let policy_package = self + .engine + .add_policy(path.to_owned(), rego.to_owned()) + .map_err(|e| Error::InvalidPolicyFile { + file: path.to_owned(), + error: e.to_string(), + }) + .inspect(|_| { + self.policy_package_count += 1; + })?; + // Add the policy package defined in the imported policy file. 
+ // Nothing prevents multiple policy files from importing the same policy package. + // All the rules will be combined and evaluated together. + _ = self.policy_packages.insert(policy_package.clone()); + Ok(policy_package) + } + /// Adds a policy file to the policy engine. /// A policy file is a `rego` file that contains the policies to be evaluated. /// /// # Arguments /// /// * `policy_path` - The path to the policy file. - pub fn add_policy>(&mut self, policy_path: P) -> Result { + /// + /// # Returns + /// + /// The policy package name. + pub fn add_policy_from_file>( + &mut self, + policy_path: P, + ) -> Result { let policy_path_str = policy_path.as_ref().to_string_lossy().to_string(); let policy_package = self @@ -237,7 +280,7 @@ impl Engine { continue; } if is_policy_file(&entry) { - if let Err(err) = self.add_policy(entry.path()) { + if let Err(err) = self.add_policy_from_file(entry.path()) { errors.push(err); } else { added_policy_count += 1; @@ -372,7 +415,7 @@ mod tests { #[test] fn test_policy() -> Result<(), Box> { let mut engine = Engine::new(); - let policy_package = engine.add_policy("data/policies/otel_policies.rego")?; + let policy_package = engine.add_policy_from_file("data/policies/otel_policies.rego")?; assert_eq!(policy_package, "data.before_resolution"); let old_semconv = std::fs::read_to_string("data/registries/registry.network.old.yaml")?; @@ -420,7 +463,7 @@ mod tests { #[test] fn test_invalid_policy() { let mut engine = Engine::new(); - let result = engine.add_policy("data/policies/invalid_policy.rego"); + let result = engine.add_policy_from_file("data/policies/invalid_policy.rego"); assert!(result.is_err()); } @@ -435,7 +478,7 @@ mod tests { fn test_invalid_violation_object() { let mut engine = Engine::new(); _ = engine - .add_policy("data/policies/invalid_violation_object.rego") + .add_policy_from_file("data/policies/invalid_violation_object.rego") .unwrap(); let new_semconv = diff --git a/crates/weaver_codegen_test/Cargo.toml 
b/crates/weaver_codegen_test/Cargo.toml index dae1928b..87a1469e 100644 --- a/crates/weaver_codegen_test/Cargo.toml +++ b/crates/weaver_codegen_test/Cargo.toml @@ -8,6 +8,9 @@ publish.workspace = true edition.workspace = true rust-version.workspace = true +[features] +semconv_experimental = [] + [lints] workspace = true @@ -21,4 +24,4 @@ walkdir.workspace = true dirs.workspace = true [dependencies] -opentelemetry = { version = "0.23.0", features = ["trace", "metrics", "logs", "otel_unstable"] } \ No newline at end of file +opentelemetry.workspace = true \ No newline at end of file diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 9f82f2ac..f2eaa21d 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -10,7 +10,8 @@ use std::collections::HashMap; use std::io::Write; use std::path::{Component, Path, PathBuf}; use std::process::exit; -use weaver_cache::Cache; +use weaver_cache::registry_path::RegistryPath; +use weaver_cache::RegistryRepo; use weaver_common::in_memory::LogMessage; use weaver_common::{in_memory, Logger}; use weaver_forge::config::{Params, WeaverConfig}; @@ -18,7 +19,6 @@ use weaver_forge::file_loader::FileSystemFileLoader; use weaver_forge::registry::ResolvedRegistry; use weaver_forge::{OutputDirective, TemplateEngine, SEMCONV_JQ}; use weaver_resolver::SchemaResolver; -use weaver_semconv::path::RegistryPath; use weaver_semconv::registry::SemConvRegistry; const SEMCONV_REGISTRY_PATH: &str = "./semconv_registry/"; @@ -39,11 +39,12 @@ fn main() { let logger = in_memory::Logger::new(0); // Load and resolve the semantic convention registry - let cache = Cache::try_new().unwrap_or_else(|e| process_error(&logger, e)); - let registry_path = RegistryPath::Local { - path_pattern: SEMCONV_REGISTRY_PATH.into(), + let registry_path = RegistryPath::LocalFolder { + path: SEMCONV_REGISTRY_PATH.into(), }; - let semconv_specs = SchemaResolver::load_semconv_specs(®istry_path, &cache) + let 
registry_repo = + RegistryRepo::try_new("main", ®istry_path).unwrap_or_else(|e| process_error(&logger, e)); + let semconv_specs = SchemaResolver::load_semconv_specs(®istry_repo) .unwrap_or_else(|e| process_error(&logger, e)); let mut registry = SemConvRegistry::from_semconv_specs(REGISTRY_ID, semconv_specs); let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry) diff --git a/crates/weaver_common/src/lib.rs b/crates/weaver_common/src/lib.rs index f2bf73f7..f9917ad5 100644 --- a/crates/weaver_common/src/lib.rs +++ b/crates/weaver_common/src/lib.rs @@ -63,9 +63,9 @@ pub struct ConsoleLogger { /// /// Ordering logic: /// - Ordering::Acquire in load: Ensures that when a thread reads the muted flag, it sees all - /// preceding writes to that flag by other threads. + /// preceding writes to that flag by other threads. /// - Ordering::Release in store: Ensures that when a thread sets the muted flag, the store - /// operation is visible to other threads that subsequently perform an acquire load. + /// operation is visible to other threads that subsequently perform an acquire load. 
mute: Arc, } diff --git a/crates/weaver_forge/Cargo.toml b/crates/weaver_forge/Cargo.toml index c7dd26af..a2a08761 100644 --- a/crates/weaver_forge/Cargo.toml +++ b/crates/weaver_forge/Cargo.toml @@ -44,7 +44,7 @@ schemars.workspace = true dirs.workspace = true [dev-dependencies] -opentelemetry = { version = "0.22.0", features = ["trace", "metrics", "logs", "otel_unstable"] } +opentelemetry.workspace = true opentelemetry_sdk = { version = "0.24.0", features = ["trace", "metrics", "logs"] } opentelemetry-stdout = { version = "0.5.0", features = ["trace", "metrics", "logs"] } diff --git a/crates/weaver_forge/README.md b/crates/weaver_forge/README.md index ea832e37..94f8e601 100644 --- a/crates/weaver_forge/README.md +++ b/crates/weaver_forge/README.md @@ -102,7 +102,7 @@ mechanism, loading configuration files in this order: - `$HOME/.weaver/weaver.yaml` - `/weaver.yaml` and any intermediate directories containing a `weaver.yaml` -file up to the `templates/registry/` directory. + file up to the `templates/registry/` directory. - `templates/registry//weaver.yaml` Each subsequent configuration file overrides the previous ones, up to the @@ -145,11 +145,11 @@ More details [here](#jq-filters-reference). All templates have access to the following global variables: - `ctx`: The context object that contains the resolved registry or the output of the JQ filter -if defined in the `weaver.yaml` configuration file. + if defined in the `weaver.yaml` configuration file. - `params`: The parameters defined in the `weaver.yaml` configuration file or overridden by the -command line `--param`, `-D`, or `--params` arguments. + command line `--param`, `-D`, or `--params` arguments. - `template`: An object exposing various helper functions such as the `set_file_name` method to -redefine the name of the file that will be produced from the template. + redefine the name of the file that will be produced from the template. 
## Step-by-Step Guide diff --git a/crates/weaver_forge/src/config.rs b/crates/weaver_forge/src/config.rs index c94c926f..80d7cbe9 100644 --- a/crates/weaver_forge/src/config.rs +++ b/crates/weaver_forge/src/config.rs @@ -312,7 +312,7 @@ impl WeaverConfig { /// /// 1. The `/weaver.yaml` file. /// 2. Any `weaver.yaml` files found in parent directories of the specified path, up to the root - /// directory. + /// directory. /// 3. The `$HOME/.weaver/weaver.yaml` file. pub fn try_from_path>(path: P) -> Result { let configs = Self::collect_from_path(path); diff --git a/crates/weaver_resolver/README.md b/crates/weaver_resolver/README.md index 5a8bfe50..02cbcc90 100644 --- a/crates/weaver_resolver/README.md +++ b/crates/weaver_resolver/README.md @@ -61,13 +61,13 @@ a semantic convention registry. The lineage as such is not part of the syntax and structure of a semantic convention; rather, it's an extension produced by the `weaver` tool, intended for use in scenarios such as: - A semconv author wishes to verify the exact path followed by the resolution -process in the case of a complex cascade of inheritance across multiple levels -between groups. + process in the case of a complex cascade of inheritance across multiple levels + between groups. - A documentation process aims to add lineage information to the documentation -of each attribute to allow readers to understand where the definitions of each -field were defined. + of each attribute to allow readers to understand where the definitions of each + field were defined. - The lineage information of a semantic convention could eventually feed into -an enterprise data catalog to improve the data governance process. + an enterprise data catalog to improve the data governance process. 
The general structure of the lineage generated by the resolution process is as follows: diff --git a/crates/weaver_resolver/allowed-external-types.toml b/crates/weaver_resolver/allowed-external-types.toml index 89ff68f7..bf495562 100644 --- a/crates/weaver_resolver/allowed-external-types.toml +++ b/crates/weaver_resolver/allowed-external-types.toml @@ -10,6 +10,6 @@ allowed_external_types = [ "weaver_resolved_schema::*", "weaver_semconv::*", "weaver_common::*", - "weaver_cache::Cache", + "weaver_cache::*", "weaver_version::*", ] \ No newline at end of file diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 6d8b1948..c972ac73 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -11,14 +11,13 @@ use rayon::iter::ParallelIterator; use serde::Serialize; use walkdir::DirEntry; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, handle_errors, WeaverError}; use weaver_common::Logger; use weaver_resolved_schema::catalog::Catalog; use weaver_resolved_schema::registry::Constraint; use weaver_resolved_schema::ResolvedTelemetrySchema; -use weaver_semconv::path::RegistryPath; use weaver_semconv::registry::SemConvRegistry; use weaver_semconv::semconv::SemConvSpec; @@ -236,30 +235,12 @@ impl SchemaResolver { /// * `registry_path` - The registry path containing the semantic convention files. /// * `cache` - The cache to store the semantic convention files. pub fn load_semconv_specs( - registry_path: &RegistryPath, - cache: &Cache, + registry_repo: &RegistryRepo, ) -> Result, Error> { - let (local_path, registry_path_repr) = Self::path_to_registry(registry_path, cache)?; - Self::load_semconv_from_local_path(local_path, ®istry_path_repr) - } - - /// Returns a tuple absolute ['PathBuf'], logical registry path to the registry based on the - /// given ['RegistryPath'] and the cache. 
- pub fn path_to_registry( - registry_path: &RegistryPath, - cache: &Cache, - ) -> Result<(PathBuf, String), Error> { - match registry_path { - RegistryPath::Local { path_pattern: path } => Ok((path.into(), path.clone())), - RegistryPath::GitUrl { git_url, path } => { - match cache.git_repo(git_url.clone(), path.clone()) { - Ok(local_git_repo) => Ok((local_git_repo, git_url.clone())), - Err(e) => Err(Error::SemConvError { - message: e.to_string(), - }), - } - } - } + Self::load_semconv_from_local_path( + registry_repo.path().to_path_buf(), + registry_repo.registry_path_repr(), + ) } /// Loads the semantic convention specifications from the given local path. diff --git a/crates/weaver_semconv/README.md b/crates/weaver_semconv/README.md index 8e5215eb..86bac3aa 100644 --- a/crates/weaver_semconv/README.md +++ b/crates/weaver_semconv/README.md @@ -15,8 +15,8 @@ For a formal definition of the allowed syntax, see the [build-tools JSON schema] # Design Principles - Collect as many warnings and errors as possible. Do not stop at the first error; this approach helps the user fix -multiple issues at once. + multiple issues at once. - Rely on the Serde ecosystem for serialization and deserialization. This reliance simplifies support for multiple -formats such as YAML, JSON, etc. + formats such as YAML, JSON, etc. - This crate is foundational for the OpenTelemetry Weaver project. Therefore, it is crucial to keep the API stable and -user-friendly. Maintaining a test coverage greater than 80% is important. Test as many as possible error cases/paths. \ No newline at end of file + user-friendly. Maintaining a test coverage greater than 80% is important. Test as many as possible error cases/paths. 
\ No newline at end of file diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index 4b52ae4d..46736d5a 100644 --- a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -11,7 +11,6 @@ use weaver_common::error::{format_errors, WeaverError}; pub mod attribute; pub mod group; pub mod metric; -pub mod path; pub mod registry; pub mod semconv; pub mod stability; diff --git a/crates/weaver_semconv/src/path.rs b/crates/weaver_semconv/src/path.rs deleted file mode 100644 index 575d3a4d..00000000 --- a/crates/weaver_semconv/src/path.rs +++ /dev/null @@ -1,40 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -//! Semantic convention registry path. - -use std::fmt::{Display, Formatter}; - -use serde::{Deserialize, Serialize}; - -/// A semantic convention registry path. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -#[serde(untagged)] -pub enum RegistryPath { - /// A local path pattern to the semantic convention registry. - Local { - /// A local path pattern to the semantic convention files. - path_pattern: String, - }, - /// A git URL to the semantic convention registry. - GitUrl { - /// The git URL of the semantic convention git repo. - git_url: String, - /// An optional path to the semantic convention directory containing - /// the semantic convention files. 
- path: Option, - }, -} - -impl Display for RegistryPath { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let path = match self { - RegistryPath::Local { path_pattern } => format!("LocalRegistry:{}", path_pattern), - RegistryPath::GitUrl { git_url, path } => match path { - Some(path) => format!("GitRegistry:{}/{:?}", git_url, path), - None => format!("GitRegistry:{}", git_url), - }, - }; - f.write_str(&path) - } -} diff --git a/crates/weaver_semconv_gen/allowed-external-types.toml b/crates/weaver_semconv_gen/allowed-external-types.toml index 1e75f38d..ac615260 100644 --- a/crates/weaver_semconv_gen/allowed-external-types.toml +++ b/crates/weaver_semconv_gen/allowed-external-types.toml @@ -6,8 +6,7 @@ allowed_external_types = [ "weaver_semconv::Error", "weaver_semconv::path::RegistryPath", "weaver_resolver::Error", - "weaver_cache::Cache", - "weaver_cache::Error", + "weaver_cache::*", "weaver_common::error::WeaverError", "weaver_forge::error::Error", "weaver_forge::TemplateEngine", diff --git a/crates/weaver_semconv_gen/src/lib.rs b/crates/weaver_semconv_gen/src/lib.rs index 51ee9936..3a1dc358 100644 --- a/crates/weaver_semconv_gen/src/lib.rs +++ b/crates/weaver_semconv_gen/src/lib.rs @@ -8,7 +8,7 @@ use miette::Diagnostic; use std::{fmt, fs}; use serde::Serialize; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, WeaverError}; use weaver_diff::diff_output; @@ -19,7 +19,6 @@ use weaver_resolved_schema::catalog::Catalog; use weaver_resolved_schema::registry::{Group, Registry}; use weaver_resolved_schema::ResolvedTelemetrySchema; use weaver_resolver::SchemaResolver; -use weaver_semconv::path::RegistryPath; use weaver_semconv::registry::SemConvRegistry; use crate::gen::{AttributeTableView, GenerateMarkdownContext, MetricView}; @@ -347,34 +346,17 @@ impl SnippetGenerator { Ok(result) } - /// Resolve semconv registry (possibly from 
git), and make it available for rendering. - pub fn try_from_url( - registry_path: RegistryPath, - cache: &Cache, + /// Resolve semconv registry, and make it available for rendering. + pub fn try_from_registry_repo( + registry_repo: &RegistryRepo, template_engine: Option, ) -> Result { - let registry = ResolvedSemconvRegistry::try_from_url(registry_path, cache)?; + let registry = ResolvedSemconvRegistry::try_from_registry_repo(registry_repo)?; Ok(SnippetGenerator { lookup: registry, template_engine, }) } - - // Used in tests - #[allow(dead_code)] - fn try_from_path( - path_pattern: &str, - template_engine: Option, - ) -> Result { - let cache = Cache::try_new()?; - Self::try_from_url( - RegistryPath::Local { - path_pattern: path_pattern.to_owned(), - }, - &cache, - template_engine, - ) - } } /// The resolved Semantic Convention repository that is used to drive snipper generation. @@ -385,12 +367,11 @@ struct ResolvedSemconvRegistry { impl ResolvedSemconvRegistry { /// Resolve semconv registry (possibly from git), and make it available for rendering. 
- fn try_from_url( - registry_path: RegistryPath, - cache: &Cache, + fn try_from_registry_repo( + registry_repo: &RegistryRepo, ) -> Result { let registry_id = "semantic_conventions"; - let semconv_specs = SchemaResolver::load_semconv_specs(®istry_path, cache)?; + let semconv_specs = SchemaResolver::load_semconv_specs(registry_repo)?; let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry)?; let lookup = ResolvedSemconvRegistry { @@ -422,7 +403,8 @@ impl ResolvedSemconvRegistry { #[cfg(test)] mod tests { use std::fs; - + use weaver_cache::registry_path::RegistryPath; + use weaver_cache::RegistryRepo; use weaver_forge::config::{Params, WeaverConfig}; use weaver_forge::file_loader::FileSystemFileLoader; use weaver_forge::TemplateEngine; @@ -441,7 +423,11 @@ mod tests { let loader = FileSystemFileLoader::try_new("templates/registry".into(), "markdown")?; let config = WeaverConfig::try_from_loader(&loader)?; let template = TemplateEngine::new(config, loader, Params::default()); - let generator = SnippetGenerator::try_from_path("data", Some(template))?; + let registry_path = RegistryPath::LocalFolder { + path: "data".to_owned(), + }; + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let generator = SnippetGenerator::try_from_registry_repo(®istry_repo, Some(template))?; let attribute_registry_url = "/docs/attributes-registry"; // Now we should check a snippet. let test = "data/templates.md"; @@ -457,7 +443,11 @@ mod tests { #[test] fn test_http_semconv() -> Result<(), Error> { - let lookup = SnippetGenerator::try_from_path("data", None)?; + let registry_path = RegistryPath::LocalFolder { + path: "data".to_owned(), + }; + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let lookup = SnippetGenerator::try_from_registry_repo(®istry_repo, None)?; let attribute_registry_url = "/docs/attributes-registry"; // Check our test files. 
for test in [ @@ -492,8 +482,11 @@ mod tests { } fn run_legacy_test(path: std::path::PathBuf) -> Result<(), Error> { - let semconv_path = format!("{}", path.display()); - let lookup = SnippetGenerator::try_from_path(&semconv_path, None)?; + let registry_path = RegistryPath::LocalFolder { + path: format!("{}", path.display()), + }; + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let lookup = SnippetGenerator::try_from_registry_repo(®istry_repo, None)?; let test_path = path.join("test.md").display().to_string(); // Attempts to update the test - will fail if there is any difference in the generated markdown. update_markdown(&test_path, &lookup, true, None) diff --git a/crates/xtask/Cargo.toml b/crates/xtask/Cargo.toml index 3fb5cc39..f3a2efdf 100644 --- a/crates/xtask/Cargo.toml +++ b/crates/xtask/Cargo.toml @@ -12,5 +12,7 @@ rust-version.workspace = true anyhow.workspace = true toml = "0.8.15" -[lints] -workspace = true +#[lints] +#workspace = true +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] } diff --git a/crates/xtask/src/validate.rs b/crates/xtask/src/validate.rs index eabcb015..47aa12f0 100644 --- a/crates/xtask/src/validate.rs +++ b/crates/xtask/src/validate.rs @@ -13,11 +13,11 @@ use toml::Value; /// each crate in the cargo workspace. /// - Each crate must have a README.md file. /// - Each crate name must start with "weaver_" to avoid conflicts with other -/// crates. +/// crates. /// - Each crate must have an allowed-external-types.toml file defining the types -/// that are allowed to be used in the public API. +/// that are allowed to be used in the public API. /// - Each Cargo.toml must contain \[lints\] workspace = true and few other fields -/// in the \[package\] section. +/// in the \[package\] section. 
#[cfg(not(tarpaulin_include))] pub fn run() -> anyhow::Result<()> { let mut errors = vec![]; diff --git a/defaults/diagnostic_templates/ansi/errors.txt.j2 b/defaults/diagnostic_templates/ansi/errors.txt.j2 index b1c5e847..021a3308 100644 --- a/defaults/diagnostic_templates/ansi/errors.txt.j2 +++ b/defaults/diagnostic_templates/ansi/errors.txt.j2 @@ -1,3 +1,4 @@ + {{ "Diagnostic report" | ansi_bold | ansi_red }}: {% for item in ctx %} {%- if item.error.type == "policy_violation" %} diff --git a/defaults/rego/semconv.rego b/defaults/rego/semconv.rego new file mode 100644 index 00000000..84454365 --- /dev/null +++ b/defaults/rego/semconv.rego @@ -0,0 +1,68 @@ +package semconv +import rego.v1 + +# Semantic Convention Registry Helpers +# +# This file contains a set of common rules and functions to process +# semantic convention registries. It's designed to work with both current +# and baseline (previous version) registries for compatibility checks. + +# Input Expectations: +# 1. Current Registry: +# - Accessible via `input.groups` +# - Specified by the `--registry` flag when running Weaver +# +# 2. Baseline Registry (optional): +# - Accessible via `data.groups` if provided +# - Specified by the `--baseline-registry` flag when running Weaver +# - Represents the previous version of the registry for compatibility checks + +# Define baseline and current groups +baseline_groups := data.groups # Baseline registry groups (if provided) +groups := input.groups # Current registry groups + +# Filter "registry" groups +# These comprehensions create arrays of groups whose IDs start with "registry." +# for both baseline and current registries. 
+registry_baseline_groups := [g | g := baseline_groups[_]; startswith(g.id, "registry.")] +registry_groups := [g | g := input.groups[_]; startswith(g.id, "registry.")] + +# Collect all attribute names from the baseline registry +# This set comprehension gathers all attribute names from groups +# in the baseline registry +baseline_attributes := {attr.name | + some g in baseline_groups + some attr in g.attributes +} + +# Collect all registry attribute names from the baseline registry +# This set comprehension gathers all attribute names from groups +# in the baseline registry that start with "registry." +registry_baseline_attributes := {attr.name | + some g in registry_baseline_groups + some attr in g.attributes +} + +# Collect all attribute names from the current registry +# Similar to baseline_attributes, but for the current groups +attributes := {attr.name | + some g in groups + some attr in g.attributes +} + +# Collect all registry attribute names from the current registry +# Similar to baseline_attributes, but for the current registry groups +registry_attributes := {attr.name | + some g in registry_groups + some attr in g.attributes +} + +# Map attribute names to their group IDs in the baseline registry +# This object comprehension creates a mapping where: +# - Keys are attribute names +# - Values are the IDs of the groups containing these attributes +# Only considers groups whose IDs start with "registry." 
+baseline_group_ids_by_attribute := {attr.name: g.id | + some g in registry_baseline_groups + some attr in g.attributes +} diff --git a/deny.toml b/deny.toml index 6bc9b359..deea1a44 100644 --- a/deny.toml +++ b/deny.toml @@ -51,6 +51,7 @@ allow = [ "ISC", "OpenSSL", "Zlib", + "BSL-1.0", # Boost Software License 1.0, used by https://crates.io/crates/lockfree-object-pool ] # List of explicitly disallowed licenses diff --git a/src/diagnostic/init.rs b/src/diagnostic/init.rs index 8be9bdc4..360dd744 100644 --- a/src/diagnostic/init.rs +++ b/src/diagnostic/init.rs @@ -28,7 +28,6 @@ pub struct DiagnosticInitArgs { } /// Initializes a `diagnostic_templates` directory to define or override diagnostic output formats. -#[cfg(not(tarpaulin_include))] pub(crate) fn command( logger: impl Logger + Sync + Clone, args: &DiagnosticInitArgs, diff --git a/src/diagnostic/mod.rs b/src/diagnostic/mod.rs index b2903f80..2d3c4718 100644 --- a/src/diagnostic/mod.rs +++ b/src/diagnostic/mod.rs @@ -50,7 +50,6 @@ pub enum DiagnosticSubCommand { } /// Manage diagnostic messages. -#[cfg(not(tarpaulin_include))] pub fn diagnostic(log: impl Logger + Sync + Clone, command: &DiagnosticCommand) -> CmdResult { match &command.command { DiagnosticSubCommand::Init(args) => { diff --git a/src/format.rs b/src/format.rs index 96173879..d5c7a905 100644 --- a/src/format.rs +++ b/src/format.rs @@ -14,7 +14,6 @@ pub(crate) enum Format { Json, } -#[cfg(not(tarpaulin_include))] pub(crate) fn apply_format(format: &Format, object: &T) -> Result { match format { Format::Yaml => serde_yaml::to_string(object) diff --git a/src/main.rs b/src/main.rs index 0583d8d0..1d080bb9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -74,7 +74,6 @@ impl CmdResult { } } -#[cfg(not(tarpaulin_include))] fn main() { let cli = Cli::parse(); @@ -98,7 +97,6 @@ fn main() { } /// Run the command specified by the CLI arguments and return the exit directives. 
-#[cfg(not(tarpaulin_include))] fn run_command(cli: &Cli, log: impl Logger + Sync + Clone) -> ExitDirectives { let cmd_result = match &cli.command { Some(Commands::Registry(params)) => semconv_registry(log.clone(), params), diff --git a/src/registry/check.rs b/src/registry/check.rs index af795c6b..b928b07a 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -5,8 +5,8 @@ use std::path::PathBuf; use clap::Args; - -use weaver_cache::Cache; +use weaver_cache::registry_path::RegistryPath; +use weaver_cache::RegistryRepo; use weaver_checker::PolicyStage; use weaver_common::diagnostic::{DiagnosticMessages, ResultExt}; use weaver_common::error::handle_errors; @@ -17,7 +17,7 @@ use weaver_semconv::registry::SemConvRegistry; use crate::registry::RegistryArgs; use crate::util::{ check_policies, check_policy_stage, init_policy_engine, load_semconv_specs, - resolve_semconv_specs, semconv_registry_path_from, + resolve_semconv_specs, }; use crate::{DiagnosticArgs, ExitDirectives}; @@ -28,6 +28,10 @@ pub struct RegistryCheckArgs { #[command(flatten)] registry: RegistryArgs, + /// Parameters to specify the baseline semantic convention registry + #[arg(long)] + baseline_registry: Option, + /// Optional list of policy files to check against the files of the semantic /// convention registry. #[arg(short = 'p', long = "policy")] @@ -47,26 +51,43 @@ pub struct RegistryCheckArgs { } /// Check a semantic convention registry. -#[cfg(not(tarpaulin_include))] pub(crate) fn command( logger: impl Logger + Sync + Clone, - cache: &Cache, args: &RegistryCheckArgs, ) -> Result { let mut diag_msgs = DiagnosticMessages::empty(); + logger.log("Weaver Registry Check"); logger.loading(&format!("Checking registry `{}`", args.registry.registry)); - let registry_id = "default"; - let registry_path = - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir); + // Initialize the main registry. 
+ let mut registry_path = args.registry.registry.clone(); + // Support for --registry-git-sub-dir + // ToDo: This parameter is now deprecated and should be removed in the future + if let RegistryPath::GitRepo { sub_folder, .. } = &mut registry_path { + if sub_folder.is_none() { + sub_folder.clone_from(&args.registry.registry_git_sub_dir); + } + } + let main_registry_repo = RegistryRepo::try_new("main", ®istry_path)?; - // Load the semantic convention registry into a local cache. + // Initialize the baseline registry if provided. + let baseline_registry_repo = if let Some(baseline_registry) = &args.baseline_registry { + Some(RegistryRepo::try_new("baseline", baseline_registry)?) + } else { + None + }; + + // Load the semantic convention registry into a local registry repo. // No parsing errors should be observed. - let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; + let main_semconv_specs = load_semconv_specs(&main_registry_repo, logger.clone())?; + let baseline_semconv_specs = baseline_registry_repo + .as_ref() + .map(|repo| load_semconv_specs(repo, logger.clone())) + .transpose()?; + let mut policy_engine = if !args.skip_policies { Some(init_policy_engine( - ®istry_path, - cache, + &main_registry_repo, &args.policies, args.display_policy_coverage, )?) @@ -83,44 +104,89 @@ pub(crate) fn command( // In this specific case, `capture_diag_msgs_into` returns either `Some(())` or `None` // if diagnostic messages have been captured. Therefore, it is acceptable to ignore the result in this // particular case. - _ = check_policies(policy_engine, &semconv_specs, logger.clone()) + _ = check_policies(policy_engine, &main_semconv_specs, logger.clone()) .capture_diag_msgs_into(&mut diag_msgs); } - let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); + let mut main_registry = + SemConvRegistry::from_semconv_specs(main_registry_repo.id(), main_semconv_specs); // Resolve the semantic convention specifications. 
// If there are any resolution errors, they should be captured into the ongoing list of // diagnostic messages and returned immediately because there is no point in continuing // as the resolution is a prerequisite for the next stages. - let resolved_schema = - resolve_semconv_specs(&mut registry, logger.clone()).combine_diag_msgs_with(&diag_msgs)?; + let main_resolved_schema = resolve_semconv_specs(&mut main_registry, logger.clone()) + .combine_diag_msgs_with(&diag_msgs)?; if let Some(policy_engine) = policy_engine.as_mut() { // Convert the resolved schemas into a resolved registry. // If there are any policy violations, they should be captured into the ongoing list of // diagnostic messages and returned immediately because there is no point in continuing // as the registry resolution is a prerequisite for the next stages. - let resolved_registry = ResolvedRegistry::try_from_resolved_registry( - resolved_schema - .registry(registry_id) + let main_resolved_registry = ResolvedRegistry::try_from_resolved_registry( + main_resolved_schema + .registry(main_registry_repo.id()) .expect("Failed to get the registry from the resolved schema"), - resolved_schema.catalog(), + main_resolved_schema.catalog(), ) .combine_diag_msgs_with(&diag_msgs)?; // Check the policies against the resolved registry (`PolicyState::AfterResolution`). - let errs = check_policy_stage( + let errs = check_policy_stage::( policy_engine, PolicyStage::AfterResolution, ®istry_path.to_string(), - &resolved_registry, + &main_resolved_registry, + &[], ); + logger.success(&format!( + "All `after_resolution` policies checked ({} violations found)", + errs.len() + )); // Append the policy errors to the ongoing list of diagnostic messages and if there are // any errors, return them immediately. 
if let Err(err) = handle_errors(errs) { diag_msgs.extend(err.into()); } + + if let (Some(baseline_registry_repo), Some(baseline_semconv_specs)) = + (baseline_registry_repo, baseline_semconv_specs) + { + let mut baseline_registry = SemConvRegistry::from_semconv_specs( + baseline_registry_repo.id(), + baseline_semconv_specs, + ); + let baseline_resolved_schema = + resolve_semconv_specs(&mut baseline_registry, logger.clone()) + .combine_diag_msgs_with(&diag_msgs)?; + let baseline_resolved_registry = ResolvedRegistry::try_from_resolved_registry( + baseline_resolved_schema + .registry(baseline_registry_repo.id()) + .expect("Failed to get the registry from the baseline resolved schema"), + baseline_resolved_schema.catalog(), + ) + .combine_diag_msgs_with(&diag_msgs)?; + + // Check the policies against the resolved registry (`PolicyState::AfterResolution`). + let errs = check_policy_stage( + policy_engine, + PolicyStage::ComparisonAfterResolution, + ®istry_path.to_string(), + &main_resolved_registry, + &[baseline_resolved_registry], + ); + logger.success(&format!( + "All `comparison_after_resolution` policies checked ({} violations found)", + errs.len() + )); + + // Append the policy errors to the ongoing list of diagnostic messages and if there are + // any errors, return them immediately. 
+ if let Err(err) = handle_errors(errs) { + diag_msgs.extend(err.into()); + } + } + if !diag_msgs.is_empty() { return Err(diag_msgs); } @@ -152,11 +218,12 @@ mod tests { command: Some(Commands::Registry(RegistryCommand { command: RegistrySubCommand::Check(RegistryCheckArgs { registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, + baseline_registry: None, policies: vec![], skip_policies: true, display_policy_coverage: false, @@ -176,11 +243,12 @@ mod tests { command: Some(Commands::Registry(RegistryCommand { command: RegistrySubCommand::Check(RegistryCheckArgs { registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, + baseline_registry: None, policies: vec![], skip_policies: false, display_policy_coverage: false, @@ -201,11 +269,12 @@ mod tests { let registry_cmd = RegistryCommand { command: RegistrySubCommand::Check(RegistryCheckArgs { registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, + baseline_registry: None, policies: vec![], skip_policies: false, display_policy_coverage: false, @@ -221,8 +290,8 @@ mod tests { assert_eq!( diag_msgs.len(), 13 /* before resolution */ - + 3 /* metric after resolution */ - + 9 /* http after resolution */ + + 3 /* metric after resolution */ + + 9 /* http after resolution */ ); } } diff --git a/src/registry/generate.rs b/src/registry/generate.rs index fffc3479..0934290c 100644 --- 
a/src/registry/generate.rs +++ b/src/registry/generate.rs @@ -7,7 +7,7 @@ use std::path::PathBuf; use clap::Args; use serde_yaml::Value; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::Logger; use weaver_forge::config::{Params, WeaverConfig}; @@ -17,11 +17,8 @@ use weaver_forge::{OutputDirective, TemplateEngine, SEMCONV_JQ}; use weaver_semconv::registry::SemConvRegistry; use crate::registry::{Error, RegistryArgs}; -use crate::util::{ - check_policies, init_policy_engine, load_semconv_specs, resolve_semconv_specs, - semconv_registry_path_from, -}; -use crate::{DiagnosticArgs, ExitDirectives}; +use crate::util::{check_policies, init_policy_engine, load_semconv_specs, resolve_semconv_specs}; +use crate::{registry, DiagnosticArgs, ExitDirectives}; /// Parameters for the `registry generate` sub-command #[derive(Debug, Args)] @@ -90,7 +87,6 @@ fn parse_key_val(s: &str) -> Result<(String, Value), Error> { /// Generate artifacts from a semantic convention registry. pub(crate) fn command( logger: impl Logger + Sync + Clone, - cache: &Cache, args: &RegistryGenerateArgs, ) -> Result { logger.loading(&format!( @@ -99,15 +95,21 @@ pub(crate) fn command( )); let params = generate_params(args)?; + let mut registry_path = args.registry.registry.clone(); + // Support for --registry-git-sub-dir (should be removed in the future) + if let registry::RegistryPath::GitRepo { sub_folder, .. } = &mut registry_path { + if sub_folder.is_none() { + sub_folder.clone_from(&args.registry.registry_git_sub_dir); + } + } let registry_id = "default"; - let registry_path = - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir); + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; // Load the semantic convention registry into a local cache. 
- let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; + let semconv_specs = load_semconv_specs(®istry_repo, logger.clone())?; if !args.skip_policies { - let policy_engine = init_policy_engine(®istry_path, cache, &args.policies, false)?; + let policy_engine = init_policy_engine(®istry_repo, &args.policies, false)?; check_policies(&policy_engine, &semconv_specs, logger.clone())?; } @@ -203,9 +205,9 @@ mod tests { param: None, params: None, registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, policies: vec![], @@ -273,9 +275,9 @@ mod tests { param: None, params: None, registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, policies: vec![], @@ -315,9 +317,9 @@ mod tests { param: None, params: None, registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, policies: vec![], diff --git a/src/registry/json_schema.rs b/src/registry/json_schema.rs index 2a3653aa..6b3424e1 100644 --- a/src/registry/json_schema.rs +++ b/src/registry/json_schema.rs @@ -10,7 +10,6 @@ use schemars::schema_for; use serde::Serialize; use serde_json::to_string_pretty; use std::path::PathBuf; -use weaver_cache::Cache; use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::Logger; use weaver_forge::registry::ResolvedRegistry; @@ -57,10 +56,8 @@ impl From for DiagnosticMessages { /// Generate the JSON Schema of a 
ResolvedRegistry and write the JSON schema to a /// file or print it to stdout. -#[cfg(not(tarpaulin_include))] pub(crate) fn command( logger: impl Logger + Sync + Clone, - _cache: &Cache, args: &RegistryJsonSchemaArgs, ) -> Result { let json_schema = schema_for!(ResolvedRegistry); diff --git a/src/registry/mod.rs b/src/registry/mod.rs index ee8a04dc..9763cc11 100644 --- a/src/registry/mod.rs +++ b/src/registry/mod.rs @@ -2,9 +2,7 @@ //! Commands to manage a semantic convention registry. -use std::fmt::Display; use std::path::PathBuf; -use std::str::FromStr; use clap::{Args, Subcommand}; use miette::Diagnostic; @@ -18,7 +16,7 @@ use crate::registry::stats::RegistryStatsArgs; use crate::registry::update_markdown::RegistryUpdateMarkdownArgs; use crate::CmdResult; use check::RegistryCheckArgs; -use weaver_cache::Cache; +use weaver_cache::registry_path::RegistryPath; use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::Logger; @@ -105,92 +103,56 @@ pub enum RegistrySubCommand { JsonSchema(RegistryJsonSchemaArgs), } -/// Path to a semantic convention registry. -/// The path can be a local directory or a Git URL. -#[derive(Debug, Clone)] -pub enum RegistryPath { - Local(String), - Url(String), -} - -/// Implement the `FromStr` trait for `RegistryPath`, so that it can be used as -/// a command-line argument. -impl FromStr for RegistryPath { - type Err = String; - - /// Parse a string into a `RegistryPath`. - fn from_str(s: &str) -> Result { - if s.starts_with("http://") || s.starts_with("https://") { - Ok(Self::Url(s.to_owned())) - } else { - Ok(Self::Local(s.to_owned())) - } - } -} - -/// Implement the `Display` trait for `RegistryPath`, so that it can be printed -/// to the console. -impl Display for RegistryPath { - /// Format the `RegistryPath` as a string. 
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - RegistryPath::Local(path) => write!(f, "{}", path), - RegistryPath::Url(url) => write!(f, "{}", url), - } - } -} - /// Set of parameters used to specify a semantic convention registry. #[derive(Args, Debug)] pub struct RegistryArgs { - /// Local path or Git URL of the semantic convention registry. + /// Local folder, Git repo URL, or Git archive URL of the semantic + /// convention registry. For Git URLs, a sub-folder can be specified + /// using the `[sub-folder]` syntax after the URL. #[arg( short = 'r', long, - default_value = "https://github.com/open-telemetry/semantic-conventions.git" + default_value = "https://github.com/open-telemetry/semantic-conventions.git[model]" )] pub registry: RegistryPath, /// Optional path in the Git repository where the semantic convention - /// registry is located + /// registry is located. This parameter is deprecated and should be + /// removed in the future. Please use the `[sub-folder]` syntax after the + /// URL in the `--registry` or `--baseline-registry` parameters instead. #[arg(short = 'd', long, default_value = "model")] pub registry_git_sub_dir: Option, } /// Manage a semantic convention registry and return the exit code. 
pub fn semconv_registry(log: impl Logger + Sync + Clone, command: &RegistryCommand) -> CmdResult { - let cache = match Cache::try_new() { - Ok(cache) => cache, - Err(e) => return CmdResult::new(Err(e.into()), None), - }; - match &command.command { RegistrySubCommand::Check(args) => CmdResult::new( - check::command(log.clone(), &cache, args), + check::command(log.clone(), args), Some(args.diagnostic.clone()), ), RegistrySubCommand::Generate(args) => CmdResult::new( - generate::command(log.clone(), &cache, args), + generate::command(log.clone(), args), Some(args.diagnostic.clone()), ), RegistrySubCommand::Stats(args) => CmdResult::new( - stats::command(log.clone(), &cache, args), + stats::command(log.clone(), args), Some(args.diagnostic.clone()), ), RegistrySubCommand::Resolve(args) => CmdResult::new( - resolve::command(log.clone(), &cache, args), + resolve::command(log.clone(), args), Some(args.diagnostic.clone()), ), RegistrySubCommand::Search(args) => CmdResult::new( - search::command(log.clone(), &cache, args), + search::command(log.clone(), args), Some(args.diagnostic.clone()), ), RegistrySubCommand::UpdateMarkdown(args) => CmdResult::new( - update_markdown::command(log.clone(), &cache, args), + update_markdown::command(log.clone(), args), Some(args.diagnostic.clone()), ), RegistrySubCommand::JsonSchema(args) => CmdResult::new( - json_schema::command(log.clone(), &cache, args), + json_schema::command(log.clone(), args), Some(args.diagnostic.clone()), ), } diff --git a/src/registry/resolve.rs b/src/registry/resolve.rs index 0eef24d3..5497ca33 100644 --- a/src/registry/resolve.rs +++ b/src/registry/resolve.rs @@ -6,7 +6,7 @@ use std::path::PathBuf; use clap::Args; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::Logger; use weaver_forge::registry::ResolvedRegistry; @@ -14,11 +14,8 @@ use weaver_semconv::registry::SemConvRegistry; use crate::format::{apply_format, Format}; use 
crate::registry::RegistryArgs; -use crate::util::{ - check_policies, init_policy_engine, load_semconv_specs, resolve_semconv_specs, - semconv_registry_path_from, -}; -use crate::{DiagnosticArgs, ExitDirectives}; +use crate::util::{check_policies, init_policy_engine, load_semconv_specs, resolve_semconv_specs}; +use crate::{registry, DiagnosticArgs, ExitDirectives}; /// Parameters for the `registry resolve` sub-command #[derive(Debug, Args)] @@ -61,10 +58,8 @@ pub struct RegistryResolveArgs { /// Resolve a semantic convention registry and write the resolved schema to a /// file or print it to stdout. -#[cfg(not(tarpaulin_include))] pub(crate) fn command( logger: impl Logger + Sync + Clone, - cache: &Cache, args: &RegistryResolveArgs, ) -> Result { if args.output.is_none() { @@ -72,15 +67,22 @@ pub(crate) fn command( } logger.loading(&format!("Resolving registry `{}`", args.registry.registry)); + let mut registry_path = args.registry.registry.clone(); + // Support for --registry-git-sub-dir (should be removed in the future) + if let registry::RegistryPath::GitRepo { sub_folder, .. } = &mut registry_path { + if sub_folder.is_none() { + sub_folder.clone_from(&args.registry.registry_git_sub_dir); + } + } + let registry_id = "default"; - let registry_path = - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir); + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; // Load the semantic convention registry into a local cache. 
- let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; + let semconv_specs = load_semconv_specs(®istry_repo, logger.clone())?; if !args.skip_policies { - let policy_engine = init_policy_engine(®istry_path, cache, &args.policies, false)?; + let policy_engine = init_policy_engine(®istry_repo, &args.policies, false)?; check_policies(&policy_engine, &semconv_specs, logger.clone())?; } @@ -140,9 +142,9 @@ mod tests { command: Some(Commands::Registry(RegistryCommand { command: RegistrySubCommand::Resolve(RegistryResolveArgs { registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, lineage: true, @@ -166,9 +168,9 @@ mod tests { command: Some(Commands::Registry(RegistryCommand { command: RegistrySubCommand::Resolve(RegistryResolveArgs { registry: RegistryArgs { - registry: RegistryPath::Local( - "crates/weaver_codegen_test/semconv_registry/".to_owned(), - ), + registry: RegistryPath::LocalFolder { + path: "crates/weaver_codegen_test/semconv_registry/".to_owned(), + }, registry_git_sub_dir: None, }, lineage: true, diff --git a/src/registry/search.rs b/src/registry/search.rs index c12c5afa..c948bd87 100644 --- a/src/registry/search.rs +++ b/src/registry/search.rs @@ -5,15 +5,16 @@ use clap::Args; use itertools::Itertools; use miette::Diagnostic; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::Logger; use weaver_resolved_schema::{attribute::Attribute, ResolvedTelemetrySchema}; use weaver_semconv::registry::SemConvRegistry; use crate::{ + registry, registry::RegistryArgs, - util::{load_semconv_specs, resolve_semconv_specs, semconv_registry_path_from}, + util::{load_semconv_specs, resolve_semconv_specs}, DiagnosticArgs, ExitDirectives, }; use crossterm::{ @@ -372,17 
+373,22 @@ fn run_command_line_search(schema: &ResolvedTelemetrySchema, pattern: &str) { pub(crate) fn command( logger: impl Logger + Sync + Clone, - cache: &Cache, args: &RegistrySearchArgs, ) -> Result { logger.loading(&format!("Resolving registry `{}`", args.registry.registry)); let registry_id = "default"; - let registry_path = - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir); + let mut registry_path = args.registry.registry.clone(); + // Support for --registry-git-sub-dir (should be removed in the future) + if let registry::RegistryPath::GitRepo { sub_folder, .. } = &mut registry_path { + if sub_folder.is_none() { + sub_folder.clone_from(&args.registry.registry_git_sub_dir); + } + } + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; // Load the semantic convention registry into a local cache. - let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; + let semconv_specs = load_semconv_specs(®istry_repo, logger.clone())?; let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); let schema = resolve_semconv_specs(&mut registry, logger.clone())?; diff --git a/src/registry/stats.rs b/src/registry/stats.rs index bd1c1564..bc382331 100644 --- a/src/registry/stats.rs +++ b/src/registry/stats.rs @@ -3,10 +3,10 @@ //! Compute stats on a semantic convention registry. 
use crate::registry::RegistryArgs; -use crate::util::{load_semconv_specs, resolve_semconv_specs, semconv_registry_path_from}; -use crate::{DiagnosticArgs, ExitDirectives}; +use crate::util::{load_semconv_specs, resolve_semconv_specs}; +use crate::{registry, DiagnosticArgs, ExitDirectives}; use clap::Args; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::Logger; use weaver_resolved_schema::registry::{CommonGroupStats, GroupStats}; @@ -27,10 +27,8 @@ pub struct RegistryStatsArgs { } /// Compute stats on a semantic convention registry. -#[cfg(not(tarpaulin_include))] pub(crate) fn command( logger: impl Logger + Sync + Clone, - cache: &Cache, args: &RegistryStatsArgs, ) -> Result { logger.loading(&format!( @@ -39,11 +37,17 @@ pub(crate) fn command( )); let registry_id = "default"; - let registry_path = - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir); + let mut registry_path = args.registry.registry.clone(); + // Support for --registry-git-sub-dir (should be removed in the future) + if let registry::RegistryPath::GitRepo { sub_folder, .. } = &mut registry_path { + if sub_folder.is_none() { + sub_folder.clone_from(&args.registry.registry_git_sub_dir); + } + } + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; // Load the semantic convention registry into a local cache. 
- let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; + let semconv_specs = load_semconv_specs(®istry_repo, logger.clone())?; let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); display_semconv_registry_stats(®istry); @@ -58,14 +62,12 @@ pub(crate) fn command( }) } -#[cfg(not(tarpaulin_include))] fn display_semconv_registry_stats(semconv_registry: &SemConvRegistry) { let stats = semconv_registry.stats(); println!("Semantic Convention Registry Stats:"); println!(" - Total number of files: {}", stats.file_count); } -#[cfg(not(tarpaulin_include))] fn display_schema_stats(schema: &ResolvedTelemetrySchema) { let stats = schema.stats(); println!("Resolved Telemetry Schema Stats:"); @@ -167,7 +169,6 @@ fn display_schema_stats(schema: &ResolvedTelemetrySchema) { } } -#[cfg(not(tarpaulin_include))] fn display_common_group_stats(group_type: &GroupType, common_stats: &CommonGroupStats) { println!(" - {} {:#?}s", common_stats.count, group_type); println!( diff --git a/src/registry/update_markdown.rs b/src/registry/update_markdown.rs index a21e6be2..631509d3 100644 --- a/src/registry/update_markdown.rs +++ b/src/registry/update_markdown.rs @@ -4,10 +4,9 @@ //! update the specified sections. use crate::registry::RegistryArgs; -use crate::util::semconv_registry_path_from; -use crate::{DiagnosticArgs, ExitDirectives}; +use crate::{registry, DiagnosticArgs, ExitDirectives}; use clap::Args; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::Logger; use weaver_forge::config::{Params, WeaverConfig}; @@ -55,7 +54,6 @@ pub struct RegistryUpdateMarkdownArgs { /// Update markdown files. 
pub(crate) fn command( log: impl Logger + Sync + Clone, - cache: &Cache, args: &RegistryUpdateMarkdownArgs, ) -> Result { fn is_markdown(entry: &walkdir::DirEntry) -> bool { @@ -78,11 +76,15 @@ pub(crate) fn command( } }; - let generator = SnippetGenerator::try_from_url( - semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir), - cache, - generator, - )?; + let mut registry_path = args.registry.registry.clone(); + // Support for --registry-git-sub-dir (should be removed in the future) + if let registry::RegistryPath::GitRepo { sub_folder, .. } = &mut registry_path { + if sub_folder.is_none() { + sub_folder.clone_from(&args.registry.registry_git_sub_dir); + } + } + let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let generator = SnippetGenerator::try_from_registry_repo(®istry_repo, generator)?; log.success("Registry resolved successfully"); let operation = if args.dry_run { "Validating" @@ -137,7 +139,9 @@ mod tests { command: RegistrySubCommand::UpdateMarkdown(RegistryUpdateMarkdownArgs { markdown_dir: "data/update_markdown/markdown".to_owned(), registry: RegistryArgs { - registry: RegistryPath::Local("data/update_markdown/registry".to_owned()), + registry: RegistryPath::LocalFolder { + path: "data/update_markdown/registry".to_owned(), + }, registry_git_sub_dir: None, }, dry_run: true, diff --git a/src/util.rs b/src/util.rs index 8764a1c9..ef931405 100644 --- a/src/util.rs +++ b/src/util.rs @@ -3,13 +3,12 @@ //! Utility functions for resolving a semantic convention registry and checking policies. //! This module supports the `schema` and `registry` commands. 
-use crate::registry::RegistryPath; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use serde::Serialize; use std::path::PathBuf; -use weaver_cache::Cache; +use weaver_cache::RegistryRepo; use weaver_checker::Error::{InvalidPolicyFile, PolicyViolation}; -use weaver_checker::{Engine, Error, PolicyStage}; +use weaver_checker::{Engine, Error, PolicyStage, SEMCONV_REGO}; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::error::handle_errors; use weaver_common::Logger; @@ -18,27 +17,6 @@ use weaver_resolver::SchemaResolver; use weaver_semconv::registry::SemConvRegistry; use weaver_semconv::semconv::SemConvSpec; -/// Converts a `RegistryPath` to a `weaver_semconv::path::RegistryPath`. -/// -/// # Arguments -/// -/// * `registry`: A reference to a registry of telemetry schema. This is considered identifying for that registry, e.g a git url or local file path. -/// * `path`: An optional string representing a sub-directory in the registry identifying path where model/yaml files are located. -pub(crate) fn semconv_registry_path_from( - registry: &RegistryPath, - path: &Option, -) -> weaver_semconv::path::RegistryPath { - match registry { - RegistryPath::Local(path) => weaver_semconv::path::RegistryPath::Local { - path_pattern: path.clone(), - }, - RegistryPath::Url(url) => weaver_semconv::path::RegistryPath::GitUrl { - git_url: url.clone(), - path: path.clone(), - }, - } -} - /// Loads the semantic convention specifications from a registry path. /// /// # Arguments @@ -52,13 +30,14 @@ pub(crate) fn semconv_registry_path_from( /// A `Result` containing a vector of tuples with file names and `SemConvSpec` on success, /// or a `weaver_resolver::Error` on failure. 
pub(crate) fn load_semconv_specs( - registry_path: &weaver_semconv::path::RegistryPath, - cache: &Cache, + registry_repo: &RegistryRepo, log: impl Logger + Sync + Clone, ) -> Result, weaver_resolver::Error> { - let semconv_specs = SchemaResolver::load_semconv_specs(registry_path, cache)?; + let semconv_specs = SchemaResolver::load_semconv_specs(registry_repo)?; log.success(&format!( - "SemConv registry loaded ({} files)", + "`{}` semconv registry `{}` loaded ({} files)", + registry_repo.id(), + registry_repo.registry_path_repr(), semconv_specs.len() )); Ok(semconv_specs) @@ -78,8 +57,7 @@ pub(crate) fn load_semconv_specs( /// A `Result` containing the initialized `Engine` on success, or `DiagnosticMessages` /// on failure. pub(crate) fn init_policy_engine( - registry_path: &weaver_semconv::path::RegistryPath, - cache: &Cache, + registry_repo: &RegistryRepo, policies: &[PathBuf], policy_coverage: bool, ) -> Result { @@ -89,13 +67,18 @@ pub(crate) fn init_policy_engine( engine.enable_coverage(); } + // Add the standard semconv policies + // Note: `add_policy` the package name, we ignore it here as we don't need it + _ = engine + .add_policy("defaults/rego/semconv.rego", SEMCONV_REGO) + .map_err(DiagnosticMessages::from_error)?; + // Add policies from the registry - let (registry_path, _) = SchemaResolver::path_to_registry(registry_path, cache)?; - _ = engine.add_policies(registry_path.as_path(), "*.rego")?; + _ = engine.add_policies(registry_repo.path(), "*.rego")?; // Add policies from the command line for policy in policies { - _ = engine.add_policy(policy)?; + _ = engine.add_policy_from_file(policy)?; } Ok(engine) @@ -114,14 +97,24 @@ pub(crate) fn init_policy_engine( /// # Returns /// /// A list of policy violations represented as errors. 
-pub(crate) fn check_policy_stage( +pub(crate) fn check_policy_stage( policy_engine: &mut Engine, policy_stage: PolicyStage, policy_file: &str, input: &T, + data: &[U], ) -> Vec { let mut errors = vec![]; + for d in data { + if let Err(err) = policy_engine.add_data(d) { + errors.push(InvalidPolicyFile { + file: policy_file.to_owned(), + error: err.to_string(), + }); + } + } + match policy_engine.set_input(input) { Ok(_) => match policy_engine.check(policy_stage) { Ok(violations) => { @@ -167,11 +160,12 @@ pub(crate) fn check_policy( // Create a local policy engine inheriting the policies // from the global policy engine let mut policy_engine = policy_engine.clone(); - check_policy_stage( + check_policy_stage::( &mut policy_engine, PolicyStage::BeforeResolution, path, semconv, + &[], ) }) .collect::>(); @@ -205,9 +199,9 @@ pub(crate) fn check_policies( DiagnosticMessages::from_error(e) } })?; - logger.success("Policies checked"); + logger.success("All `before_resolution` policies checked"); } else { - logger.success("No policy found"); + logger.success("No `before_resolution` policy found"); } Ok(()) } @@ -227,8 +221,9 @@ pub(crate) fn resolve_semconv_specs( registry: &mut SemConvRegistry, logger: impl Logger + Sync + Clone, ) -> Result { + let registry_id = registry.id().to_owned(); let resolved_schema = SchemaResolver::resolve_semantic_convention_registry(registry)?; - logger.success("SemConv registry resolved"); + logger.success(&format!("`{}` semconv registry resolved", registry_id)); Ok(resolved_schema) } diff --git a/test_data/attribute_name_collisions.rego b/test_data/attribute_name_collisions.rego new file mode 100644 index 00000000..247c05ff --- /dev/null +++ b/test_data/attribute_name_collisions.rego @@ -0,0 +1,68 @@ +package after_resolution + +import rego.v1 + +# Pre-compute const names and namespaces +const_names := {name: to_const_name(name) | + some g in input.groups + some attr in g.attributes + name := attr.name +} + +namespaces := {name: concat("", 
[name, "."]) | + some g in input.groups + some attr in g.attributes + name := attr.name +} + +deny contains violation if { + some name, const_name in const_names + not excluded_const_collisions[name] + collisions := [other_name | + some other_name, other_const in const_names + other_name != name + other_const == const_name + not excluded_const_collisions[other_name] + ] + count(collisions) > 0 + violation := attr_registry_collision( + "Attribute '%s' has the same constant name '%s' as '%s'.", + [name, const_name, concat(", ", sort(collisions))], + name + ) +} + +deny contains violation if { + some name, namespace in namespaces + not excluded_namespace_collisions[name] + collisions := [other_name | + some other_name, other_namespace in namespaces + startswith(other_name, namespace) + other_name != name + #not excluded_namespace_collisions[other_name] + ] + count(collisions) > 0 + violation := attr_registry_collision( + "Attribute '%s' name is used as a namespace in the following attributes '%s'.", + [name, concat(", ", sort(collisions))], + name + ) +} + +attr_registry_collision(description, args, attr_name) := { + "id": sprintf(description, args), + "type": "semconv_attribute", + "category": "naming_collision", + "attr": attr_name, + "group": "" +} + +to_const_name(name) := replace(name, ".", "_") + +# TODO - we'll need to specify how collision resolution happens in the schema - +# see phase 2 in https://github.com/open-telemetry/semantic-conventions/issues/1118#issuecomment-2173803006 +# For now just allow current collisions. 
+#excluded_const_collisions := {"messaging.client_id"} +excluded_const_collisions := {} +#excluded_namespace_collisions := {"messaging.operation", "db.operation", "deployment.environment"} +excluded_namespace_collisions := {} \ No newline at end of file diff --git a/test_data/compatibility_check.rego b/test_data/compatibility_check.rego new file mode 100644 index 00000000..8bcf9ea3 --- /dev/null +++ b/test_data/compatibility_check.rego @@ -0,0 +1,49 @@ +package comparison_after_resolution + +import rego.v1 + +# Semantic Convention Registry Compatibility Checker +# +# This file contains rules for checking backward compatibility +# between different versions of semantic convention registries. +# It builds upon the data structures and rules defined in the +# semconv package. + +# Import the set of baseline attributes from the semconv package +baseline_attributes := data.semconv.registry_baseline_attributes + +# Rule: Detect Removed Attributes +# +# This rule checks for attributes that existed in the baseline registry +# but are no longer present in the current registry. Removing attributes +# is considered a backward compatibility violation. +# +# In other words, we do not allow the removal of an attribute once added +# to the registry. It must exist SOMEWHERE in a group. +# +# The rule populates the 'deny' set with compatibility violations. 
+deny contains back_comp_violation(description, group_id, attr_name) if { + # Check if an attribute from the baseline is missing in the current registry + some attr_name in baseline_attributes + not data.semconv.registry_attributes[attr_name] + + # Retrieve the group ID of the attribute from the baseline registry + group_id := data.semconv.baseline_group_ids_by_attribute[attr_name] + + # Generate a description of the violation + description := sprintf("Attribute '%s' no longer exists in the attribute registry", [attr_name]) +} + +# Helper Function: Create Backward Compatibility Violation Object +# +# This function generates a structured violation object for each +# detected backward compatibility issue. +back_comp_violation(description, group_id, attr_id) := violation if { + violation := { + "id": description, + "type": "semconv_attribute", + "category": "backward_compatibility", + "group": group_id, + "attr": attr_id, + } +} \ No newline at end of file diff --git a/test_data/display_evolution.rego b/test_data/display_evolution.rego new file mode 100644 index 00000000..4803357c --- /dev/null +++ b/test_data/display_evolution.rego @@ -0,0 +1,41 @@ +package comparison_after_resolution + +# Collect all groups from input.groups +curr_groups := {g | g := input.groups[_]; startswith(g.id, "registry.")} + +# Collect all groups from data.groups +prev_groups := {g | g := data.groups[_]; startswith(g.id, "registry.")} + +# Collect all group IDs from input.groups +curr_group_ids := {g.id | g := curr_groups[_]} + +# Collect all group IDs from data.groups +prev_group_ids := {g.id | g := prev_groups[_]} + +# Determine added group IDs in curr_group_ids that are not in prev_group_ids +added_group_ids := {id | id := curr_group_ids[_]; not prev_group_ids[id]} + +# Determine removed group IDs in prev_group_ids that are not in curr_group_ids +removed_group_ids := {id | id := prev_group_ids[_]; not curr_group_ids[id]} + +# Detect all added groups +deny[empty_violation()] { + 
print("This group ", added_group_ids[_]," was added") + false +} + +# Detect all removed groups +deny[empty_violation()] { + print("This group ", removed_group_ids[_], " was removed") + false +} + +empty_violation() = violation { + violation := { + "id": "", + "type": "all", + "category": "info", + "group": "", + "attr": "", + } +} \ No newline at end of file diff --git a/test_data/registry.rego b/test_data/registry.rego new file mode 100644 index 00000000..7d4cf31b --- /dev/null +++ b/test_data/registry.rego @@ -0,0 +1,63 @@ +package before_resolution + +# This file enforces policies requiring all attributes to be defined within +# a semantic convention "registry". This is a naming/structure convention +# used by semantic conventions. + +# Helper to create attribute registry violations. +attr_registry_violation(description, group_id, attr_id) = violation { + violation := { + "id": description, + "type": "semconv_attribute", + "category": "attribute_registry_checks", + "group": group_id, + "attr": attr_id, + } +} + +# We only allow attribute groups in the attribute registry. +deny[attr_registry_violation(description, group.id, "")] { + group := input.groups[_] + startswith(group.id, "registry.") + group.type != "attribute_group" + + # TODO - separate violation_id and description once weaver supports it. + # violation_id := "attribute_registry_can_only_contain_attribute_groups" + description := sprintf("Registry group '%s' has invalid type '%s'. Groups in attribute registry must have `attribute_group` type.", [group.id, group.type]) +} + +# Any group that is NOT in the attribute registry that has an attribute id is +# in violation of not using the attribute registry. 
+deny[attr_registry_violation(description, group.id, attr_name)] { + group := input.groups[_] + not startswith(group.id, "registry.") + attr := group.attributes[_] + attr.id != null + + attr_name := get_attribute_name(attr, group) + + # TODO - separate violation_id and description once weaver supports it. + # violation_id := "attributes_must_be_defined_in_attribute_registry" + description := sprintf("Attribute '%s' is defined in the group '%s' which is not part of the attribute registry. Attributes can be defined in the registry group only.", [attr_name, group.id]) +} + +# A registry `attribute_group` containing at least one `ref` attribute is +# considered invalid if it's not in the registry group. +deny[attr_registry_violation("attributes_in_registry_cannot_reference_each_other", group.id, attr.ref)] { + # TODO - this will need to be updated to support `embed` in the future. + group := input.groups[_] + startswith(group.id, "registry.") + attr := group.attributes[_] + attr.ref != null + + # TODO - separate violation_id and description once weaver supports it. + # violation_id := "attributes_in_registry_cannot_reference_each_other" + description := sprintf("Registry group '%s' references attribute '%s'. 
Registry groups can only define new attributes.", [group.id, attr.ref]) +} + +get_attribute_name(attr, group) = name { + full_name = concat(".", [group.prefix, attr.id]) + + # if there was no prefix, we have a leading dot + name := trim(full_name, ".") +} \ No newline at end of file diff --git a/test_data/semantic-conventions-1.26.0.tar.gz b/test_data/semantic-conventions-1.26.0.tar.gz new file mode 100644 index 00000000..1fd9b134 Binary files /dev/null and b/test_data/semantic-conventions-1.26.0.tar.gz differ diff --git a/test_data/semantic-conventions-1.26.0.zip b/test_data/semantic-conventions-1.26.0.zip new file mode 100644 index 00000000..6c5c4783 Binary files /dev/null and b/test_data/semantic-conventions-1.26.0.zip differ diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 395bde43..447efdb7 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -2,12 +2,12 @@ //! Integration tests for the resolution process. -use weaver_cache::Cache; -use weaver_common::{Logger, TestLogger}; +use weaver_cache::registry_path::RegistryPath; +use weaver_cache::RegistryRepo; +use weaver_common::TestLogger; use weaver_resolver::attribute::AttributeCatalog; use weaver_resolver::registry::resolve_semconv_registry; use weaver_resolver::SchemaResolver; -use weaver_semconv::path::RegistryPath; use weaver_semconv::registry::SemConvRegistry; /// The URL of the official semantic convention registry. @@ -27,23 +27,21 @@ const SEMCONV_REGISTRY_MODEL: &str = "model"; #[test] fn test_cli_interface() { let log = TestLogger::new(); - let cache = Cache::try_new().unwrap_or_else(|e| { - log.error(&e.to_string()); - panic!("Failed to create the git cache repo, error: {e}"); - }); - let registry_id = "default"; // Load the official semantic convention registry into a local cache. // No parsing errors should be observed. 
- let registry_path = RegistryPath::GitUrl { - git_url: SEMCONV_REGISTRY_URL.to_owned(), - path: Some(SEMCONV_REGISTRY_MODEL.to_owned()), + let registry_path = RegistryPath::GitRepo { + url: SEMCONV_REGISTRY_URL.to_owned(), + sub_folder: Some(SEMCONV_REGISTRY_MODEL.to_owned()), + refspec: None, }; - let semconv_specs = - SchemaResolver::load_semconv_specs(&registry_path, &cache).unwrap_or_else(|e| { - panic!("Failed to load the semantic convention specs, error: {e}"); - }); + let registry_repo = RegistryRepo::try_new("main", &registry_path).unwrap_or_else(|e| { + panic!("Failed to create the registry repo, error: {e}"); + }); + let semconv_specs = SchemaResolver::load_semconv_specs(&registry_repo).unwrap_or_else(|e| { + panic!("Failed to load the semantic convention specs, error: {e}"); + }); let semconv_specs = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); // Check if the logger has reported any warnings or errors.