diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4ed4b34 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,95 @@ +# CHANGELOG + + + +## v0.0.1 (2024-12-13) + +### Fix + +* fix: experiment script typo ([`8a14c12`](https://github.com/dtch1997/steering-bench/commit/8a14c12bd3eb0d3bb2af0e382f356cfeeb57cf3a)) + +* fix: build dataset ([`7ef7e8e`](https://github.com/dtch1997/steering-bench/commit/7ef7e8e273b6e5be41f2a80092164240373642ec)) + +### Unknown + +* add ci (#3) + +* add ci, pre-commit + +* fix ci + +* update ci + +* update ruff config + +--------- + +Co-authored-by: Daniel Tan <dtch1997@users.noreply.github.com> ([`33c8a2b`](https://github.com/dtch1997/steering-bench/commit/33c8a2b8195b93bbdbeffb9e449379a0f7c1f724)) + +* Update README.md ([`7ed9e3b`](https://github.com/dtch1997/steering-bench/commit/7ed9e3b43d2d993c99a74ccc653dfde3efa290d0)) + +* add arxiv badge ([`f302b40`](https://github.com/dtch1997/steering-bench/commit/f302b404373d42526291bf12f6ab73c8a07f87fc)) + +* update ([`15f3930`](https://github.com/dtch1997/steering-bench/commit/15f39308509a286a7ad5d68e37c06e49a1fd6c06)) + +* rename persona_generalization to steering_generalization ([`0ddd1ad`](https://github.com/dtch1997/steering-bench/commit/0ddd1ad8b39885270142668b764e103efcd58ad4)) + +* Update README.md ([`a17bfba`](https://github.com/dtch1997/steering-bench/commit/a17bfbaa2e97746765c2abba62eb9b0c06f46e0c)) + +* Update README.md ([`b566646`](https://github.com/dtch1997/steering-bench/commit/b5666462ffa8fedfad7ce1c69576e3f8a182a974)) + +* Update README.md ([`95295e6`](https://github.com/dtch1997/steering-bench/commit/95295e6d99a15719707074fbc226626ed6418c78)) + +* Create README.md ([`fae2efb`](https://github.com/dtch1997/steering-bench/commit/fae2efb446b100ad87c7d088b6c205709e558f1b)) + +* Create README.md ([`9151b2a`](https://github.com/dtch1997/steering-bench/commit/9151b2a09b51be95c2310ebff207bb015204c8e9)) + +* Update README.md ([`0f25019`](https://github.com/dtch1997/steering-bench/commit/0f25019d0293aaf45f60ee2225c64d492fa90db3)) + +* Refusal (#2) + +* add refusal dataset + +* wip refusal experiment + +--------- + +Co-authored-by: Daniel CH Tan <dtch1997@users.noreply.github.com> ([`893d67a`](https://github.com/dtch1997/steering-bench/commit/893d67a129cba415ccfef5f7b8b56ca955e90f27)) + +* remove redundant script ([`3d4870f`](https://github.com/dtch1997/steering-bench/commit/3d4870fd2284edb61b18b2028c48bcf802c22057)) + +* refactor ([`ffe4666`](https://github.com/dtch1997/steering-bench/commit/ffe466652715b893b356bcc99218bd3e46ed085c)) + +* defer formatting to apply_chat_template ([`53021b6`](https://github.com/dtch1997/steering-bench/commit/53021b62c51c64e58909541a484fb662789e09e7)) + +* add persona generalization experiment ([`3d70293`](https://github.com/dtch1997/steering-bench/commit/3d7029358e88d0dc0a9d442346d3a809b9a8ece5)) + +* delete results ([`8dac290`](https://github.com/dtch1997/steering-bench/commit/8dac290a83a846cc73b6c49cf6ab13bf17f490a6)) + +* steering experiments (#1) + +* wip steering experiment + +* minimal working version done + +* refactor + +* add working layer sweep script + +* working layer sweep + +--------- + +Co-authored-by: Daniel Tan <dtch1997@users.noreply.github.com> ([`b571672`](https://github.com/dtch1997/steering-bench/commit/b5716725eb679e23724ec785dfb10bb11c501db4)) + +* add dataset builder ([`29783a3`](https://github.com/dtch1997/steering-bench/commit/29783a3d88f9728188183237acef8370a8b0a8d6)) + +* update interfaces ([`5589d8b`](https://github.com/dtch1997/steering-bench/commit/5589d8b3ef1fe0a528b712f6a971887fff149120)) + +* add pipeline, steering hook ([`d27e7f6`](https://github.com/dtch1997/steering-bench/commit/d27e7f6d0ef6577e16482ac1e641e2353ead1f83)) + +* add dataset preprocessing ([`33923b8`](https://github.com/dtch1997/steering-bench/commit/33923b895d4cfb6eec1f9c4d1ca4b81e4704ff61)) + +* add datasets ([`167d8a2`](https://github.com/dtch1997/steering-bench/commit/167d8a2101c0b6a66b8e291e62bd5e30c150cf49)) + +* initial commit ([`6899bb2`](https://github.com/dtch1997/steering-bench/commit/6899bb20ddb84a8a480152dfe175527311f1077f))