Skip to content

Commit

Permalink
Translation experiments (#99)
Browse files Browse the repository at this point in the history
* adding helpers for generating and parsing language from dataset filename

* adding compare_dataset_translations experiment

* adding experiment helpers

* tweaking pirate translation strings

* adding translations for non-persona mwes

* fixing up make mwe helper

* adding a 'ctx' pseudo-style

* Revert "adding a 'ctx' pseudo-style"

This reverts commit a0058c4.

* refactoring to allow using arbitrary dataset variations, instead of the hacky pseudo-language stuff

* fixing using existing results in cross-steering

* adding helpers to calculate Jensen-Shannon and KL divergence for Bernoulli distributions

* using js dist for steering deltas

* adding more tests
  • Loading branch information
chanind authored Feb 21, 2024
1 parent 1a42e96 commit 46927b0
Show file tree
Hide file tree
Showing 19 changed files with 12,484 additions and 51 deletions.
76 changes: 38 additions & 38 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,29 @@ authors = [
classifiers = ["Private :: Do Not Upload"]
requires-python = ">=3.10,<4.0"
dependencies = [
"pyrallis>=0.3.1",
"wandb>=0.16.0",
"transformers>=4.35.2",
"torch>=2.1.1",
"datasets>=2.15.0",
"numpy>=1.26.2",
"accelerate>=0.26.1",
"scikit-learn>=1.3.2",
"evaluate>=0.4.1",
"rouge-score>=0.1.2",
"termcolor>=2.4.0",
"bitsandbytes>=0.42.0",
"nbdime>=4.0.1",
"steering-vectors>=0.3.0",
"openai>=1.10.0",
"arrr>=1.0.4",
"spacy>=3.7.2",
"mosestokenizer>=1.2.1",
"gradio>=4.16.0",
"simple-parsing>=0.1.5",
"torchmetrics>=1.3.0.post0",
"umap-learn>=0.5.5",
"google-cloud-translate>=3.15.1",
"pyrallis>=0.3.1",
"wandb>=0.16.0",
"transformers>=4.35.2",
"torch>=2.1.1",
"datasets>=2.15.0",
"numpy>=1.26.2",
"accelerate>=0.26.1",
"scikit-learn>=1.3.2",
"evaluate>=0.4.1",
"rouge-score>=0.1.2",
"termcolor>=2.4.0",
"bitsandbytes>=0.42.0",
"nbdime>=4.0.1",
"steering-vectors>=0.3.0",
"openai>=1.10.0",
"arrr>=1.0.4",
"spacy>=3.7.2",
"mosestokenizer>=1.2.1",
"gradio>=4.16.0",
"simple-parsing>=0.1.5",
"torchmetrics>=1.3.0.post0",
"umap-learn>=0.5.5",
"google-cloud-translate>=3.15.1",
]

[tool.black]
Expand All @@ -60,7 +60,7 @@ select = [
"I",
]
exclude = ["third_party", ".venv"]
ignore = ["F841", "E501", "E741", "I001"]
ignore = ["F841", "E501", "E741", "I001", "E731"]

[tool.ruff.extend-per-file-ignores]
"__init__.py" = ["F401", "F403", "E402"]
Expand Down Expand Up @@ -91,18 +91,18 @@ testpaths = ["tests"]

[tool.pdm.dev-dependencies]
dev = [
"notebook>=7.0.6",
"ipykernel>=6.27.1",
"matplotlib>=3.8.2",
"seaborn>=0.13.0",
"ffmpeg>=1.4",
"pytest>=7.4.3",
"black>=23.11.0",
"ruff>=0.1.6",
"pyright>=1.1.338",
"jupyter>=1.0.0",
"pre-commit>=3.5.0",
"syrupy>=4.6.0",
"pyright>=1.1.339",
"pytest-mock>=3.12.0",
"notebook>=7.0.6",
"ipykernel>=6.27.1",
"matplotlib>=3.8.2",
"seaborn>=0.13.0",
"ffmpeg>=1.4",
"pytest>=7.4.3",
"black>=23.11.0",
"ruff>=0.1.6",
"pyright>=1.1.338",
"jupyter>=1.0.0",
"pre-commit>=3.5.0",
"syrupy>=4.6.0",
"pyright>=1.1.339",
"pytest-mock>=3.12.0",
]
1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/fr/mwe/corrigible-neutral-HHH.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/fr/mwe/myopic-reward.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/fr/mwe/power-seeking-inclination.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/ja/mwe/corrigible-neutral-HHH.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/ja/mwe/myopic-reward.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/ja/mwe/power-seeking-inclination.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/pirate/mwe/corrigible-neutral-HHH.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/pirate/mwe/myopic-reward.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/pirate/mwe/power-seeking-inclination.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/zh/mwe/corrigible-neutral-HHH.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/zh/mwe/myopic-reward.jsonl

Large diffs are not rendered by default.

1,000 changes: 1,000 additions & 0 deletions raw_datasets/translated/zh/mwe/power-seeking-inclination.jsonl

Large diffs are not rendered by default.

42 changes: 31 additions & 11 deletions repepo/data/multiple_choice/make_mwe.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
""" Script to make datasets for Anthropic's Advanced AI Risk Model-Written Evals """

import json
from typing import Any
import re
from typing import Any, cast

from repepo.data.make_dataset import get_dataset_dir, get_raw_dataset_dir
from repepo.data.make_dataset import (
build_dataset_filename,
get_dataset_dir,
get_raw_dataset_dir,
)
from repepo.data.io import jdump
from repepo.core.types import Dataset, Example
from repepo.translation.constants import LangOrStyleCode

_DATASET_PATHS = {
"myopic-reward": "mwe/myopic-reward.jsonl",
"corrigible-neutral-HHH": "mwe/corrigible-neutral-HHH.jsonl",
"power-seeking-inclination": "mwe/power-seeking-inclination.jsonl",
}

def strip_meta_tags(text: str) -> str:
    """Return *text* with every <META_START>...<META_END> span removed."""
    meta_tag_pattern = r"<META_START>[^<]*<META_END>"
    return re.sub(meta_tag_pattern, "", text)


def convert_mwe_dataset(
Expand All @@ -23,7 +28,7 @@ def convert_mwe_dataset(
mwe_dataset.append(
Example(
instruction="",
input=element["question"],
input=strip_meta_tags(element["question"]),
# Curse you American spelling!
output=element["answer_matching_behavior"],
incorrect_outputs=[element["answer_not_matching_behavior"]],
Expand All @@ -35,15 +40,30 @@ def convert_mwe_dataset(

def make_mwe():
    """Build the base (English) MWE datasets.

    Discovers every ``mwe/*.jsonl`` file under the raw-dataset directory,
    converts each line-delimited JSON record into an ``Example``, and writes
    the result to the processed dataset directory under a name derived from
    the source file's stem.
    """
    for dataset_path in get_raw_dataset_dir().glob("mwe/*.jsonl"):
        with open(dataset_path, "r") as jsonfile:
            list_dataset = [json.loads(line) for line in jsonfile]

        # The behavior name (e.g. "myopic-reward") is the raw file's stem.
        dataset_name = dataset_path.stem
        dataset_info = {"behavior": dataset_name}
        filename = build_dataset_filename(dataset_name)
        mwe_dataset: Dataset = convert_mwe_dataset(list_dataset, dataset_info)
        jdump(mwe_dataset, get_dataset_dir() / filename)


def make_mwe_translations():
    """Build the translated MWE datasets.

    Mirrors ``make_mwe`` but walks ``translated/<lang_or_style>/mwe/*.jsonl``;
    the language-or-style code is recovered from the directory two levels
    above each file and baked into the output dataset filename.
    """
    for dataset_path in get_raw_dataset_dir().glob("translated/*/mwe/*.jsonl"):
        with open(dataset_path, "r") as jsonfile:
            list_dataset = [json.loads(line) for line in jsonfile]

        dataset_name = dataset_path.stem
        dataset_info = {"behavior": dataset_name}
        # Path layout is translated/<lang_or_style>/mwe/<name>.jsonl, so the
        # grandparent directory's stem is the language/style code. The cast
        # trusts the directory name to be a valid LangOrStyleCode.
        lang_or_style = cast(LangOrStyleCode, dataset_path.parent.parent.stem)
        filename = build_dataset_filename(dataset_name, lang_or_style=lang_or_style)
        mwe_dataset: Dataset = convert_mwe_dataset(list_dataset, dataset_info)
        jdump(mwe_dataset, get_dataset_dir() / filename)


# Regenerate both the base English MWE datasets and every translated variant.
if __name__ == "__main__":
    make_mwe()
    make_mwe_translations()
3 changes: 1 addition & 2 deletions repepo/data/translate/translate_mwe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def translate(
dataset: list[Any],
lang_or_style: LangOrStyleCode,
is_persona: bool,
max_workers: int = 10,
max_workers: int = 100,
show_progress: bool = True,
) -> list[Any]:
"""
Expand Down Expand Up @@ -55,7 +55,6 @@ def main(force: bool = False, persona_only: bool = False) -> None:
list_dataset,
lang_or_style,
is_persona=is_persona,
max_workers=20,
)
# create the target directory if it doesn't exist
target_path.parent.mkdir(parents=True, exist_ok=True)
Expand Down
Loading

0 comments on commit 46927b0

Please sign in to comment.