Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable autotag feature w/ wandb #411

Merged
merged 4 commits into from
Jun 9, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions trl/trainer/ppo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,61 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import subprocess
import warnings
from dataclasses import dataclass, field
from typing import Optional

import numpy as np
import requests

from ..core import flatten_dict


def autotag() -> str:
    """Build a comma-separated wandb tag string from the local git checkout.

    The tag starts with the output of ``git describe --tags``. When the
    current branch is not ``main``, the GitHub search API is queried for a
    pull request of ``lvwerra/trl`` matching the current commit and, if one
    is found, ``,pr-<number>`` is appended.

    The whole lookup is best-effort: it never raises because git or the
    network is unavailable.

    Returns:
        ``""`` when no git tag can be determined, otherwise ``"<git tag>"``
        or ``"<git tag>,pr-<number>"``.
    """
    wandb_tag = ""
    logging.info("autotag feature is enabled")
    try:
        git_tag = subprocess.check_output(["git", "describe", "--tags"]).decode("ascii").strip()
        wandb_tag = f"{git_tag}"
        logging.info(f"identified git tag: {git_tag}")
    except (subprocess.CalledProcessError, OSError, UnicodeDecodeError):
        # CalledProcessError: not a git checkout, or no tag is reachable.
        # OSError: the `git` executable is missing or cannot be launched.
        # UnicodeDecodeError: the tag name is not plain ASCII.
        # In every case there is simply nothing to tag with.
        return wandb_tag

    try:
        # Resolved inside the try so that a failing `git rev-parse` only
        # skips the pull-request lookup instead of aborting the caller.
        git_commit = subprocess.check_output(["git", "rev-parse", "--verify", "HEAD"]).decode("ascii").strip()
        # if the current branch is not main, try to find the PR number
        git_branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"]).decode("ascii").strip()
        if git_branch != "main":
            # try finding the pull request number on github; the timeout keeps
            # an unresponsive API from blocking the caller indefinitely
            prs = requests.get(
                f"https://api.github.com/search/issues?q=repo:lvwerra/trl+is:pr+{git_commit}",
                timeout=10,
            )
            if prs.status_code == 200:
                prs = prs.json()
                if len(prs["items"]) > 0:
                    pr = prs["items"][0]
                    pr_number = pr["number"]
                    wandb_tag += f",pr-{pr_number}"
                    logging.info(f"identified github pull request: {pr_number}")
        else:
            logging.info("current branch is main, not searching for pull request")
    except Exception as e:
        # Deliberately broad: tagging is optional and must never break the caller.
        logging.warning(f"Automatic autotag failed with the following error: {e}")

    return wandb_tag


@dataclass
class PPOConfig(object):
"""
Configuration class for PPOTrainer
"""

task_name: Optional[str] = field(
default=None,
metadata={"help": "Name of task to use - used only for tracking purposes"},
)
model_name: Optional[str] = field(
default=None,
metadata={"help": "Name of model to use - used only for tracking purposes"},
Expand Down Expand Up @@ -119,6 +159,15 @@ def __post_init__(self):
# raise error if wandb is not installed
try:
import wandb # noqa: F401

existing_wandb_tag = os.environ.get("WANDB_TAGS", "")
wandb_tag = autotag()
if len(wandb_tag) > 0:
if len(existing_wandb_tag) > 0:
os.environ["WANDB_TAGS"] = ",".join([existing_wandb_tag, wandb_tag])
else:
os.environ["WANDB_TAGS"] = wandb_tag
logging.info(f"the following tags will be used for wandb logging: {os.environ['WANDB_TAGS']}")
except ImportError:
raise ImportError(
"Please install wandb to use wandb logging. You can do this by running `pip install wandb`."
Expand Down