pyproject.toml

# build requirements
[build-system]
requires = ["setuptools < 68.0.0"]
build-backend = "setuptools.build_meta"

# iSort
[tool.isort]
multi_line_output = 0
line_length = 80
skip = [ "env", "wandb", "runs", "build", "node_modules" ]
include_trailing_comma = true
split_on_trailing_comma = true

[tool.ruff.lint]
select = [
    "C4",
    # TODO port pydocstyle
    # "D", # pydocstyle
    "LOG",
    "PERF",
    "PLE",
    "COM812",
]
[tool.ruff]
exclude = [
    "build/**",
    "docs/**",
    "node_modules/**",
]

# Coverage
[tool.coverage.run]
parallel = true
branch = true
relative_files = true
concurrency = ["thread"]
include = [
    "llmfoundry/*"
]


# Pyright
[tool.pyright]
exclude = ['env-**', 'venv*', '.venv']
stubPath = ""  # suppress useless 'stubPath is not a valid directory' errors

reportUnnecessaryIsInstance = "none" # it is ok to do this for clarity or safety
reportMissingTypeStubs = "none"
reportIncompatibleMethodOverride = "none"
reportIncompatibleVariableOverride = "error"
reportUnusedImport = "error"
reportUnusedClass = "warning"
reportUnusedFunction = "warning"
reportUnusedVariable = "error"
reportDuplicateImport = "error"
reportWildcardImportFromLibrary = "error"
reportUntypedFunctionDecorator = "warning"
reportPrivateImportUsage = "none"
reportUndefinedVariable = "error"
strictParameterNoneValue = true
reportPropertyTypeMismatch = "error"
reportUntypedNamedTuple = "error"
reportUnnecessaryCast = "error"
reportInvalidTypeVarUse = "error"
reportOverlappingOverload = "error"
reportUninitializedInstanceVariable = "error"
reportInvalidStringEscapeSequence = "error"
reportMissingParameterType = "error"
reportCallInDefaultInitializer = "error"
reportUnnecessaryComparison = "error"
reportSelfClsParameterName = "error"
reportImplicitStringConcatenation = "warning"  # TODO: make this an error
reportInvalidStubStatement = "error"
reportIncompleteStub = "error"
reportUnsupportedDunderAll = "error"
reportUnusedCoroutine = "error"
reportMissingImports = "none"

# Pytest
[tool.pytest.ini_options]
# By default, skip gpu tests
addopts = "--tb=short -m 'not gpu'"

markers = [
    # For distributed testing
    "world_size(val)",
    # Should be run during daily regression
    "daily",
    # Whether the test will be reading data from a remote source, and may require credentials
    "remote",
    # whether the test requires a gpu
    "gpu",
]

filterwarnings = [
    # "error",  # warnings should be treated like errors, but still need to fix some warnings
    'ignore:ExtraArgumentWarning',  # extra arguments originate from pytest-specific CLI args
    'ignore:DistributedDefaultValueWarning',  # default distributed values are fine
    'ignore:NoDistributedWarning',  # running without distributed is fine
    'ignore:Deterministic mode is activated:UserWarning',  # all tests run with deterministic mode
    'ignore:SubsetNumBatchesWarning',  # different subsets OK for testing
    'ignore:No optimizer:UserWarning',  # testing defaults
    'ignore:No scheduler:UserWarning',  # testing defaults
    'ignore::DeprecationWarning:tensorboard',  # ignore tensorboard
]

# Yapf
[tool.yapf]
# Align closing bracket with visual indentation.
align_closing_bracket_with_visual_indent = false

# Allow dictionary keys to exist on multiple lines. For example:
#
#   x = {
#       ('this is the first element of a tuple',
#        'this is the second element of a tuple'):
#            value,
#   }
allow_multiline_dictionary_keys = false

# Allow lambdas to be formatted on more than one line.
allow_multiline_lambdas = false

# Allow splitting before a default / named assignment in an argument list.
allow_split_before_default_or_named_assigns = true

# Allow splits before the dictionary value.
allow_split_before_dict_value = true

#   Let spacing indicate operator precedence. For example:
#
#     a = 1 * 2 + 3 / 4
#     b = 1 / 2 - 3 * 4
#     c = (1 + 2) * (3 - 4)
#     d = (1 - 2) / (3 + 4)
#     e = 1 * 2 - 3
#     f = 1 + 2 + 3 + 4
#
# will be formatted as follows to indicate precedence:
#
#     a = 1*2 + 3/4
#     b = 1/2 - 3*4
#     c = (1+2) * (3-4)
#     d = (1-2) / (3+4)
#     e = 1*2 - 3
#     f = 1 + 2 + 3 + 4
#
arithmetic_precedence_indication = false

# Number of blank lines surrounding top-level function and class
# definitions.
blank_lines_around_top_level_definition = 2

# Insert a blank line before a class-level docstring.
blank_line_before_class_docstring = false

# Insert a blank line before a module docstring.
blank_line_before_module_docstring = true

# Insert a blank line before a 'def' or 'class' immediately nested
# within another 'def' or 'class'. For example:
#
#   class Foo:
#                      # <------ this blank line
#     def method():
#       ...
blank_line_before_nested_class_or_def = true

# Do not split consecutive brackets. Only relevant when
# dedent_closing_brackets is set. For example:
#
#    call_func_that_takes_a_dict(
#        {
#            'key1': 'value1',
#            'key2': 'value2',
#        }
#    )
#
# would reformat to:
#
#    call_func_that_takes_a_dict({
#        'key1': 'value1',
#        'key2': 'value2',
#    })
coalesce_brackets = true

# The column limit.
column_limit = 80

# The style for continuation alignment. Possible values are:
#
# - SPACE: Use spaces for continuation alignment. This is default behavior.
# - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns
#   (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or
#   CONTINUATION_INDENT_WIDTH spaces) for continuation alignment.
# - VALIGN-RIGHT: Vertically align continuation lines to multiple of
#   INDENT_WIDTH columns. Slightly right (one tab or a few spaces) if
#   cannot vertically align continuation lines with indent characters.
continuation_align_style = 'SPACE'

# Indent width used for line continuations.
continuation_indent_width = 4

# Put closing brackets on a separate line, dedented, if the bracketed
# expression can't fit in a single line. Applies to all kinds of brackets,
# including function definitions and calls. For example:
#
#   config = {
#       'key1': 'value1',
#       'key2': 'value2',
#   }        # <--- this bracket is dedented and on a separate line
#
#   time_series = self.remote_client.query_entity_counters(
#       entity='dev3246.region1',
#       key='dns.query_latency_tcp',
#       transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
#       start_ts=now()-timedelta(days=3),
#       end_ts=now(),
#   )        # <--- this bracket is dedented and on a separate line
dedent_closing_brackets = true

# Disable the heuristic which places each list element on a separate line
# if the list is comma-terminated.
disable_ending_comma_heuristic = false

# Place each dictionary entry onto its own line.
each_dict_entry_on_separate_line = true

# Require multiline dictionary even if it would normally fit on one line.
# For example:
#
#   config = {
#       'key1': 'value1'
#   }
force_multiline_dict = false

# The regex for an i18n comment. The presence of this comment stops
# reformatting of that line, because the comments are required to be
# next to the string they translate.
i18n_comment = '#\..*'

# The i18n function call names. The presence of this function stops
# reformattting on that line, because the string it has cannot be moved
# away from the i18n comment.
i18n_function_call = 'N_, _'

# Indent blank lines.
indent_blank_lines = false

# Put closing brackets on a separate line, indented, if the bracketed
# expression can't fit in a single line. Applies to all kinds of brackets,
# including function definitions and calls. For example:
#
#   config = {
#       'key1': 'value1',
#       'key2': 'value2',
#       }        # <--- this bracket is indented and on a separate line
#
#   time_series = self.remote_client.query_entity_counters(
#       entity='dev3246.region1',
#       key='dns.query_latency_tcp',
#       transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
#       start_ts=now()-timedelta(days=3),
#       end_ts=now(),
#       )        # <--- this bracket is indented and on a separate line
indent_closing_brackets = false

# Indent the dictionary value if it cannot fit on the same line as the
# dictionary key. For example:
#
#   config = {
#       'key1':
#           'value1',
#       'key2': value1 +
#               value2,
#   }
indent_dictionary_value = true

# The number of columns to use for indentation.
indent_width = 4

# Join short lines into one line. E.g., single line 'if' statements.
join_multiple_lines = false

# Do not include spaces around selected binary operators. For example:
#
#   1 + 2 * 3 - 4 / 5
#
# will be formatted as follows when configured with "*,/":
#
#   1 + 2*3 - 4/5
no_spaces_around_selected_binary_operators = ''

# Use spaces around default or named assigns.
spaces_around_default_or_named_assign = false

# Adds a space after the opening '{' and before the ending '}' dict delimiters.
#
#   {1: 2}
#
# will be formatted as:
#
#   { 1: 2 }
spaces_around_dict_delimiters = false

# Adds a space after the opening '[' and before the ending ']' list delimiters.
#
#   [1, 2]
#
# will be formatted as:
#
#   [ 1, 2 ]
spaces_around_list_delimiters = false

# Use spaces around the power operator.
spaces_around_power_operator = false

# Use spaces around the subscript / slice operator.  For example:
#
#   my_list[1 : 10 : 2]
spaces_around_subscript_colon = false

# Adds a space after the opening '(' and before the ending ')' tuple delimiters.
#
#   (1, 2, 3)
#
# will be formatted as:
#
#   ( 1, 2, 3 )
spaces_around_tuple_delimiters = false

# The number of spaces required before a trailing comment.
# This can be a single value (representing the number of spaces
# before each trailing comment) or list of values (representing
# alignment column values; trailing comments within a block will
# be aligned to the first column value that is greater than the maximum
# line length within the block). For example:
#
# With spaces_before_comment=5:
#
#   1 + 1 # Adding values
#
# will be formatted as:
#
#   1 + 1     # Adding values <-- 5 spaces between the end of the statement and comment
#
# With spaces_before_comment = '15, 20:'
#
#   1 + 1 # Adding values
#   two + two # More adding
#
#   longer_statement # This is a longer statement
#   short # This is a shorter statement
#
#   a_very_long_statement_that_extends_beyond_the_final_column # Comment
#   short # This is a shorter statement
#
# will be formatted as:
#
#   1 + 1          # Adding values <-- end of line comments in block aligned to col 15
#   two + two      # More adding
#
#   longer_statement    # This is a longer statement <-- end of line comments in block aligned to col 20
#   short               # This is a shorter statement
#
#   a_very_long_statement_that_extends_beyond_the_final_column  # Comment <-- the end of line comments are aligned based on the line length
#   short                                                       # This is a shorter statement
#
spaces_before_comment = 2

# Insert a space between the ending comma and closing bracket of a list,
# etc.
space_between_ending_comma_and_closing_bracket = false

# Use spaces inside brackets, braces, and parentheses.  For example:
#
#   method_call( 1 )
#   my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ]
#   my_set = { 1, 2, 3 }
space_inside_brackets = false

# Split before arguments
split_all_comma_separated_values = false

# Split before arguments, but do not split all subexpressions recursively
# (unless needed).
split_all_top_level_comma_separated_values = false

# Split before arguments if the argument list is terminated by a
# comma.
split_arguments_when_comma_terminated = true

# Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@'
# rather than after.
split_before_arithmetic_operator = false

# Set to True to prefer splitting before '&', '|' or '^' rather than
# after.
split_before_bitwise_operator = false

# Split before the closing bracket if a list or dict literal doesn't fit on
# a single line.
split_before_closing_bracket = true

# Split before a dictionary or set generator (comp_for). For example, note
# the split before the 'for':
#
#   foo = {
#       variable: 'Hello world, have a nice day!'
#       for variable in bar if variable != 42
#   }
split_before_dict_set_generator = false

# Split before the '.' if we need to split a longer expression:
#
#   foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d))
#
# would reformat to something like:
#
#   foo = ('This is a really long string: {}, {}, {}, {}'
#          .format(a, b, c, d))
split_before_dot = false

# Split after the opening paren which surrounds an expression if it doesn't
# fit on a single line.
split_before_expression_after_opening_paren = false

# If an argument / parameter list is going to be split, then split before
# the first argument.
split_before_first_argument = false

# Set to True to prefer splitting before 'and' or 'or' rather than
# after.
split_before_logical_operator = false

# Split named assignments onto individual lines.
split_before_named_assigns = true

# Set to True to split list comprehensions and generators that have
# non-trivial expressions and multiple clauses before each of these
# clauses. For example:
#
#   result = [
#       a_long_var + 100 for a_long_var in xrange(1000)
#       if a_long_var % 10]
#
# would reformat to something like:
#
#   result = [
#       a_long_var + 100
#       for a_long_var in xrange(1000)
#       if a_long_var % 10]
split_complex_comprehension = true

# The penalty for splitting right after the opening bracket.
split_penalty_after_opening_bracket = 300

# The penalty for splitting the line after a unary operator.
split_penalty_after_unary_operator = 10000

# The penalty of splitting the line around the '+', '-', '*', '/', '//',
# ``%``, and '@' operators.
split_penalty_arithmetic_operator = 300

# The penalty for splitting right before an if expression.
split_penalty_before_if_expr = 0

# The penalty of splitting the line around the '&', '|', and '^'
# operators.
split_penalty_bitwise_operator = 300

# The penalty for splitting a list comprehension or generator
# expression.
split_penalty_comprehension = 2100

# The penalty for characters over the column limit.
split_penalty_excess_character = 7000

# The penalty incurred by adding a line split to the unwrapped line. The
# more line splits added the higher the penalty.
split_penalty_for_added_line_split = 20

# The penalty of splitting a list of "import as" names. For example:
#
#   from a_very_long_or_indented_module_name_yada_yad import (long_argument_1,
#                                                             long_argument_2,
#                                                             long_argument_3)
#
# would reformat to something like:
#
#   from a_very_long_or_indented_module_name_yada_yad import (
#       long_argument_1, long_argument_2, long_argument_3)
split_penalty_import_names = 0

# The penalty of splitting the line around the 'and' and 'or'
# operators.
split_penalty_logical_operator = 300

# Use the Tab character for indentation.
use_tabs = false

# Ignore directories
[tool.yapfignore]
ignore_patterns = [
    "runs/**/*.py",
    "wandb/**/*.py",
    "build/**/*.py",
]

[tool.pydocstyle]
convention="google"
add_ignore="D100,D101,D102,D103,D104,D105,D107,D400,D401,D415"
add_select="D404"