-
-
Notifications
You must be signed in to change notification settings - Fork 636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
"Loop subgraph" panic on main. #15653
Comments
This bisects to 7eea60e:
|
A (shlex-mangled) repro command looks like: ./pants --concurrent '--python-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' --autoflake-version=autoflake==1.4 '--autoflake-extra-requirements=[]' --autoflake-lockfile=src/python/pants/backend/python/lint/autoflake/autoflake.lock '--autoflake-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--bandit-version=bandit>=1.7.0,<1.8' '--bandit-extra-requirements=['"'"'setuptools'"'"', '"'"'GitPython==3.1.18'"'"']' --bandit-lockfile=src/python/pants/backend/python/lint/bandit/bandit.lock '--backend-packages=+['"'"'pants.backend.python.lint.bandit'"'"']' --black-version=black==22.1.0 '--black-extra-requirements=['"'"'typing-extensions>=3.10.0.0; python_version < "3.10"'"'"']' --black-lockfile=src/python/pants/backend/python/lint/black/black.lock '--black-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--docformatter-version=docformatter>=1.4,<1.5' '--docformatter-extra-requirements=[]' --docformatter-lockfile=src/python/pants/backend/python/lint/docformatter/docformatter.lock '--docformatter-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--flake8-version=flake8>=3.9.2,<4.0' '--flake8-extra-requirements=[]' --flake8-lockfile=src/python/pants/backend/python/lint/flake8/flake8.lock '--flake8-source-plugins=[]' '--isort-version=isort[pyproject,colors]>=5.9.3,<6.0' '--isort-extra-requirements=[]' --isort-lockfile=src/python/pants/backend/python/lint/isort/isort.lock '--isort-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--pylint-version=pylint>=2.11.0,<2.12' '--pylint-extra-requirements=[]' --pylint-lockfile=src/python/pants/backend/python/lint/pylint/pylint.lock '--pylint-source-plugins=[]' '--backend-packages=+['"'"'pants.backend.python.lint.pylint'"'"']' --yapf-version=yapf==0.32.0 '--yapf-extra-requirements=['"'"'toml'"'"']' --yapf-lockfile=src/python/pants/backend/python/lint/yapf/yapf.lock '--yapf-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--backend-packages=+['"'"'pants.backend.python.lint.yapf'"'"']' '--pyupgrade-version=pyupgrade>=2.31.0,<2.32' '--pyupgrade-extra-requirements=[]' --pyupgrade-lockfile=src/python/pants/backend/python/lint/pyupgrade/pyupgrade.lock '--pyupgrade-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--backend-packages=+['"'"'pants.backend.experimental.python.lint.pyupgrade'"'"']' --ipython-version=ipython==7.16.1 '--ipython-extra-requirements=[]' --ipython-lockfile=src/python/pants/backend/python/subsystems/ipython.lock '--setuptools-version=setuptools>=50.3.0,<58.0' '--setuptools-extra-requirements=['"'"'wheel>=0.35.1,<0.38'"'"']' --setuptools-lockfile=src/python/pants/backend/python/subsystems/setuptools.lock --setuptools-scm-version=setuptools-scm==6.4.2 '--setuptools-scm-extra-requirements=[]' --setuptools-scm-lockfile=src/python/pants/backend/python/subsystems/setuptools_scm.lock '--setuptools-scm-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--sphinx-version=sphinx>=4.5.0,<4.6' '--sphinx-extra-requirements=['"'"'setuptools'"'"']' --sphinx-lockfile=src/python/pants/backend/python/docs/sphinx/sphinx.lock '--sphinx-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' --mypy-version=mypy==0.950 '--mypy-extra-requirements=[]' --mypy-lockfile=src/python/pants/backend/python/typecheck/mypy/mypy.lock '--mypy-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--mypy-source-plugins=[]' --mypy-protobuf-version=mypy-protobuf==2.10 '--mypy-protobuf-extra-requirements=[]' --mypy-protobuf-lockfile=src/python/pants/backend/codegen/protobuf/python/mypy_protobuf.lock '--mypy-protobuf-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--backend-packages=+['"'"'pants.backend.codegen.protobuf.python'"'"']' --lambdex-version=lambdex==0.1.6 '--lambdex-extra-requirements=[]' --lambdex-lockfile=src/python/pants/backend/python/subsystems/lambdex.lock '--lambdex-interpreter-constraints=['"'"'CPython>=3.7,<3.10'"'"']' '--backend-packages=+['"'"'pants.backend.awslambda.python'"'"']' --pytest-version=pytest==7.0.1 '--pytest-extra-requirements=['"'"'pytest-cov>=2.12,!=2.12.1,<3.1'"'"']' --pytest-lockfile=src/python/pants/backend/python/subsystems/pytest.lock '--coverage-py-version=coverage[toml]>=5.5,<5.6' '--coverage-py-extra-requirements=[]' --coverage-py-lockfile=src/python/pants/backend/python/subsystems/coverage_py.lock '--coverage-py-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' --terraform-hcl2-parser-version=python-hcl2==3.0.5 '--terraform-hcl2-parser-extra-requirements=[]' --terraform-hcl2-parser-lockfile=src/python/pants/backend/terraform/hcl2.lock '--terraform-hcl2-parser-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--backend-packages=+['"'"'pants.backend.experimental.terraform'"'"']' --dockerfile-parser-version=dockerfile==3.2.0 '--dockerfile-parser-extra-requirements=[]' --dockerfile-parser-lockfile=src/python/pants/backend/docker/subsystems/dockerfile.lock '--dockerfile-parser-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--backend-packages=+['"'"'pants.backend.docker'"'"']' '--twine-version=twine>=3.7.1,<3.8' '--twine-extra-requirements=['"'"'colorama>=0.4.3'"'"']' --twine-lockfile=src/python/pants/backend/python/subsystems/twine.lock '--twine-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' --clang-format-version=clang-format==14.0.3 '--clang-format-extra-requirements=[]' --clang-format-lockfile=src/python/pants/backend/cc/lint/clangformat/clangformat.lock '--clang-format-interpreter-constraints=['"'"'CPython>=3.7,<4'"'"']' '--backend-packages=+['"'"'pants.backend.experimental.cc.lint.clangformat'"'"']' --junit-version=5.7.2 '--junit-artifacts=('"'"'org.junit.platform:junit-platform-console:1.7.2'"'"', '"'"'org.junit.jupiter:junit-jupiter-engine:{version}'"'"', '"'"'org.junit.vintage:junit-vintage-engine:{version}'"'"')' --junit-lockfile=src/python/pants/jvm/test/junit.default.lockfile.txt --google-java-format-version=1.13.0 '--google-java-format-artifacts=('"'"'com.google.googlejavaformat:google-java-format:{version}'"'"',)' --google-java-format-lockfile=src/python/pants/backend/java/lint/google_java_format/google_java_format.default.lockfile.txt --scalafmt-version=3.2.1 '--scalafmt-artifacts=('"'"'org.scalameta:scalafmt-cli_2.13:{version}'"'"',)' --scalafmt-lockfile=src/python/pants/backend/scala/lint/scalafmt/scalafmt.default.lockfile.txt --scalapb-version=0.11.6 '--scalapb-artifacts=('"'"'com.thesamet.scalapb:scalapbc_2.13:{version}'"'"',)' --scalapb-lockfile=src/python/pants/backend/codegen/protobuf/scala/scalapbc.default.lockfile.txt '--backend-packages=+['"'"'pants.backend.experimental.codegen.protobuf.scala'"'"']' --scalatest-version=3.2.10 '--scalatest-artifacts=('"'"'org.scalatest:scalatest_2.13:{version}'"'"',)' --scalatest-lockfile=src/python/pants/backend/scala/subsystems/scalatest.default.lockfile.txt --scrooge-version=21.12.0 '--scrooge-artifacts=('"'"'com.twitter:scrooge-generator_2.13:{version}'"'"',)' --scrooge-lockfile=src/python/pants/backend/codegen/thrift/scrooge/scrooge.default.lockfile.txt '--backend-packages=+['"'"'pants.backend.experimental.codegen.thrift.scrooge.scala'"'"']' --java-avro-version=1.11.0 '--java-avro-artifacts=('"'"'org.apache.avro:avro-tools:{version}'"'"',)' --java-avro-lockfile=src/python/pants/backend/codegen/avro/java/avro-tools.default.lockfile.txt '--backend-packages=+['"'"'pants.backend.experimental.codegen.avro.java'"'"']' --ktlint-version=0.45.2 '--ktlint-artifacts=('"'"'com.pinterest:ktlint:{version}'"'"',)' --ktlint-lockfile=src/python/pants/backend/kotlin/lint/ktlint/ktlint.lock '--backend-packages=+['"'"'pants.backend.experimental.kotlin.lint.ktlint'"'"']' generate-lockfiles '--resolve=['"'"'lambdex'"'"']' |
This is the minimal repro I find:
Remove any single backend (except 'pants.backend.awslambda.python' which is required for this), and things are green. |
Ok, with this diff: $ git diff src/rust/
diff --git a/src/rust/engine/rule_graph/src/builder.rs b/src/rust/engine/rule_graph/src/builder.rs
index e60873dfb..535e897ed 100644
--- a/src/rust/engine/rule_graph/src/builder.rs
+++ b/src/rust/engine/rule_graph/src/builder.rs
@@ -5,6 +5,7 @@ use crate::rules::{DependencyKey, ParamTypes, Query, Rule};
use crate::{params_str, Entry, EntryWithDeps, InnerEntry, RootEntry, RuleEdges, RuleGraph};
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
+use std::env;
use indexmap::IndexSet;
use internment::Intern;
@@ -478,6 +479,7 @@ impl<R: Rule> Builder<R> {
let mut iteration = 0;
let mut maybe_in_loop = HashSet::new();
let mut looping = false;
+ let looping_threshold = env::var("PANTS_RG_LOOP_THRESH").map_or(100000, |v| v.parse::<usize>().unwrap());
while let Some(node_id) = to_visit.pop() {
let node = if let Some(node) = graph[node_id].inner() {
node
@@ -500,7 +502,7 @@ impl<R: Rule> Builder<R> {
}
iteration += 1;
- if iteration > 100000 {
+ if iteration > looping_threshold {
looping = true;
}
if iteration % 1000 == 0 {
@@ -665,7 +667,7 @@ impl<R: Rule> Builder<R> {
log::trace!("{}", trace_str);
maybe_in_loop.insert(node_id);
- if maybe_in_loop.len() > 5 {
+ if maybe_in_loop.len() > env::var("PANTS_RG_LOOP_MAX").map_or(5, |v| v.parse::<usize>().unwrap()) {
let subgraph = graph.filter_map(
|node_id, node| {
if maybe_in_loop.contains(&node_id) {
@@ -678,7 +680,8 @@ impl<R: Rule> Builder<R> {
);
panic!(
- "Loop subgraph: {}",
+ "Loop subgraph [{} in loop]: {}",
+ maybe_in_loop.len(),
petgraph::dot::Dot::with_config(&subgraph, &[])
);
} I find non-deterministic success bumping up loop thresholds; so I'm not really sure if the bumps are effective or they just expose randomness better as they get bigger?: Good:
Then
|
I see deterministic success with a higher "looping" threshold... I think that you might have been running into the fact that we prune all env vars in With your patch applied and PANTS_RG_LOOP_THRESH=1000000 ./pants --no-pantsd --backend-packages="+['pants.backend.awslambda.python','pants.backend.python.lint.bandit','pants.backend.python.lint.pylint','pants.backend.python.lint.yapf','pants.backend.experimental.python.lint.pyupgrade','pants.backend.codegen.protobuf.python']" help Given the env var filtering issue, and since I'm hoping to be able to resume #11269 this summer, I'll likely bump the hardcoded loop threshold for now. |
Can't be. Are they pruned for the initial pantsd bring up? As noted I killed pantsd between all attempts. |
Yes, because pants/src/python/pants/pantsd/pants_daemon.py Lines 176 to 181 in b251df7
|
So my success vs failure was non-deterministic using existing thresholds. Is that expected? That the algorithm is non-deterministic? |
I'm not sure. But I suspect that one of your successful attempts was using an outdated instance of
It is expected in that we are using Rust's default randomized hash function... but I've never known it to affect success or failure. #11269 switches to deterministic hashing for that and other reasons. |
On main after your fix But, with ~my diff re-applied: $ git diff src/rust/
diff --git a/src/rust/engine/rule_graph/src/builder.rs b/src/rust/engine/rule_graph/src/builder.rs
index 4c9ef3141..2de045e36 100644
--- a/src/rust/engine/rule_graph/src/builder.rs
+++ b/src/rust/engine/rule_graph/src/builder.rs
@@ -5,6 +5,7 @@ use crate::rules::{DependencyKey, ParamTypes, Query, Rule};
use crate::{params_str, Entry, EntryWithDeps, InnerEntry, RootEntry, RuleEdges, RuleGraph};
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
+use std::env;
use indexmap::IndexSet;
use internment::Intern;
@@ -478,6 +479,9 @@ impl<R: Rule> Builder<R> {
let mut iteration = 0;
let mut maybe_in_loop = HashSet::new();
let mut looping = false;
+ let loop_thresh =
+ env::var("PANTS_RG_LOOP_THRESH").map_or(10000000, |v| v.parse::<usize>().unwrap());
+ let loop_max = env::var("PANTS_RG_LOOP_MAX").map_or(5, |v| v.parse::<usize>().unwrap());
while let Some(node_id) = to_visit.pop() {
let node = if let Some(node) = graph[node_id].inner() {
node
@@ -507,7 +511,7 @@ impl<R: Rule> Builder<R> {
// See https://github.com/pantsbuild/pants/issues/11269 for plans to improve this
// implementation.
iteration += 1;
- if iteration > 10000000 {
+ if iteration > loop_thresh {
looping = true;
}
if iteration % 1000 == 0 {
@@ -672,7 +676,7 @@ impl<R: Rule> Builder<R> {
log::trace!("{}", trace_str);
maybe_in_loop.insert(node_id);
- if maybe_in_loop.len() > 5 {
+ if maybe_in_loop.len() > loop_max {
let subgraph = graph.filter_map(
|node_id, node| {
if maybe_in_loop.contains(&node_id) {
@@ -685,7 +689,9 @@ impl<R: Rule> Builder<R> {
);
panic!(
- "Loop subgraph: {}",
+ "Loop subgraph [thresh={}, max={}]: {}",
+ loop_thresh,
+ loop_max,
petgraph::dot::Dot::with_config(&subgraph, &[])
);
} I get:
And for total clarity - odd numbers:
So env vars do get through somehow.
|
Your example lowers the value to |
My what a tangle. Ok. Gotcha. |
@stuhood you're partially right. The complicating factor is there is at least some rule graph construction that occurs before daemonization and that can read env vars:
That was not an issue in my original tests since even the default 5/100000 thresholds were enough to allow that initial rule graph construction to work. This example here uses such ridiculously low values, it doesn't. A mazy tangle! |
On main @ d39465b I consistently see a Loop subgraph panic from here:
pants/src/rust/engine/rule_graph/src/builder.rs
Lines 680 to 683 in d39465b
The text was updated successfully, but these errors were encountered: