-
Notifications
You must be signed in to change notification settings - Fork 294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix variable fetching on remote machines #4064
Changes from all commits
dcf2970
f1230af
3c4558a
aef1f0b
6ee5562
3cd15f8
0b1b1b6
cbda46a
2616fe0
c8c2f45
a34e1bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Fix variable fetching on remote machines that don't have our scripts files on them. |
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,98 @@ | ||||||||||||
import pandas as _VSCODE_pd | ||||||||||||
import builtins as _VSCODE_builtins | ||||||||||||
import json as _VSCODE_json | ||||||||||||
import pandas.io.json as _VSCODE_pd_json | ||||||||||||
|
||||||||||||
# Function that converts the var passed in into a pandas data frame if possible | ||||||||||||
def _VSCODE_convertToDataFrame(df): | ||||||||||||
if isinstance(df, list): | ||||||||||||
df = _VSCODE_pd.DataFrame(df) | ||||||||||||
elif isinstance(df, _VSCODE_pd.Series): | ||||||||||||
df = _VSCODE_pd.Series.to_frame(df) | ||||||||||||
elif isinstance(df, dict): | ||||||||||||
df = _VSCODE_pd.Series(df) | ||||||||||||
df = _VSCODE_pd.Series.to_frame(df) | ||||||||||||
elif hasattr(df, "toPandas"): | ||||||||||||
df = df.toPandas() | ||||||||||||
else: | ||||||||||||
"""Disabling bandit warning for try, except, pass. We want to swallow all exceptions here to not crash on | ||||||||||||
variable fetching""" | ||||||||||||
try: | ||||||||||||
temp = _VSCODE_pd.DataFrame(df) | ||||||||||||
df = temp | ||||||||||||
except: # nosec | ||||||||||||
pass | ||||||||||||
return df | ||||||||||||
|
||||||||||||
|
||||||||||||
# Function to compute row count for a value | ||||||||||||
def _VSCODE_getRowCount(var): | ||||||||||||
if hasattr(var, "shape"): | ||||||||||||
try: | ||||||||||||
# Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it | ||||||||||||
if isinstance(var.shape, tuple): | ||||||||||||
return var.shape[0] | ||||||||||||
except TypeError: | ||||||||||||
return 0 | ||||||||||||
elif hasattr(var, "__len__"): | ||||||||||||
try: | ||||||||||||
return _VSCODE_builtins.len(var) | ||||||||||||
except TypeError: | ||||||||||||
return 0 | ||||||||||||
|
||||||||||||
|
||||||||||||
# Function to retrieve a set of rows for a data frame | ||||||||||||
def _VSCODE_getDataFrameRows(df, start, end): | ||||||||||||
df = _VSCODE_convertToDataFrame(df) | ||||||||||||
|
||||||||||||
# Turn into JSON using pandas. We use pandas because it's about 3 orders of magnitude faster to turn into JSON | ||||||||||||
rows = df.iloc[start:end] | ||||||||||||
return _VSCODE_pd_json.to_json(None, rows, orient="table", date_format="iso") | ||||||||||||
|
||||||||||||
|
||||||||||||
# Function to get info on the passed in data frame | ||||||||||||
def _VSCODE_getDataFrameInfo(df): | ||||||||||||
df = _VSCODE_convertToDataFrame(df) | ||||||||||||
rowCount = _VSCODE_getRowCount(df) | ||||||||||||
|
||||||||||||
# If any rows, use pandas json to convert a single row to json. Extract | ||||||||||||
# the column names and types from the json so we match what we'll fetch when | ||||||||||||
# we ask for all of the rows | ||||||||||||
if rowCount: | ||||||||||||
try: | ||||||||||||
row = df.iloc[0:1] | ||||||||||||
json_row = _VSCODE_pd_json.to_json(None, row, date_format="iso") | ||||||||||||
columnNames = list(_VSCODE_json.loads(json_row)) | ||||||||||||
except: | ||||||||||||
columnNames = list(df) | ||||||||||||
else: | ||||||||||||
columnNames = list(df) | ||||||||||||
|
||||||||||||
# Compute the index column. It may have been renamed | ||||||||||||
indexColumn = df.index.name if df.index.name else "index" | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a problem with this PR, but this line seems to be causing opening the data viewer from the debug window to fail with the following error on some machines (no root cause yet).
While we're here:
Suggested change
|
||||||||||||
columnTypes = _VSCODE_builtins.list(df.dtypes) | ||||||||||||
|
||||||||||||
# Make sure the index column exists | ||||||||||||
if indexColumn not in columnNames: | ||||||||||||
columnNames.insert(0, indexColumn) | ||||||||||||
columnTypes.insert(0, "int64") | ||||||||||||
|
||||||||||||
# Then loop and generate our output json | ||||||||||||
columns = [] | ||||||||||||
for n in _VSCODE_builtins.range(0, _VSCODE_builtins.len(columnNames)): | ||||||||||||
column_type = columnTypes[n] | ||||||||||||
column_name = str(columnNames[n]) | ||||||||||||
colobj = {} | ||||||||||||
colobj["key"] = column_name | ||||||||||||
colobj["name"] = column_name | ||||||||||||
colobj["type"] = str(column_type) | ||||||||||||
columns.append(colobj) | ||||||||||||
|
||||||||||||
# Save this in our target | ||||||||||||
target = {} | ||||||||||||
target["columns"] = columns | ||||||||||||
target["indexColumn"] = indexColumn | ||||||||||||
target["rowCount"] = rowCount | ||||||||||||
|
||||||||||||
# return our json object as a string | ||||||||||||
return _VSCODE_json.dumps(target) |
This file was deleted.
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,6 @@ | ||
# Query Jupyter server for the info about a dataframe | ||
import json as _VSCODE_json | ||
import pandas as _VSCODE_pd | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Imports that were removed here were all unused in variable fetching code. |
||
import pandas.io.json as _VSCODE_pd_json | ||
import builtins as _VSCODE_builtins | ||
import vscodeDataFrameHelpers as _VSCODE_dataFrameHelpers | ||
|
||
|
||
def _VSCODE_maybeParseTensorShape(var, result): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ import { IDebugService } from '../../common/application/types'; | |
import { traceError } from '../../common/logger'; | ||
import { IConfigurationService, Resource } from '../../common/types'; | ||
import { sendTelemetryEvent } from '../../telemetry'; | ||
import { DataFrameLoading, Identifiers, Telemetry } from '../constants'; | ||
import { DataFrameLoading, GetVariableInfo, Identifiers, Telemetry } from '../constants'; | ||
import { DebugLocationTracker } from '../debugLocationTracker'; | ||
import { | ||
IConditionalJupyterVariables, | ||
|
@@ -28,7 +28,8 @@ export class DebuggerVariables extends DebugLocationTracker | |
implements IConditionalJupyterVariables, DebugAdapterTracker { | ||
private refreshEventEmitter = new EventEmitter<void>(); | ||
private lastKnownVariables: IJupyterVariable[] = []; | ||
private importedIntoKernel = new Set<string>(); | ||
private importedDataFrameScriptsIntoKernel = new Set<string>(); | ||
private importedGetVariableInfoScriptsIntoKernel = new Set<string>(); | ||
private watchedNotebooks = new Map<string, Disposable[]>(); | ||
private debuggingStarted = false; | ||
constructor( | ||
|
@@ -109,7 +110,7 @@ export class DebuggerVariables extends DebugLocationTracker | |
|
||
// Then eval calling the main function with our target variable | ||
const results = await this.evaluate( | ||
`${DataFrameLoading.DataFrameInfoFunc}(${targetVariable.name})`, | ||
`${DataFrameLoading.DataFrameInfoImportFunc}(${targetVariable.name})`, | ||
// tslint:disable-next-line: no-any | ||
(targetVariable as any).frameId | ||
); | ||
|
@@ -154,7 +155,7 @@ export class DebuggerVariables extends DebugLocationTracker | |
for (let pos = start; pos < end; pos += chunkSize) { | ||
const chunkEnd = Math.min(pos + chunkSize, minnedEnd); | ||
const results = await this.evaluate( | ||
`${DataFrameLoading.DataFrameRowFunc}(${targetVariable.name}, ${pos}, ${chunkEnd})`, | ||
`${DataFrameLoading.DataFrameRowImportFunc}(${targetVariable.name}, ${pos}, ${chunkEnd})`, | ||
// tslint:disable-next-line: no-any | ||
(targetVariable as any).frameId | ||
); | ||
|
@@ -194,7 +195,8 @@ export class DebuggerVariables extends DebugLocationTracker | |
this.refreshEventEmitter.fire(); | ||
const key = this.debugService.activeDebugSession?.id; | ||
if (key) { | ||
this.importedIntoKernel.delete(key); | ||
this.importedDataFrameScriptsIntoKernel.delete(key); | ||
this.importedGetVariableInfoScriptsIntoKernel.delete(key); | ||
} | ||
} | ||
} | ||
|
@@ -217,7 +219,8 @@ export class DebuggerVariables extends DebugLocationTracker | |
} | ||
|
||
private resetImport(key: string) { | ||
this.importedIntoKernel.delete(key); | ||
this.importedDataFrameScriptsIntoKernel.delete(key); | ||
this.importedGetVariableInfoScriptsIntoKernel.delete(key); | ||
} | ||
|
||
// tslint:disable-next-line: no-any | ||
|
@@ -243,12 +246,24 @@ export class DebuggerVariables extends DebugLocationTracker | |
try { | ||
// Run our dataframe scripts only once per session because they're slow | ||
const key = this.debugService.activeDebugSession?.id; | ||
if (key && !this.importedIntoKernel.has(key)) { | ||
if (key && !this.importedDataFrameScriptsIntoKernel.has(key)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just wanted to make sure you tested this with interactive debugging and run by line? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rchiodo I tested it with interactive cell debugging, but not RBL yet. I'll check that out quick. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And dataframes open while debugging too? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rchiodo Bleh, I'm glad you asked this. I think it was working in the above example as the variables had already been seen before in a non-debug context. I don't think this is all working right for the debugger right now. I think that evaluate just takes an expression, so it's only getting one function if the script file contents are passed in. I think that I can still use this approach for kernelVariables (as those work remote) then convert debugger back to using an import statement. Debugger is only local, so that should be acceptable. The import is a single expression which is probably why it works with evaluate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rchiodo Not sure if you want to take another look, but I reverted debugger back to using imports. Pretty close to the old code only we just have one file that we import now instead of two for the data frame functions. Checked the following scenarios: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you see Joyce's Team's comment about the view in variable menu? This might not work in remote situations for that too. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rchiodo Did you mean on GitHub, not Teams? I commented on a thread on GitHub. If I was reading that right it's more an issue that the sys path might not work for the situation of opening a data frame viewer from a remote debugging session. Since the debuggerVariables path hasn't really changed in this PR I was figuring that I'd still push this PR to fix the non-debugging cases, then pick that up next if it's considered a must-fix. Was that right? Or were you concerned about this PR regressing something there? Might have to do something more interesting if that's the case. I think that I could push each individual function over in an evaluate, though that sounds kinda ugly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah sorry it was this issue here: I agree this isn't necessary now. (especially if this bug is must fix) |
||
await this.evaluate(DataFrameLoading.DataFrameSysImport); | ||
await this.evaluate(DataFrameLoading.DataFrameInfoImport); | ||
await this.evaluate(DataFrameLoading.DataFrameRowImport); | ||
await this.evaluate(DataFrameLoading.VariableInfoImport); | ||
this.importedIntoKernel.add(key); | ||
await this.evaluate(DataFrameLoading.DataFrameImport); | ||
this.importedDataFrameScriptsIntoKernel.add(key); | ||
} | ||
} catch (exc) { | ||
traceError('Error attempting to import in debugger', exc); | ||
} | ||
} | ||
|
||
private async importGetVariableInfoScripts(): Promise<void> { | ||
try { | ||
// Run our variable info scripts only once per session because they're slow | ||
const key = this.debugService.activeDebugSession?.id; | ||
if (key && !this.importedGetVariableInfoScriptsIntoKernel.has(key)) { | ||
await this.evaluate(GetVariableInfo.GetVariableInfoSysImport); | ||
await this.evaluate(GetVariableInfo.VariableInfoImport); | ||
this.importedGetVariableInfoScriptsIntoKernel.add(key); | ||
} | ||
} catch (exc) { | ||
traceError('Error attempting to import in debugger', exc); | ||
|
@@ -257,11 +272,11 @@ export class DebuggerVariables extends DebugLocationTracker | |
|
||
private async getFullVariable(variable: IJupyterVariable): Promise<IJupyterVariable> { | ||
// See if we imported or not into the kernel our special function | ||
await this.importDataFrameScripts(); | ||
await this.importGetVariableInfoScripts(); | ||
|
||
// Then eval calling the variable info function with our target variable | ||
const results = await this.evaluate( | ||
`${DataFrameLoading.VariableInfoFunc}(${variable.name})`, | ||
`${GetVariableInfo.VariableInfoImportFunc}(${variable.name})`, | ||
// tslint:disable-next-line: no-any | ||
(variable as any).frameId | ||
); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The two .py files have not really changed in contents. The helper and two data frame related files have just been squished into one single file here that doesn't need to import anything other than python libraries.