Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get variable info with background thread #15495

Merged
merged 7 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# Query the Python session for info about variables (type, value, children) and DataFrame summaries
from collections import namedtuple
from importlib.util import find_spec
import json

# repr() length above which truncateString shortens the value.
maxStringLength = 1000
# Full type names for which getVariableDescription reports an element "count".
collectionTypes = ["list", "tuple", "set"]
# Upper bound on the number of indexed children returned per call by
# _VSCODE_getAllChildrenDescriptions.
arrayPageSize = 50


def truncateString(variable):
    """Return ``repr(variable)``, shortened when it exceeds ``maxStringLength``.

    A shortened value keeps the first ``maxStringLength - 1`` characters of the
    repr followed by ``"..."``. When ``variable`` is exactly a ``str``, the
    length of the original string is appended as well.
    """
    text = repr(variable)
    if len(text) <= maxStringLength:
        return text

    shortened = text[: maxStringLength - 1] + "..."
    if type(variable) == str:
        # Only plain strings report how long the untruncated value was.
        shortened = shortened + "\n\nLength: " + str(len(variable))
    return shortened


# Snapshot of the two pandas display settings that set_pandas_display_options changes.
DisplayOptions = namedtuple("DisplayOptions", ["width", "max_columns"])


def set_pandas_display_options(display_options=None):
    """Apply pandas display options and return the ones previously in effect.

    Args:
        display_options: A ``DisplayOptions`` to apply. When omitted (or
            falsy), ``max_columns`` is set to 100 and ``width`` to 1000.

    Returns:
        A ``DisplayOptions`` holding the values that were active before this
        call, or ``None`` when pandas is not installed or cannot be imported.
    """
    if find_spec("pandas") is None:
        return None

    try:
        import pandas as _VSCODE_PD
    except ImportError:
        # A spec can exist for an installation that still fails to import.
        # Treat that like "pandas not available" instead of failing later on
        # an unbound name.
        return None

    display = _VSCODE_PD.options.display
    original_display = DisplayOptions(
        width=display.width,
        max_columns=display.max_columns,
    )

    if display_options:
        display.max_columns = display_options.max_columns
        display.width = display_options.width
    else:
        display.max_columns = 100
        display.width = 1000

    return original_display


def getValue(variable):
    """Return the (possibly truncated) repr of ``variable``.

    For objects whose type is named ``DataFrame`` (and when pandas can be
    found), the pandas display options are changed for the duration of the
    repr and restored afterwards, even if producing the repr raises.
    """
    is_dataframe = type(variable).__name__ == "DataFrame" and find_spec("pandas") is not None
    saved_options = set_pandas_display_options() if is_dataframe else None

    try:
        return truncateString(variable=variable)
    finally:
        # Put back whatever display options were active before the repr.
        if saved_options:
            set_pandas_display_options(saved_options)


def getPropertyNames(variable):
    """Return the names from ``dir(variable)`` that do not start with ``_``."""
    return [name for name in dir(variable) if not name.startswith("_")]


def getFullType(varType):
    """Return the dotted name of ``varType``.

    The module is used as a prefix unless it is ``builtins``. The name itself
    is ``__qualname__`` when available, otherwise ``__name__``. Returns
    ``None`` when ``varType`` has neither attribute.
    """
    prefix = ""
    if hasattr(varType, "__module__"):
        owner = varType.__module__
        if owner != "builtins":
            prefix = owner + "."

    for attribute in ("__qualname__", "__name__"):
        if hasattr(varType, attribute):
            return prefix + getattr(varType, attribute)
    return None


def getVariableDescription(variable):
    """Build a dict describing ``variable``.

    Keys produced:
        type: full type name from ``getFullType``.
        interfaces: full names of every class in the type's ``__mro__``
            (only when the type has one).
        count: ``len(variable)``, only for types listed in ``collectionTypes``.
        hasNamedChildren: True for objects with a ``__dict__`` and for plain dicts.
        value: the truncated repr from ``getValue``.
    """
    valueType = type(variable)
    fullTypeName = getFullType(valueType)

    description = {"type": fullTypeName}
    if hasattr(valueType, "__mro__"):
        description["interfaces"] = [getFullType(base) for base in valueType.__mro__]

    if hasattr(variable, "__len__") and fullTypeName in collectionTypes:
        description["count"] = len(variable)

    namedChildren = hasattr(variable, "__dict__") or type(variable) == dict
    description["hasNamedChildren"] = namedChildren

    description["value"] = getValue(variable)
    return description


def getChildProperty(root, propertyChain):
    """Follow ``propertyChain`` starting at ``root`` and return the value reached.

    Each link of the chain is resolved against the current value:

    * an ``int`` indexes it; sets, which have no ``__getitem__``, are indexed
      through ``list(value)``;
    * a key present in a plain ``dict`` is looked up in that dict;
    * anything else is read as an attribute.

    Args:
        root: The object the chain starts from.
        propertyChain: Sequence of indexes, dict keys, or attribute names.

    Returns:
        The value at the end of the chain, or ``None`` when a link cannot be
        resolved or resolving it raises.
    """
    try:
        variable = root
        for link in propertyChain:
            if type(link) is int:
                if hasattr(variable, "__getitem__"):
                    variable = variable[link]
                elif type(variable) is set:
                    variable = list(variable)[link]
                else:
                    return None
            elif type(variable) is dict and link in variable:
                # Keys take precedence over attributes: otherwise a key such
                # as "items" or "keys" would resolve to the dict method, and a
                # non-string key would make hasattr() raise.
                variable = variable[link]
            elif hasattr(variable, link):
                variable = getattr(variable, link)
            else:
                return None
    except Exception:
        # Best effort: any failure while walking the chain means "no such child".
        return None

    return variable


### Get info on variables at the root level
def _VSCODE_getVariableDescriptions(varNames):
    """Return a JSON array describing each name in ``varNames`` found in ``globals()``.

    Names that are not present in the module globals are skipped. Every entry
    carries the variable description plus ``name``, ``root``, an empty
    ``propertyChain`` and ``language``.
    """
    scope = globals()
    described = []
    for varName in varNames:
        if varName not in scope:
            continue
        entry = {"name": varName}
        entry.update(getVariableDescription(scope[varName]))
        entry["root"] = varName
        entry["propertyChain"] = []
        entry["language"] = "python"
        described.append(entry)

    return json.dumps(described)


### Get info on children of a variable reached through the given property chain
def _VSCODE_getAllChildrenDescriptions(rootVarName, propertyChain, startIndex):
    """Return a JSON array describing the children of a variable.

    The parent is the global named ``rootVarName``, or the value reached from
    it by following ``propertyChain``.

    * Parents with a ``count`` (see ``collectionTypes``) yield one page of
      indexed children: at most ``arrayPageSize`` items starting at
      ``startIndex``.
    * Parents with named children yield their public attributes (objects with
      a ``__dict__``) or their keys (plain dicts). Children that resolve to
      ``None`` or to a bound method are left out.

    Args:
        rootVarName: Name of the root variable in the module globals.
        propertyChain: Indexes / names leading from the root to the parent.
        startIndex: First index of the page for indexed children.

    Returns:
        A JSON string. It is ``"[]"`` when the root variable is missing or is
        ``None``, so callers always receive the same kind of value.
    """
    root = globals().get(rootVarName)
    if root is None:
        return json.dumps([])

    parent = root
    if len(propertyChain) > 0:
        parent = getChildProperty(root, propertyChain)

    children = []
    parentInfo = getVariableDescription(parent)
    if "count" in parentInfo:
        if parentInfo["count"] > 0:
            lastItem = min(parentInfo["count"], startIndex + arrayPageSize)
            children = [
                {
                    **getVariableDescription(getChildProperty(parent, [i])),
                    "name": str(i),
                    "root": rootVarName,
                    "propertyChain": propertyChain + [i],
                    "language": "python",
                }
                for i in range(startIndex, lastItem)
            ]
    elif parentInfo["hasNamedChildren"]:
        childrenNames = []
        if hasattr(parent, "__dict__"):
            childrenNames = getPropertyNames(parent)
        elif type(parent) == dict:
            childrenNames = list(parent.keys())

        for prop in childrenNames:
            child_property = getChildProperty(parent, [prop])
            if child_property is None or type(child_property).__name__ == "method":
                continue
            children.append(
                {
                    **getVariableDescription(child_property),
                    "name": prop,
                    "root": rootVarName,
                    "propertyChain": propertyChain + [prop],
                    # Same shape as indexed children and root descriptions.
                    "language": "python",
                }
            )

    return json.dumps(children)


def _VSCODE_getVariableSummary(variable):
if variable is None:
return None
# check if the variable is a dataframe
if type(variable).__name__ == "DataFrame" and find_spec("pandas") is not None:
import io

buffer = io.StringIO()
variable.info(buf=buffer)
return json.dumps({"summary": buffer.getvalue()})

return None
12 changes: 11 additions & 1 deletion src/kernels/jupyter/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import {
IJupyterConnection,
GetServerOptions,
LiveRemoteKernelConnectionMetadata,
RemoteKernelConnectionMetadata
RemoteKernelConnectionMetadata,
IKernel
} from '../types';
import { ClassType } from '../../platform/ioc/types';
import { ContributedKernelFinderKind, IContributedKernelFinder } from '../internalTypes';
Expand Down Expand Up @@ -257,3 +258,12 @@ export interface IJupyterServerProviderRegistry {
serverProvider: JupyterServerProvider
): JupyterServerCollection;
}

export const IBackgroundThreadService = Symbol('IBackgroundThreadService');
export interface IBackgroundThreadService {
execCodeInBackgroundThread<T>(
kernel: IKernel,
codeWithReturnStatement: string[],
token: CancellationToken
): Promise<T | undefined>;
}
54 changes: 20 additions & 34 deletions src/kernels/variables/pythonVariableRequester.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import type * as nbformat from '@jupyterlab/nbformat';
import { inject, injectable } from 'inversify';
import { CancellationToken } from 'vscode';
import { traceError } from '../../platform/logging';
import { traceError, traceWarning } from '../../platform/logging';
import { DataScience } from '../../platform/common/utils/localize';
import { stripAnsi } from '../../platform/common/utils/regexp';
import { JupyterDataRateLimitError } from '../../platform/errors/jupyterDataRateLimitError';
Expand All @@ -14,6 +14,7 @@ import { IKernel } from '../types';
import { IKernelVariableRequester, IJupyterVariable, IVariableDescription } from './types';
import { IDataFrameScriptGenerator, IVariableScriptGenerator } from '../../platform/common/types';
import { SessionDisposedError } from '../../platform/errors/sessionDisposedError';
import { IBackgroundThreadService } from '../jupyter/types';

type DataFrameSplitFormat = {
index: (number | string)[];
Expand Down Expand Up @@ -75,7 +76,8 @@ async function safeExecuteSilently(
export class PythonVariablesRequester implements IKernelVariableRequester {
constructor(
@inject(IVariableScriptGenerator) private readonly varScriptGenerator: IVariableScriptGenerator,
@inject(IDataFrameScriptGenerator) private readonly dfScriptGenerator: IDataFrameScriptGenerator
@inject(IDataFrameScriptGenerator) private readonly dfScriptGenerator: IDataFrameScriptGenerator,
@inject(IBackgroundThreadService) private readonly backgroundThreadService: IBackgroundThreadService
) {}

public async getDataFrameInfo(
Expand Down Expand Up @@ -148,27 +150,19 @@ export class PythonVariablesRequester implements IKernelVariableRequester {
return result;
}

public async getVariableValueSummary(
targetVariable: IJupyterVariable,
kernel: IKernel,
_cancelToken?: CancellationToken
) {
const { code, cleanupCode, initializeCode } =
await this.varScriptGenerator.generateCodeToGetVariableValueSummary({ variableName: targetVariable.name });
const results = await safeExecuteSilently(
public async getVariableValueSummary(targetVariable: IJupyterVariable, kernel: IKernel, token: CancellationToken) {
const code = await this.varScriptGenerator.generateCodeToGetVariableValueSummary(targetVariable.name);

const content = await this.backgroundThreadService.execCodeInBackgroundThread<{ summary: string }>(
kernel,
{ code, cleanupCode, initializeCode },
{
traceErrors: true,
traceErrorsMessage: 'Failure in execute_request for getDataFrameInfo',
telemetryName: Telemetry.PythonVariableFetchingCodeFailure
}
code.split(/\r?\n/),
token
);

try {
const text = this.extractJupyterResultText(results);
return text;
return content?.summary;
} catch (_ex) {
traceWarning(`Exception when getting variable summary for variable ${targetVariable.name}`);
return undefined;
}
}
Expand All @@ -177,35 +171,27 @@ export class PythonVariablesRequester implements IKernelVariableRequester {
kernel: IKernel,
parent: IVariableDescription | undefined,
startIndex: number,
token?: CancellationToken
token: CancellationToken
): Promise<IVariableDescription[]> {
if (!kernel.session) {
return [];
}

const { code, cleanupCode, initializeCode } =
await this.varScriptGenerator.generateCodeToGetAllVariableDescriptions({
isDebugging: false,
parent,
startIndex
});
const options = parent ? { root: parent.root, propertyChain: parent.propertyChain, startIndex } : undefined;
const code = await this.varScriptGenerator.generateCodeToGetAllVariableDescriptions(options);

const results = await safeExecuteSilently(
const content = await this.backgroundThreadService.execCodeInBackgroundThread<IVariableDescription[]>(
kernel,
{ code, cleanupCode, initializeCode },
{
traceErrors: true,
traceErrorsMessage: 'Failure in execute_request when retrieving variables',
telemetryName: Telemetry.PythonVariableFetchingCodeFailure
}
code.split(/\r?\n/),
token
);

if (kernel.disposed || kernel.disposing || token?.isCancellationRequested) {
if (kernel.disposed || kernel.disposing || token?.isCancellationRequested || !content) {
return [];
}

try {
return this.deserializeJupyterResult(results) as Promise<IVariableDescription[]>;
return content;
} catch (ex) {
traceError(ex);
return [];
Expand Down
13 changes: 7 additions & 6 deletions src/platform/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,11 @@ type ScriptCode = {
*/
cleanupCode?: string;
};
export type ParentOptions = {
root: string;
propertyChain: (string | number)[];
startIndex: number;
};
export interface IVariableScriptGenerator {
generateCodeToGetVariableInfo(options: { isDebugging: boolean; variableName: string }): Promise<ScriptCode>;
generateCodeToGetVariableProperties(options: {
Expand All @@ -304,12 +309,8 @@ export interface IVariableScriptGenerator {
stringifiedAttributeNameList: string;
}): Promise<ScriptCode>;
generateCodeToGetVariableTypes(options: { isDebugging: boolean }): Promise<ScriptCode>;
generateCodeToGetAllVariableDescriptions(options: {
isDebugging: boolean;
parent: { root: string; propertyChain: (string | number)[] } | undefined;
startIndex: number;
}): Promise<ScriptCode>;
generateCodeToGetVariableValueSummary(options: { variableName: string }): Promise<ScriptCode>;
generateCodeToGetAllVariableDescriptions(parentOptions: ParentOptions | undefined): Promise<string>;
generateCodeToGetVariableValueSummary(variableName: string): Promise<string>;
}
export const IDataFrameScriptGenerator = Symbol('IDataFrameScriptGenerator');
export interface IDataFrameScriptGenerator {
Expand Down
Loading
Loading