Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

serialize ContextItems in SerializedChatMessages #5910

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package com.sourcegraph.cody.agent.protocol_generated;

object ProtocolTypeAdapters {
fun register(gson: com.google.gson.GsonBuilder) {
gson.registerTypeAdapter(ContextItem::class.java, ContextItem.deserializer)
gson.registerTypeAdapter(CustomCommandResult::class.java, CustomCommandResult.deserializer)
gson.registerTypeAdapter(ProtocolAuthStatus::class.java, ProtocolAuthStatus.deserializer)
gson.registerTypeAdapter(TextEdit::class.java, TextEdit.deserializer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package com.sourcegraph.cody.agent.protocol_generated;
import com.google.gson.annotations.SerializedName;

data class SerializedChatMessage(
val contextFiles: List<ContextItem>? = null,
val contextFiles: List<SerializedContextItem>? = null,
val error: ChatError? = null,
val editorState: Any? = null,
val speaker: SpeakerEnum, // Oneof: human, assistant, system
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
@file:Suppress("FunctionName", "ClassName", "unused", "EnumEntryName", "UnusedImport")
package com.sourcegraph.cody.agent.protocol_generated;

import com.google.gson.annotations.SerializedName;

data class SerializedContextItem(
val uri: String,
val title: String? = null,
val source: ContextItemSource? = null, // Oneof: user, editor, search, initial, priority, unified, selection, terminal, history, agentic
val range: RangeData? = null,
val repoName: String? = null,
val revision: String? = null,
val description: String? = null,
val size: Long? = null,
val isIgnored: Boolean? = null,
val isTooLarge: Boolean? = null,
val isTooLargeReason: String? = null,
val provider: String? = null,
val icon: String? = null,
val metadata: List<String>? = null,
val type: TypeEnum, // Oneof: repository, tree, symbol, openctx, file
val remoteRepositoryName: String? = null,
val ranges: List<Range>? = null,
val repoID: String,
val isWorkspaceRoot: Boolean,
val name: String,
val symbolName: String,
val kind: SymbolKind, // Oneof: class, function, method
val providerUri: String,
val mention: MentionParams? = null,
) {

enum class TypeEnum {
@SerializedName("repository") Repository,
@SerializedName("tree") Tree,
@SerializedName("symbol") Symbol,
@SerializedName("openctx") Openctx,
@SerializedName("file") File,
}
}

2 changes: 1 addition & 1 deletion agent/src/cli/command-bench/strategy-chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export async function evaluateChatStrategy(
const concisenessScore = await llm.judge(concisenessPrompt({ response: llmResponse }))
const contextItems = query?.contextFiles?.map(i => ({
source: i.source,
file: i.uri.path + `:${i.range?.start.line}-${i.range?.end.line}`,
file: i.uri + `:${i.range?.start.line}-${i.range?.end.line}`,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What consumes this? Because if it looks for that : separator for line ranges, it is going to encounter a : in file:/// now.

content: i.content,
}))

Expand Down
12 changes: 9 additions & 3 deletions agent/src/cli/command-chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@ import ora, { type Ora, spinners } from 'ora'
import path from 'node:path'

import type { Polly } from '@pollyjs/core'
import { type ContextItem, ModelUsage, TokenCounterUtils, isDotCom } from '@sourcegraph/cody-shared'
import {
type ContextItem,
ModelUsage,
type SerializedContextItem,
TokenCounterUtils,
isDotCom,
} from '@sourcegraph/cody-shared'
import { Command } from 'commander'

import Table from 'easy-table'
Expand Down Expand Up @@ -335,8 +341,8 @@ export async function chatAction(options: ChatOptions): Promise<number> {
return 0
}

function uriDisplayText(item: ContextItem, endpoint: string): string {
const uri = vscode.Uri.from(item.uri as any)
function uriDisplayText(item: SerializedContextItem, endpoint: string): string {
const uri = vscode.Uri.parse(item.uri)
// Workaround for strange URI authority resopnse, reported in
// https://sourcegraph.slack.com/archives/C05AGQYD528/p1721382757890889
const remoteURL = new URL(uri.path, endpoint).toString()
Expand Down
44 changes: 44 additions & 0 deletions agent/src/cli/scip-codegen/AllowLists.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
export const IGNORED_INFO_SYMBOL: ReadonlyArray<string> = []

export const IGNORED_PROPERTIES: ReadonlyArray<string> = [
'npm @sourcegraph/telemetry ', // Too many complicated types from this package
'`inline-completion-item-provider-config-singleton.ts`/tracer0:',
'`observable.d.ts`/Subscription#',
'`provider.ts`/Provider#configSource',
'`StatusBar.ts`/CodyStatusBar',
'lexicalEditor/`nodes.ts`/content0',
]

export const IGNORED_TYPE_REFS: ReadonlyArray<string> = [
'`provider.ts`/Provider#',
'npm @sourcegraph/telemetry', // Too many complicated types from this package
'/TelemetryEventParameters#',
' lib/`lib.es5.d.ts`/Omit#',
]

/*
Types listed in this array will allow for loose subtyping of their members
By default the type `Item` below would not be allowed
interface HasTitle {
kind: 'has-title'
title: string
}

interface HasDescription {
kind: 'has-description'
description: string
}

type Item = {
title?: string
description?: string
} & (HasTitle | HasDescription)

but with `ALLOW_SUBTYPING_FOR_MEMBERS` set to ['Item'] the above type would be
allowed and would be generated with both title and description as nullable
even though one of them is required depending on the kind of the item
*/

export const ALLOW_SUBTYPING_FOR_MEMBERS: ReadonlyArray<string> = [
'lexicalEditor/`nodes.ts`/SerializedContextItem#',
]
38 changes: 28 additions & 10 deletions agent/src/cli/scip-codegen/BaseCodegen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,16 @@ export abstract class BaseCodegen {
return JSON.stringify(msg.toObject(), null, 2)
}

public isStringType(type: scip.Type): boolean {
public isStringType(
type: scip.Type,
options: TypecheckerOptions = { allowSubtyping: false }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is unclear to me what this means.

Consider the case immediately below:

if (type.has_constant_type) {
  return type.constant_type.constant.has_string_constant
}

In that sense, "some literal" : string has type "some literal" and is a subtype of string. So if allowSubtyping is false then we are returning an inaccurate result there.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jamesmcnamara wrote this. @jamesmcnamara, can you pls reply?

Copy link
Contributor

@jamesmcnamara jamesmcnamara Oct 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This option allows us consider the a union type of T | undefined | null (or some subset) as assignable to T. Perhaps allowNullableSupertyping would be better?

): boolean {
if (type.has_constant_type) {
return type.constant_type.constant.has_string_constant
}

if (type.has_union_type) {
return type.union_type.types.every(type => this.isStringType(type))
return type.union_type.types.every(type => this.isStringType(type, options))
}

if (type.has_intersection_type) {
Expand All @@ -125,6 +128,13 @@ export abstract class BaseCodegen {
}

if (type.has_type_ref) {
const sym = type.type_ref.symbol
if (
options.allowSubtyping &&
[typescriptKeyword('undefined'), typescriptKeyword('null')].includes(sym)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question about the TypeScript AST here: Is a type like T? desugared to a representation like T | undefined by this point?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jamesmcnamara wrote this. @jamesmcnamara, can you pls reply?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, can confirm.

) {
return true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If allow sub typing is true, this doesn't make sense to me, because string | undefined is a supertype of string... all values of string inhabit string | undefined...

Would it make sense to recast the function not as isStringType but something about a bound on a type involving string?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jamesmcnamara wrote this. @jamesmcnamara, can you pls reply?

}
return (
type.type_ref.symbol === typescriptKeyword('string') ||
this.isStringTypeInfo(this.symtab.info(type.type_ref.symbol))
Expand All @@ -134,7 +144,7 @@ export abstract class BaseCodegen {
return false
}

public isBooleanTypeInfo(info: scip.SymbolInformation): boolean {
public isBooleanTypeInfo(info: scip.SymbolInformation, options?: TypecheckerOptions): boolean {
if (info.signature.has_value_signature && info.signature.value_signature.tpe.has_constant_type) {
return info.signature.value_signature.tpe.constant_type.constant.has_boolean_constant
}
Expand All @@ -147,33 +157,37 @@ export abstract class BaseCodegen {
return false
}

public isStringTypeInfo(info: scip.SymbolInformation): boolean {
public isStringTypeInfo(info: scip.SymbolInformation, options?: TypecheckerOptions): boolean {
if (info.signature.has_value_signature) {
return this.isStringType(info.signature.value_signature.tpe)
return this.isStringType(info.signature.value_signature.tpe, options)
}

if (
info.signature.has_type_signature &&
info.signature.type_signature.type_parameters &&
info.signature.type_signature.type_parameters.symlinks.length === 0
) {
return this.isStringType(info.signature.type_signature.lower_bound)
return this.isStringType(info.signature.type_signature.lower_bound, options)
}

if (info.signature.has_class_signature && info.kind === scip.SymbolInformation.Kind.Enum) {
return info.signature.class_signature.declarations.symlinks.every(member =>
this.isStringTypeInfo(this.symtab.info(member))
this.isStringTypeInfo(this.symtab.info(member), options)
)
}

return false
}

public compatibleSignatures(a: scip.SymbolInformation, b: scip.SymbolInformation): boolean {
if (this.isStringTypeInfo(a) && this.isStringTypeInfo(b)) {
public compatibleSignatures(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am uncertain about this approach.

IF I understand it, you're saying that

Item = {
    title?: string
    description?: string
} & (HasTitle | HasDescription)

is safe because HasTitle has a string field to discriminate on, and HasDescription has a string field to discriminate on? But IIRC the downstream doesn't work that way... it discriminates on a specific field.

And in practice the escape hatch works for SerializedContextItem because there's no discrimination happening... the consumer just gets that big union type. But would things blow up if we had a method with a type like SerializedContextItemTypeA | SerializedContextItemTypeB?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jamesmcnamara wrote this. @jamesmcnamara, can you pls reply?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There shouldn't be any method descriptions in here because this is used to generate code from the agent protocol so it should only contain immediate data types.

A type alias like type AorB = SerializedContextItemTypeA | SerializedContextItemTypeB should generate a discriminated union without any trouble.

The only issue we're trying to resolve here is the combination of a union type and an intersection type which is treated at the top level as an intersection type (as described in this comment). The current behavior is to check that every shared field in the sub-discriminated unions are identical and we throw an exception if they differ (even on nullability).

(Note that there is a bug in the existing implementation in that the output intersection type has all fields from all discriminated union members included, so:

type Intersect = {
  name: string
} & ({title: string} | {description: string})

would produce:

type Intersect = {
  name: string
  title: string
  description: string
}

instead of

type Intersect = {
  name: string
  title: string | undefined
  description: string | undefined
}

and that is not changed in this implementation)

The only case we're trying to handle here is where the the types of a field on the intersection type and the sub-union types differ, but they differ only in their nullability (as with the above Item type). 99% of the time, I think this was a mistake by the programmer, or this type should not be included in the agent protocol, hence it not being handled by default.

However in the 1% of cases where it is intentional, as with SerializedContextItem which takes advantage of the type-refinedness on the TypeScript side and is reasonably included in the protocol for chat migrations and exports, we have 4 options:

  1. Define a protocol specific variant of SerializedContextItem (like we do for ProtocolAuthStatus) and convert every method coming out of the agent that uses that type to the simpler protocol variant.
  2. Simplify the type on the TypeScript side to be consistent with the current code generation (would require inserting null handling in various parts of the app which would be misleading as they were never reachable)
  3. Generate the type as an union type, inlining all properties of the intersection type onto each variant of the union.
  4. Generate the type as an intersection type with each field using it's highest upper-bound supertype from the unions (or null).

1 is the most practical and probably the best practice but introduces an annoying maintenance burden of maintaining two types that are semantically identical but duplicated. 3 feels to me the most correct way but would require a careful, non-trivial implementation. 4 is what the current implementation does only for white-listed types (and indeed we see there is exactly one instance of this in the entire protocol so it's not likely a valuable case to invest in.)

@olafurpg's opinion was to use option 1 and in general to avoid allowing complex types to leak into the protocol definition. I implemented 4 because it was quick and allowed us to avoid the conversion types. But with this much pushback and the bug noted above, perhaps 1 is really the way we should go.

a: scip.SymbolInformation,
b: scip.SymbolInformation,
options?: TypecheckerOptions
): boolean {
if (this.isStringTypeInfo(a, options) && this.isStringTypeInfo(b, options)) {
return true
}
if (this.isBooleanTypeInfo(a) && this.isBooleanTypeInfo(b)) {
if (this.isBooleanTypeInfo(a, options) && this.isBooleanTypeInfo(b, options)) {
return true
}
// TODO: more optimized comparison?
Expand Down Expand Up @@ -306,3 +320,7 @@ export abstract class BaseCodegen {
throw new TypeError(`type has no properties: ${this.debug(type)}`)
}
}

export interface TypecheckerOptions {
allowSubtyping: boolean
}
Loading
Loading