Skip to content

Commit fa3dcfc

Browse files
amihos and claude committed
fix: Enable multi-sector search for cross-sector memory retrieval
The HSG algorithm had a fundamental flaw where it only searched within sectors that matched the query's classification. This caused memories stored in one sector (e.g., reflective) to be invisible to queries classified as another sector (e.g., episodic).

Changes:
- Search ALL sectors by default instead of only classified sectors
- Add sector relationship matrix with graduated penalties (0.3-0.8)
- Add tag matching to scoring formula (0.10 weight)
- Adjust scoring weights: similarity 0.6→0.4, recency 0.05→0.15
- Add temporal marker detection for date-based queries
- Add compute_tag_match_score function for tag-based boosting
- Apply sector relationship penalty for cross-sector results

Before: Query "what did I do today 2025-11-20" couldn't find reflective memories from today because it was classified as episodic and only searched episodic sector.

After: Same query finds reflective memories and returns them with appropriate sector penalty applied, enabling true cross-sector retrieval.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 8862e3c commit fa3dcfc

File tree

1 file changed

+92
-10
lines changed

1 file changed

+92
-10
lines changed

backend/src/memory/hsg.ts

Lines changed: 92 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,11 @@ export const sector_configs: Record<string, sector_cfg> = {
107107
};
108108
export const sectors = Object.keys(sector_configs);
109109
// Per-signal weights consumed by compute_hybrid_score: each weight multiplies
// its matching input (boosted similarity, token overlap, waypoint weight,
// recency score, tag match); the weighted sum plus the keyword score is then
// passed through sigmoid().
export const scoring_weights = {
110-
similarity: 0.6,
111-
overlap: 0.2,
110+
similarity: 0.40,
111+
overlap: 0.20,
112112
waypoint: 0.15,
113-
recency: 0.05,
113+
recency: 0.15,
114+
tag_match: 0.10,
114115
};
115116
export const hybrid_params = {
116117
tau: 3,
@@ -131,6 +132,58 @@ export const reinforcement = {
131132
prune_threshold: 0.05,
132133
};
133134

135+
// Sector relationship matrix for cross-sector retrieval.
// Indexed as sector_relationships[query_sector][memory_sector]; a sector has
// no entry for itself.
136+
// Higher values = stronger relationship = less penalty. hsg_query multiplies
// the best similarity by the looked-up value and falls back to 0.3 when the
// pair is missing from this table.
137+
export const sector_relationships: Record<string, Record<string, number>> = {
138+
semantic: { procedural: 0.8, episodic: 0.6, reflective: 0.7, emotional: 0.4 },
139+
procedural: { semantic: 0.8, episodic: 0.6, reflective: 0.6, emotional: 0.3 },
140+
episodic: { reflective: 0.8, semantic: 0.6, procedural: 0.6, emotional: 0.7 },
141+
reflective: { episodic: 0.8, semantic: 0.7, procedural: 0.6, emotional: 0.6 },
142+
emotional: { episodic: 0.7, reflective: 0.6, semantic: 0.4, procedural: 0.3 },
143+
};
144+
145+
// Patterns that mark a query as time-oriented. Built once at module load
// instead of on every call; none of them use the g/y flags, so sharing the
// RegExp objects across calls is safe (test() keeps no state).
const temporal_marker_patterns: readonly RegExp[] = [
  // Relative day / period words ("today", "last night", "this afternoon", ...)
  /\b(today|yesterday|tomorrow|tonight|last\s+night|(this|last)\s+(week|month|year)|this\s+(morning|afternoon|evening))\b/i,
  // ISO date like 2025-11-20; month limited to 01-12 and day to 01-31 so that
  // arbitrary digit groups such as "1234-56-78" are not mistaken for dates
  /\b\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])\b/,
  // 20xx dates with optional "/" or "-" separators (2025/11/20, 20251120)
  /\b20\d{2}[/-]?(0[1-9]|1[0-2])[/-]?(0[1-9]|[12]\d|3[01])\b/,
  // Month name followed by a day number ("november 20", "March 3rd")
  /\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}/i,
  // "what did I do" / "what have we done" style recall questions
  /\bwhat\s+(did|have)\s+(i|we)\s+(do|done)\b/i,
];

// Detect temporal markers in query for full-sector search.
// Returns true when `text` contains a relative time word, a calendar date,
// a "<month> <day>" phrase, or a "what did I do" style question.
function has_temporal_markers(text: string): boolean {
  return temporal_marker_patterns.some((pattern) => pattern.test(text));
}
156+
157+
// Calculate tag match score between query tokens and memory tags.
//
// Loads the memory row for `memory_id`, parses its JSON-encoded `tags` array
// and awards, per tag:
//   2 points when the lower-cased tag equals a query token,
//   1 point  when the tag and at least one token contain one another.
// The total is divided by the best possible total (2 points per tag) and
// clamped to 1.0.
//
// Returns 0 when the memory is missing, has no tags, the tag JSON is malformed
// or not an array, or there is no usable query token.
async function compute_tag_match_score(memory_id: string, query_tokens: Set<string>): Promise<number> {
  const mem = await q.get_mem.get(memory_id);
  if (!mem?.tags) return 0;

  let parsed: unknown;
  try {
    parsed = JSON.parse(mem.tags);
  } catch {
    // Malformed tag JSON is treated as "no tags" (best effort, as before).
    return 0;
  }
  if (!Array.isArray(parsed) || parsed.length === 0) return 0;

  // An empty token is a substring of every tag, so it must not take part in
  // partial matching.
  const usable_tokens = [...query_tokens].filter((token) => token.length > 0);
  if (usable_tokens.length === 0) return 0;

  let points = 0;
  for (const raw_tag of parsed) {
    const tag = String(raw_tag).trim().toLowerCase();
    // An empty tag is a substring of every token; skip it for the same reason.
    if (!tag) continue;

    if (query_tokens.has(tag)) {
      points += 2; // Exact match bonus
    } else if (usable_tokens.some((token) => tag.includes(token) || token.includes(tag))) {
      // Partial credit is granted once per tag, so a loosely related tag can
      // never outscore an exact match or exceed the 2-points-per-tag budget.
      points += 1;
    }
  }
  return Math.min(1.0, points / (parsed.length * 2));
}
186+
134187
const compress_vec_for_storage = (
135188
vec: number[],
136189
target_dim: number,
@@ -340,13 +393,15 @@ export function compute_hybrid_score(
340393
wp_wt: number,
341394
rec_sc: number,
342395
keyword_score: number = 0,
396+
tag_match: number = 0,
343397
): number {
344398
const s_p = boosted_sim(sim);
345399
const raw =
346400
scoring_weights.similarity * s_p +
347401
scoring_weights.overlap * tok_ov +
348402
scoring_weights.waypoint * wp_wt +
349403
scoring_weights.recency * rec_sc +
404+
scoring_weights.tag_match * tag_match +
350405
keyword_score;
351406
return sigmoid(raw);
352407
}
@@ -520,7 +575,9 @@ export async function expand_via_waypoints(
520575
const neighs = await q.get_neighbors.all(cur.id);
521576
for (const neigh of neighs) {
522577
if (vis.has(neigh.dst_id)) continue;
523-
const exp_wt = cur.weight * neigh.weight * 0.8;
578+
// Clamp neighbor weight to valid range - protect against corrupted data
579+
const neigh_wt = Math.min(1.0, Math.max(0, neigh.weight || 0));
580+
const exp_wt = cur.weight * neigh_wt * 0.8;
524581
if (exp_wt < 0.1) continue;
525582
const exp_item = {
526583
id: neigh.dst_id,
@@ -683,11 +740,20 @@ export async function hsg_query(
683740
const cached = cache.get(h);
684741
if (cached && Date.now() - cached.t < TTL) return cached.r;
685742
const qc = classify_content(qt);
686-
const cs = [qc.primary, ...qc.additional];
743+
const is_temporal = has_temporal_markers(qt);
687744
const qtk = canonical_token_set(qt);
688-
const ss = f?.sectors?.length
689-
? cs.filter((s) => f.sectors!.includes(s))
690-
: cs;
745+
// Store primary sectors for scoring purposes
746+
const primary_sectors = [qc.primary, ...qc.additional];
747+
// Determine which sectors to search
748+
let ss: string[];
749+
if (f?.sectors?.length) {
750+
// User explicitly requested specific sectors
751+
ss = f.sectors;
752+
} else {
753+
// IMPORTANT: Search ALL sectors to enable cross-sector retrieval
754+
// The sector relationship penalty will down-weight less relevant sectors
755+
ss = [...sectors];
756+
}
691757
if (!ss.length) ss.push("semantic");
692758
const qe: Record<string, number[]> = {};
693759
for (const s of ss) qe[s] = await embedForSector(qt, s);
@@ -771,24 +837,40 @@ export async function hsg_query(
771837
bsec = sec;
772838
}
773839
}
840+
841+
// Apply sector relationship penalty for cross-sector results
842+
const mem_sector = m.primary_sector;
843+
const query_sector = qc.primary;
844+
let sector_penalty = 1.0;
845+
if (mem_sector !== query_sector && !primary_sectors.includes(mem_sector)) {
846+
// Apply penalty based on sector relationship strength
847+
sector_penalty = sector_relationships[query_sector]?.[mem_sector] || 0.3;
848+
}
849+
const adjusted_sim = bs * sector_penalty;
850+
774851
const em = exp.find((e: { id: string }) => e.id === mid);
775-
const ww = em?.weight || 0;
852+
// Clamp waypoint weight to valid range [0, 1] - protect against corrupted data
853+
const ww = Math.min(1.0, Math.max(0, em?.weight || 0));
776854
const ds = (Date.now() - m.last_seen_at) / 86400000;
777855
const sal = calc_decay(m.primary_sector, m.salience, ds);
778856
const mtk = canonical_token_set(m.content);
779857
const tok_ov = compute_token_overlap(qtk, mtk);
780858
const rec_sc = calc_recency_score(m.last_seen_at);
781859

860+
// Calculate tag match score
861+
const tag_match = await compute_tag_match_score(mid, qtk);
862+
782863
const keyword_boost =
783864
tier === "hybrid"
784865
? (keyword_scores.get(mid) || 0) * env.keyword_boost
785866
: 0;
786867
const fs = compute_hybrid_score(
787-
bs,
868+
adjusted_sim,
788869
tok_ov,
789870
ww,
790871
rec_sc,
791872
keyword_boost,
873+
tag_match,
792874
);
793875
const msec = await q.get_vecs_by_id.all(mid);
794876
const sl = msec.map((v) => v.sector);

0 commit comments

Comments
 (0)