Skip to content

Commit 5321b25

Browse files
authored
Merge pull request #57 from amihos/fix/hsg-multi-sector-search
fix: Enable multi-sector search for cross-sector memory retrieval
2 parents 8862e3c + da69140 commit 5321b25

File tree

2 files changed

+95
-16
lines changed

2 files changed

+95
-16
lines changed

backend/src/core/cfg.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,9 @@ export const env = {
4747
openai_model: process.env.OM_OPENAI_MODEL,
4848
gemini_key:
4949
process.env.GEMINI_API_KEY || process.env.OM_GEMINI_API_KEY || "",
50-
aws_model: str(
51-
process.env.AWS_REGION,
52-
process.env.AWS_ACCESS_KEY_ID,
53-
process.env.AWS_SECRET_ACCESS_KEY
54-
),
55-
50+
AWS_REGION: process.env.AWS_REGION || "",
51+
AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID || "",
52+
AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY || "",
5653
ollama_url: str(
5754
process.env.OLLAMA_URL || process.env.OM_OLLAMA_URL,
5855
"http://localhost:11434",

backend/src/memory/hsg.ts

Lines changed: 92 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,11 @@ export const sector_configs: Record<string, sector_cfg> = {
107107
};
108108
export const sectors = Object.keys(sector_configs);
109109
/**
 * Weights applied by `compute_hybrid_score` to its similarity, token-overlap,
 * waypoint, recency and tag-match inputs before the sum is passed through
 * `sigmoid`. The keyword score is added on top of these, unweighted.
 *
 * The five weights total 1.0; keep that invariant when retuning so the
 * pre-sigmoid range of the weighted part does not drift.
 */
export const scoring_weights = {
  similarity: 0.40,
  overlap: 0.20,
  waypoint: 0.15,
  recency: 0.15,
  tag_match: 0.10,
};
115116
export const hybrid_params = {
116117
tau: 3,
@@ -131,6 +132,58 @@ export const reinforcement = {
131132
prune_threshold: 0.05,
132133
};
133134

135+
/**
 * Sector relationship matrix for cross-sector retrieval.
 *
 * Indexed as `sector_relationships[query_sector][memory_sector]`. Higher
 * values mean a stronger relationship and therefore a smaller penalty: in
 * `hsg_query` the looked-up value multiplies the similarity of a memory whose
 * primary sector is not one of the query's classified sectors.
 *
 * A sector has no entry for itself, and the matrix is symmetric
 * (`m[a][b] === m[b][a]`); keep both properties when editing a row.
 */
export const sector_relationships: Record<string, Record<string, number>> = {
  semantic: { procedural: 0.8, episodic: 0.6, reflective: 0.7, emotional: 0.4 },
  procedural: { semantic: 0.8, episodic: 0.6, reflective: 0.6, emotional: 0.3 },
  episodic: { reflective: 0.8, semantic: 0.6, procedural: 0.6, emotional: 0.7 },
  reflective: { episodic: 0.8, semantic: 0.7, procedural: 0.6, emotional: 0.6 },
  emotional: { episodic: 0.7, reflective: 0.6, semantic: 0.4, procedural: 0.3 },
};
144+
145+
// Patterns that mark a query as time-oriented. Compiled once at module load
// instead of on every call. None of them carry the `g` or `y` flag, so
// `RegExp.prototype.test` keeps no state between calls and the shared
// instances are safe to reuse.
const temporal_marker_patterns: readonly RegExp[] = [
  // Relative day/week words
  /\b(today|yesterday|tomorrow|this\s+week|last\s+week|this\s+morning)\b/i,
  // ISO date format like 2025-11-20
  /\b\d{4}-\d{2}-\d{2}\b/,
  // 20xx dates with optional "/" or "-" separators, e.g. 20251120 or 2025/11/20
  /\b20\d{2}[/-]?(0[1-9]|1[0-2])[/-]?(0[1-9]|[12]\d|3[01])\b/,
  // Full English month name followed by a day number
  /\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}/i,
  // "what did I do" / "what have we done" style questions
  /\bwhat\s+(did|have)\s+(i|we)\s+(do|done)\b/i,
];

/**
 * Detects temporal markers in a query.
 *
 * @param text - Raw query text.
 * @returns `true` when at least one temporal pattern (relative day/week
 *   word, date, month + day, or "what did I do" phrasing) occurs in `text`;
 *   `false` otherwise, including for the empty string.
 */
function has_temporal_markers(text: string): boolean {
  return temporal_marker_patterns.some((pattern) => pattern.test(text));
}
156+
157+
/**
 * Calculates a tag match score between the query tokens and a memory's tags.
 *
 * Loads the memory through `q.get_mem.get`, parses its `tags` field as a JSON
 * array, and awards per tag:
 *   - 2 points when the lower-cased tag is itself one of the query tokens;
 *   - otherwise 1 point when the tag and any query token contain one another.
 * The total is divided by `2 * tags.length` (the best possible total) and
 * capped at 1.
 *
 * @param memory_id - Id of the memory whose tags are scored.
 * @param query_tokens - Query tokens to compare against. Tags are lower-cased
 *   before comparison; tokens are used as given (presumably already
 *   canonical/lower-case from the caller — TODO confirm).
 * @returns A score in [0, 1]; 0 when the memory has no tags, the tags are not
 *   a JSON array, or parsing fails.
 */
async function compute_tag_match_score(memory_id: string, query_tokens: Set<string>): Promise<number> {
  const mem = await q.get_mem.get(memory_id);
  if (!mem?.tags) return 0;

  try {
    const tags = JSON.parse(mem.tags);
    if (!Array.isArray(tags)) return 0;

    let points = 0;
    for (const tag of tags) {
      const tag_lower = String(tag).toLowerCase();
      // An empty string is a substring of every string, so an empty tag
      // would otherwise "partially match" every query token.
      if (!tag_lower) continue;

      if (query_tokens.has(tag_lower)) {
        points += 2; // Exact match bonus
        continue;
      }

      for (const token of query_tokens) {
        if (!token) continue; // same empty-substring hazard on the token side
        if (tag_lower.includes(token) || token.includes(tag_lower)) {
          // Partial credit is capped at one point per tag: the normaliser
          // below assumes at most 2 points per tag, and a partial match must
          // never outscore an exact one.
          points += 1;
          break;
        }
      }
    }
    return Math.min(1.0, points / Math.max(1, tags.length * 2));
  } catch {
    // Malformed tag JSON is treated as "no tags" rather than failing the query.
    return 0;
  }
}
186+
134187
const compress_vec_for_storage = (
135188
vec: number[],
136189
target_dim: number,
@@ -340,13 +393,15 @@ export function compute_hybrid_score(
340393
wp_wt: number,
341394
rec_sc: number,
342395
keyword_score: number = 0,
396+
tag_match: number = 0,
343397
): number {
344398
const s_p = boosted_sim(sim);
345399
const raw =
346400
scoring_weights.similarity * s_p +
347401
scoring_weights.overlap * tok_ov +
348402
scoring_weights.waypoint * wp_wt +
349403
scoring_weights.recency * rec_sc +
404+
scoring_weights.tag_match * tag_match +
350405
keyword_score;
351406
return sigmoid(raw);
352407
}
@@ -520,7 +575,9 @@ export async function expand_via_waypoints(
520575
const neighs = await q.get_neighbors.all(cur.id);
521576
for (const neigh of neighs) {
522577
if (vis.has(neigh.dst_id)) continue;
523-
const exp_wt = cur.weight * neigh.weight * 0.8;
578+
// Clamp neighbor weight to valid range - protect against corrupted data
579+
const neigh_wt = Math.min(1.0, Math.max(0, neigh.weight || 0));
580+
const exp_wt = cur.weight * neigh_wt * 0.8;
524581
if (exp_wt < 0.1) continue;
525582
const exp_item = {
526583
id: neigh.dst_id,
@@ -683,11 +740,20 @@ export async function hsg_query(
683740
const cached = cache.get(h);
684741
if (cached && Date.now() - cached.t < TTL) return cached.r;
685742
const qc = classify_content(qt);
686-
const cs = [qc.primary, ...qc.additional];
743+
const is_temporal = has_temporal_markers(qt);
687744
const qtk = canonical_token_set(qt);
688-
const ss = f?.sectors?.length
689-
? cs.filter((s) => f.sectors!.includes(s))
690-
: cs;
745+
// Store primary sectors for scoring purposes
746+
const primary_sectors = [qc.primary, ...qc.additional];
747+
// Determine which sectors to search
748+
let ss: string[];
749+
if (f?.sectors?.length) {
750+
// User explicitly requested specific sectors
751+
ss = f.sectors;
752+
} else {
753+
// IMPORTANT: Search ALL sectors to enable cross-sector retrieval
754+
// The sector relationship penalty will down-weight less relevant sectors
755+
ss = [...sectors];
756+
}
691757
if (!ss.length) ss.push("semantic");
692758
const qe: Record<string, number[]> = {};
693759
for (const s of ss) qe[s] = await embedForSector(qt, s);
@@ -771,24 +837,40 @@ export async function hsg_query(
771837
bsec = sec;
772838
}
773839
}
840+
841+
// Apply sector relationship penalty for cross-sector results
842+
const mem_sector = m.primary_sector;
843+
const query_sector = qc.primary;
844+
let sector_penalty = 1.0;
845+
if (mem_sector !== query_sector && !primary_sectors.includes(mem_sector)) {
846+
// Apply penalty based on sector relationship strength
847+
sector_penalty = sector_relationships[query_sector]?.[mem_sector] || 0.3;
848+
}
849+
const adjusted_sim = bs * sector_penalty;
850+
774851
const em = exp.find((e: { id: string }) => e.id === mid);
775-
const ww = em?.weight || 0;
852+
// Clamp waypoint weight to valid range [0, 1] - protect against corrupted data
853+
const ww = Math.min(1.0, Math.max(0, em?.weight || 0));
776854
const ds = (Date.now() - m.last_seen_at) / 86400000;
777855
const sal = calc_decay(m.primary_sector, m.salience, ds);
778856
const mtk = canonical_token_set(m.content);
779857
const tok_ov = compute_token_overlap(qtk, mtk);
780858
const rec_sc = calc_recency_score(m.last_seen_at);
781859

860+
// Calculate tag match score
861+
const tag_match = await compute_tag_match_score(mid, qtk);
862+
782863
const keyword_boost =
783864
tier === "hybrid"
784865
? (keyword_scores.get(mid) || 0) * env.keyword_boost
785866
: 0;
786867
const fs = compute_hybrid_score(
787-
bs,
868+
adjusted_sim,
788869
tok_ov,
789870
ww,
790871
rec_sc,
791872
keyword_boost,
873+
tag_match,
792874
);
793875
const msec = await q.get_vecs_by_id.all(mid);
794876
const sl = msec.map((v) => v.sector);

0 commit comments

Comments
 (0)