Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/lib/rate-limit/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ import {
normalizeResetTime,
} from "./time-utils";

// Session TTL in seconds, read once from the SESSION_TTL env var.
// Falls back to 300 s (5 min) when the variable is unset, non-numeric,
// or non-positive, so a bad value can never disable session cleanup.
const SESSION_TTL_SECONDS = (() => {
  const candidate = Number.parseInt(process.env.SESSION_TTL ?? "", 10);
  if (Number.isFinite(candidate) && candidate > 0) {
    return candidate;
  }
  return 300;
})();
// The same TTL expressed in milliseconds (passed to the Lua script as ARGV[4]).
const SESSION_TTL_MS = 1000 * SESSION_TTL_SECONDS;

interface CostLimit {
amount: number | null;
period: "5h" | "daily" | "weekly" | "monthly";
Expand Down Expand Up @@ -566,14 +572,14 @@ export class RateLimitService {
const key = `provider:${providerId}:active_sessions`;
const now = Date.now();

// 执行 Lua 脚本:原子性检查 + 追踪(TC-041 修复版)
const result = (await RateLimitService.redis.eval(
CHECK_AND_TRACK_SESSION,
1, // KEYS count
key, // KEYS[1]
sessionId, // ARGV[1]
limit.toString(), // ARGV[2]
now.toString() // ARGV[3]
now.toString(), // ARGV[3]
SESSION_TTL_MS.toString() // ARGV[4]
)) as [number, number, number];

const [allowed, count, tracked] = result;
Expand Down
58 changes: 34 additions & 24 deletions src/lib/redis/lua-scripts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,66 @@
*/

/**
 * Atomic concurrency check + session tracking (TC-041 fixed version).
 *
 * Steps:
 *  1. Remove sessions whose last-seen timestamp fell out of the TTL window.
 *  2. Check whether this session is already tracked (avoids double counting).
 *  3. Reject a NEW session when the concurrency limit is reached.
 *  4. Otherwise track (or refresh) the session — all in one atomic script.
 *
 * KEYS[1]: provider:${providerId}:active_sessions
 * ARGV[1]: sessionId
 * ARGV[2]: limit (concurrency limit; <= 0 disables the check)
 * ARGV[3]: now (current timestamp, ms)
 * ARGV[4]: ttlMs (optional cleanup window in ms; defaults to 300000)
 *
 * Return:
 *  - {1, count, 1} - allowed (newly tracked), returns new count and tracked=1
 *  - {1, count, 0} - allowed (already tracked), returns current count and tracked=0
 *  - {0, count, 0} - rejected (limit reached), returns current count and tracked=0
 */
export const CHECK_AND_TRACK_SESSION = `
local provider_key = KEYS[1]
local session_id = ARGV[1]
local limit = tonumber(ARGV[2])
local now = tonumber(ARGV[3])
local ttl = tonumber(ARGV[4]) or 300000

-- Guard against an invalid TTL: a zero/negative window would make the
-- cutoff >= now, so ZREMRANGEBYSCORE would wipe every tracked session and
-- silently bypass the concurrency limit. Clamp back to the 5-minute default.
if ttl <= 0 then
  ttl = 300000
end

-- 1. Cleanup expired sessions (older than the TTL window)
local cutoff = now - ttl
redis.call('ZREMRANGEBYSCORE', provider_key, '-inf', cutoff)

-- 2. Check if the session is already tracked (ZSCORE is false when absent)
local is_tracked = redis.call('ZSCORE', provider_key, session_id)

-- 3. Current concurrency = cardinality of the live-session ZSET
local current_count = redis.call('ZCARD', provider_key)

-- 4. Enforce the limit, but never reject an already-tracked session
if limit > 0 and not is_tracked and current_count >= limit then
  return {0, current_count, 0} -- {allowed=false, current_count, tracked=0}
end

-- 5. Track the session (ZADD only refreshes the score for existing members)
redis.call('ZADD', provider_key, now, session_id)

-- 6. Key TTL follows the session TTL, with a 1h floor to cover active sessions
local ttl_seconds = math.floor(ttl / 1000)
local expire_ttl = math.max(3600, ttl_seconds)
redis.call('EXPIRE', provider_key, expire_ttl)

-- 7. Return success
if is_tracked then
  -- Already tracked, count unchanged
  return {1, current_count, 0} -- {allowed=true, count, tracked=0}
else
  -- New tracking, count + 1
  return {1, current_count + 1, 1} -- {allowed=true, new_count, tracked=1}
end
`;
Expand Down
45 changes: 25 additions & 20 deletions src/lib/session-tracker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@ import { getRedisClient } from "./redis";
* - user:${userId}:active_sessions (ZSET): 同上
*/
export class SessionTracker {
private static readonly SESSION_TTL = 300000; // 5 分钟(毫秒)
// Session TTL in seconds from the SESSION_TTL env var; any unset,
// non-numeric, or non-positive value falls back to 300 s (5 min).
private static readonly SESSION_TTL_SECONDS = (() => {
  const raw = process.env.SESSION_TTL ?? "";
  const value = Number.parseInt(raw, 10);
  if (Number.isFinite(value) && value > 0) {
    return value;
  }
  return 300;
})();
// Millisecond form of the TTL, used for ZSET score cutoffs.
private static readonly SESSION_TTL_MS = 1000 * SessionTracker.SESSION_TTL_SECONDS;
Copy link
Contributor

@github-actions github-actions bot Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[CRITICAL] [TYPE-MISSING-VALIDATION] SESSION_TTL can break cleanup/counting when <= 0 or non-numeric

Why this is a problem: In src/lib/session-tracker.ts:20-21 the new TTL is parsed without validation:

private static readonly SESSION_TTL_SECONDS = parseInt(process.env.SESSION_TTL || "300", 10);

If SESSION_TTL is 0/negative, the new cutoff math (e.g. now - SessionTracker.SESSION_TTL_MS) makes ZREMRANGEBYSCORE remove all entries (cutoff >= now), effectively zeroing counts and bypassing concurrency limits. If it's non-numeric, ttlSeconds becomes NaN and gets passed into Redis EXPIRE/SETEX, which can throw and disable the refresh path.

Suggested fix:

private static readonly SESSION_TTL_SECONDS = (() => {
  const ttl = Number(process.env.SESSION_TTL);
  if (!Number.isFinite(ttl) || ttl <= 0) return 300;
  return Math.floor(ttl);
})();
private static readonly SESSION_TTL_MS = SessionTracker.SESSION_TTL_SECONDS * 1000;

private static readonly CLEANUP_PROBABILITY = 0.01;

/**
* 初始化 SessionTracker,自动清理旧格式数据
Expand Down Expand Up @@ -174,26 +179,26 @@ export class SessionTracker {
try {
const now = Date.now();
const pipeline = redis.pipeline();
const ttlSeconds = SessionTracker.SESSION_TTL_SECONDS;
const providerZSetKey = `provider:${providerId}:active_sessions`;

// 更新所有相关 ZSET 的时间戳(滑动窗口)
pipeline.zadd("global:active_sessions", now, sessionId);
pipeline.zadd(`key:${keyId}:active_sessions`, now, sessionId);
pipeline.zadd(`provider:${providerId}:active_sessions`, now, sessionId);
pipeline.zadd(providerZSetKey, now, sessionId);
// Use dynamic TTL based on session TTL (at least 1h to cover active sessions)
pipeline.expire(providerZSetKey, Math.max(3600, ttlSeconds));
if (userId !== undefined) {
pipeline.zadd(`user:${userId}:active_sessions`, now, sessionId);
}

// 修复 Bug:同步刷新 session 绑定信息的 TTL
//
// 问题:ZSET 条目(上面 zadd)会在每次请求时更新时间戳,但绑定信息 key 的 TTL 不会自动刷新
// 导致:session 创建 5 分钟后,ZSET 仍有记录(仍被计为活跃),但绑定信息已过期,造成:
// 1. 并发检查被绕过(无法从绑定信息查询 session 所属 provider/key,检查失效)
// 2. Session 复用失败(无法确定 session 绑定关系,被迫创建新 session)
//
// 解决:每次 refreshSession 时同步刷新绑定信息 TTL(与 ZSET 保持 5 分钟生命周期一致)
pipeline.expire(`session:${sessionId}:provider`, 300); // 5 分钟(秒)
pipeline.expire(`session:${sessionId}:key`, 300);
pipeline.setex(`session:${sessionId}:last_seen`, 300, now.toString());
pipeline.expire(`session:${sessionId}:provider`, ttlSeconds);
pipeline.expire(`session:${sessionId}:key`, ttlSeconds);
pipeline.setex(`session:${sessionId}:last_seen`, ttlSeconds, now.toString());

if (Math.random() < SessionTracker.CLEANUP_PROBABILITY) {
const cutoffMs = now - SessionTracker.SESSION_TTL_MS;
pipeline.zremrangebyscore(providerZSetKey, "-inf", cutoffMs);
}
Comment on lines +198 to +201
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This probabilistic cleanup of the providerZSetKey appears to be redundant. The same zset is already cleaned in other parts of the application:

  1. Deterministically within the CHECK_AND_TRACK_SESSION Lua script, which is executed on the hot path for providers with session limits.
  2. When getProviderSessionCount is called for statistics.

This redundancy adds complexity and a small performance overhead on the write path. To simplify the logic, you could remove this probabilistic cleanup and rely on the other two mechanisms.


const results = await pipeline.exec();

Expand Down Expand Up @@ -374,14 +379,14 @@ export class SessionTracker {

try {
const now = Date.now();
const fiveMinutesAgo = now - SessionTracker.SESSION_TTL;
const cutoffMs = now - SessionTracker.SESSION_TTL_MS;

// 第一阶段:批量清理过期 session 并获取 session IDs
const cleanupPipeline = redis.pipeline();
for (const providerId of providerIds) {
const key = `provider:${providerId}:active_sessions`;
// 清理过期 session
cleanupPipeline.zremrangebyscore(key, "-inf", fiveMinutesAgo);
cleanupPipeline.zremrangebyscore(key, "-inf", cutoffMs);
// 获取剩余 session IDs
cleanupPipeline.zrange(key, 0, -1);
}
Expand Down Expand Up @@ -480,10 +485,10 @@ export class SessionTracker {
}

const now = Date.now();
const fiveMinutesAgo = now - SessionTracker.SESSION_TTL;
const cutoffMs = now - SessionTracker.SESSION_TTL_MS;

// 清理过期 session
await redis.zremrangebyscore(key, "-inf", fiveMinutesAgo);
await redis.zremrangebyscore(key, "-inf", cutoffMs);

// 获取剩余的 session ID
return await redis.zrange(key, 0, -1);
Expand Down Expand Up @@ -514,10 +519,10 @@ export class SessionTracker {

try {
const now = Date.now();
const fiveMinutesAgo = now - SessionTracker.SESSION_TTL;
const cutoffMs = now - SessionTracker.SESSION_TTL_MS;

// 1. 清理过期 session(5 分钟前)
await redis.zremrangebyscore(key, "-inf", fiveMinutesAgo);
await redis.zremrangebyscore(key, "-inf", cutoffMs);

// 2. 获取剩余的 session ID
const sessionIds = await redis.zrange(key, 0, -1);
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/lib/rate-limit/service-extra.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,25 @@ describe("RateLimitService - other quota paths", () => {
expect(result).toEqual({ allowed: true, count: 1, tracked: true });
});

it("checkAndTrackProviderSession: should pass SESSION_TTL_MS as ARGV[4] to Lua script", async () => {
const { RateLimitService } = await import("@/lib/rate-limit");

redisClientRef.eval.mockResolvedValueOnce([1, 1, 1]);
await RateLimitService.checkAndTrackProviderSession(9, "sess", 2);

// Verify eval was called with the correct args including ARGV[4] = SESSION_TTL_MS
expect(redisClientRef.eval).toHaveBeenCalledTimes(1);

const evalCall = redisClientRef.eval.mock.calls[0];
// evalCall: [script, numkeys, key, sessionId, limit, now, ttlMs]
// Indices: 0 1 2 3 4 5 6
expect(evalCall.length).toBe(7); // script + numkeys + 1 key + 4 ARGV = 7

// ARGV[4] (index 6) should be SESSION_TTL_MS derived from env (default 300s = 300000ms)
const ttlMsArg = evalCall[6];
expect(ttlMsArg).toBe("300000");
});

it("trackUserDailyCost:fixed 模式应使用 STRING + TTL", async () => {
const { RateLimitService } = await import("@/lib/rate-limit");

Expand Down
Loading
Loading