diff --git a/ANALYSIS_123OPEN_TIMEOUT.md b/ANALYSIS_123OPEN_TIMEOUT.md new file mode 100644 index 000000000..f26a15ad3 --- /dev/null +++ b/ANALYSIS_123OPEN_TIMEOUT.md @@ -0,0 +1,296 @@ +# Analysis: 123open Large File Upload Timeout Issue + +## Problem Summary + +When copying large files from Baidu Netdisk to 123open storage, the upload consistently times out with "upload complete timeout" error, while: +- ✅ Baidu Netdisk → Aliyun works fine +- ✅ Direct upload → 123open works fine +- ✅ Baidu Netdisk → 123open (quick upload/秒传 when file exists) works fine +- ❌ Baidu Netdisk → 123open (large file, requires actual upload) always times out + +**Important**: Issue persists even with: +- Concurrency reduced to 1 +- Timeout extended to 2 hours + +## Code Flow Analysis + +### 1. Cross-Storage Transfer Flow (Baidu → 123open) + +``` +FileTransferTask.RunWithNextTaskCallback() + ↓ +op.Get() - Get source file object from Baidu + ↓ +op.Link() - Get download link from Baidu + ↓ +stream.NewSeekableStream() - Create stream from Baidu link + ↓ +op.Put() → Open123.Put() - Upload to 123open + ↓ +[Key Point 1] stream.CacheFullAndHash() - Compute MD5 if not available + ↓ +[Key Point 2] Open123.create() - Create upload session with MD5 + ↓ +[Key Point 3] Open123.Upload() - Upload chunks in parallel + ↓ +[Key Point 4] Open123.complete() - Poll for upload completion (60 times, 1 sec interval) +``` + +### 2. Direct Upload Flow (Local/Direct → 123open) + +``` +Local file/Direct stream + ↓ +op.Put() → Open123.Put() + ↓ +[Same as above, but stream is complete and reliable] +``` + +### 3. Quick Upload Flow (秒传) + +``` +Open123.Put() + ↓ +Open123.create() - Server detects existing file by MD5 + ↓ +createResp.Data.Reuse = true - Quick upload, no actual data transfer needed + ↓ +Return immediately with FileID +``` + +## Key Differences + +| Aspect | Direct Upload | Cross-Storage (Baidu→123) | +|--------|--------------|---------------------------| +| Data Source | Local/Complete | Streaming from Baidu (may have rate limits) | +| MD5 Calculation | From complete file | From streaming data (CacheFullAndHash) | +| Stream Reliability | High | Depends on Baidu download stability | +| Data Integrity | Guaranteed | Potential mismatch during transfer | + +## Potential Root Causes + +### Hypothesis 1: Stream Data Incompleteness +**Theory**: Baidu download stream may not complete or may be rate-limited, causing incomplete data to be uploaded to 123open. + +**Evidence**: +- Direct upload works (complete data source) +- Quick upload works (no data transfer needed) +- Only fails when actually transferring data from Baidu + +**What to check in logs**: +``` +[Transfer] Stream created for - Size matches expected? +[123open] Upload - all chunks uploaded successfully - All chunks complete? +[123open] Complete poll #N: Completed=false, FileID=0 - What's the actual response? +``` + +### Hypothesis 2: MD5 Mismatch +**Theory**: MD5 calculated during streaming (CacheFullAndHash) may differ from the actual uploaded data due to stream issues. + +**Evidence**: +- Quick upload works because MD5 match is verified by server +- CacheFullAndHash reads entire stream to compute MD5, but stream might be incomplete + +**What to check in logs**: +``` +[123open] File: - Computing MD5 via CacheFullAndHash +[123open] File: - CacheFullAndHash completed, MD5: +[123open] Upload - chunk : MD5: - Do chunk MD5s match expectations? 
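+
+A direct way to test Hypotheses 1 and 2 is to hash the bytes as they are actually forwarded, independently of `CacheFullAndHash`. The sketch below is plain `io` plumbing rather than OpenList's stream API (all names are illustrative): wrapping the source reader in an `io.TeeReader` means the digest reflects exactly the bytes the uploader consumed, so a short read or silent truncation from Baidu shows up as a byte-count or MD5 mismatch against the values sent to `create()`.
+
+```go
+package main
+
+import (
+	"crypto/md5"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"strings"
+)
+
+func main() {
+	// Stand-in for the download stream obtained from the Baidu link.
+	src := strings.NewReader("pretend this is the Baidu download stream")
+
+	// Every byte the uploader reads also flows through the hasher.
+	hasher := md5.New()
+	tee := io.TeeReader(src, hasher)
+
+	// Stand-in for the chunk uploader consuming the stream.
+	n, err := io.Copy(io.Discard, tee)
+	if err != nil {
+		fmt.Println("stream error:", err)
+		return
+	}
+	fmt.Printf("forwarded %d bytes, MD5 %s\n", n, hex.EncodeToString(hasher.Sum(nil)))
+	// Compare both values with the size and etag passed to create(): a
+	// mismatch implicates the source stream (Hypotheses 1/2), not complete().
+}
+```
+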
+``` + +### Hypothesis 3: Server-Side Processing Delay +**Theory**: 123open server needs more time to merge/process chunks for large files, but complete() API doesn't properly indicate "processing" state. + +**Evidence**: +- Timeout extended to 2 hours still fails +- API documentation mentions unknown error codes (e.g., 20103) + +**What to check in logs**: +``` +[123open] Complete poll #N: Code=, Message=, Completed=, FileID= +``` + +Look for: +- `Completed=false` with `FileID=0` consistently +- Unknown error codes in Message field +- Any error patterns in API responses + +### Hypothesis 4: Chunk Upload Timing Race Condition +**Theory**: 123open server starts processing before all chunks are fully received, leading to validation failure. + +**Evidence**: +- Parallel upload with retry mechanism +- Server may start merging chunks before all are confirmed + +**What to check in logs**: +``` +[123open] Upload - file: , all chunks uploaded successfully +[123open] Complete poll #1: - How quickly after upload completion? +``` + +### Hypothesis 5: Baidu Stream Rate Limiting +**Theory**: Baidu imposes download rate limits that cause chunks to be read slowly, leading to timeout or partial reads. + +**Evidence**: +- Only happens with Baidu as source +- Works with Aliyun (different rate limit policy) + +**What to check in logs**: +``` +[123open] Upload - chunk : MD5 calculation time +[123open] Upload - chunk : HTTP request time +[Transfer] Link obtained for - Link URL and expiration +``` + +## Logging Added + +### 1. In `drivers/123_open/driver.go` - Put() method +- File upload start with size +- MD5 hash availability and computation timing +- Create response details (Reuse flag, FileID, PreuploadID) +- Quick upload success detection +- Upload phase timing +- **Detailed complete() polling logs** showing: + - Poll attempt number + - API response code and message + - Completed flag status + - FileID value + - Any errors +- Success/timeout final status + +### 2. In `drivers/123_open/upload.go` - Upload() method +- Upload parameters (domain, total size, chunk size, thread count) +- Per-chunk progress: + - Chunk number and MD5 hash + - Upload timing + - HTTP status and API response + - Success/failure status +- Total upload completion time + +### 3. In `internal/fs/copy_move.go` - Transfer task +- Source and destination storage information +- File metadata (name, size) +- Link acquisition timing and details +- Stream creation timing and characteristics +- Stream hash availability +- Upload phase timing +- Final success/failure status + +## How to Use These Logs + +### Step 1: Reproduce the Issue +Attempt to copy a large file (>100MB) from Baidu Netdisk to 123open storage. + +### Step 2: Collect Logs +Enable debug logging and capture the full log output. Look for patterns: + +```bash +# Filter for relevant logs +grep "\[123open\]\|\[Transfer\]" openlist.log > analysis.log +``` + +### Step 3: Analyze the Complete() Responses +This is the most critical part. Look at the complete() polling logs: + +``` +[123open] File: example.zip - Complete poll #1: Code=?, Message=?, Completed=?, FileID=? +[123open] File: example.zip - Complete poll #2: Code=?, Message=?, Completed=?, FileID=? +... +[123open] File: example.zip - Complete poll #60: Code=?, Message=?, Completed=?, FileID=? +``` + +**Key questions**: +1. What are the actual Code and Message values? +2. Is Completed always false or does it change? +3. Is FileID always 0 or does it get set? +4. Are there any error messages in the Message field? 
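+
+A quick way to answer these questions across all 60 attempts is to tally the distinct response patterns instead of reading the polls one by one. This throwaway helper assumes only the poll log format added in this PR; save it as, say, `pollstats.go`:
+
+```go
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"regexp"
+)
+
+func main() {
+	// Matches the line emitted by driver.go:
+	//   Complete poll #N: Code=..., Message=..., Completed=..., FileID=...
+	re := regexp.MustCompile(`Complete poll #\d+: Code=(-?\d+), Message=(.*?), Completed=(\w+), FileID=(\d+)`)
+	counts := map[string]int{}
+	sc := bufio.NewScanner(os.Stdin)
+	for sc.Scan() {
+		if m := re.FindStringSubmatch(sc.Text()); m != nil {
+			key := fmt.Sprintf("Code=%s Completed=%s FileID=%s", m[1], m[3], m[4])
+			counts[key]++
+		}
+	}
+	for pattern, n := range counts {
+		fmt.Printf("%4d polls  %s\n", n, pattern)
+	}
+}
+```
+
+Run it on the filtered log, e.g. `grep "Complete poll" analysis.log | go run pollstats.go`. One pattern repeated 60 times suggests the server is stuck (see the expected findings below); a pattern that changes over time suggests it is making progress and merely ran out of polling budget.
+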
+ +### Step 4: Compare with Direct Upload +Perform a direct upload of the same file size to 123open and compare logs: + +1. MD5 computation time +2. Chunk upload timing +3. Complete() response patterns +4. Time from "upload completed" to "complete poll #1" + +### Step 5: Check Data Integrity +1. Verify that all chunks report successful upload +2. Check if chunk count matches expected value +3. Compare MD5 computed by CacheFullAndHash with actual file MD5 (if available) + +### Step 6: Examine Stream Characteristics +1. Is the download link from Baidu valid? +2. How long does stream creation take? +3. Is the stream size reported correctly? +4. Is MD5 hash available from the stream? + +## Expected Findings + +Based on the analysis, we expect to find one of these scenarios: + +### Scenario A: Server Returns Unknown Status +``` +[123open] Complete poll #N: Code=0, Message="", Completed=false, FileID=0 +``` +**Meaning**: Upload chunks received but server is stuck in processing state without providing clear feedback. + +### Scenario B: Server Returns Error Code +``` +[123open] Complete poll #N: Code=20103, Message="", Completed=false, FileID=0 +``` +**Meaning**: Server encountered an error (possibly validation failure) but error code is undocumented. + +### Scenario C: MD5 Mismatch +``` +[123open] CacheFullAndHash computed MD5: +[123open] Upload chunk MD5s: +[123open] Complete poll: Validation failed +``` +**Meaning**: MD5 computed during streaming doesn't match actual uploaded data. + +### Scenario D: Incomplete Upload +``` +[123open] Upload - Expected chunks, uploaded chunks (M < N) +[123open] Complete poll: Missing chunks +``` +**Meaning**: Not all chunks were successfully uploaded due to stream issues. + +## Next Steps After Analysis + +Once the logs reveal the actual cause: + +1. **If Scenario A**: Need to increase polling timeout or implement exponential backoff +2. **If Scenario B**: Need to handle specific error codes or retry with different strategy +3. **If Scenario C**: Need to verify MD5 before upload or implement checksum validation +4. **If Scenario D**: Need to improve stream reliability or implement chunk verification + +## Testing Recommendations + +1. **Test with different file sizes**: + - Small (< 10MB) - Does it work? + - Medium (10-100MB) - When does it start failing? + - Large (> 100MB) - Consistent failure? + +2. **Test with different sources**: + - Baidu → 123open (fails) + - Aliyun → 123open (works?) + - Local → 123open (works) + +3. **Test network conditions**: + - Fast network vs slow network + - With/without rate limiting + +4. **Monitor server-side behavior**: + - Check 123open web interface during upload + - See if file appears in "processing" state + +## Conclusion + +The comprehensive logging added will help identify: +- Exactly what the complete() API returns (most critical) +- Whether all chunks upload successfully +- How long each phase takes +- Any data integrity issues +- Stream characteristics from Baidu + +The user should run the code with these logs enabled and share the output for further analysis. diff --git a/DEBUG_GUIDE.md b/DEBUG_GUIDE.md new file mode 100644 index 000000000..6d4b324c8 --- /dev/null +++ b/DEBUG_GUIDE.md @@ -0,0 +1,217 @@ +# Debug Guide: 123open Upload Timeout Issue + +## Quick Start + +This enhanced version adds comprehensive logging to help analyze the timeout issue when copying large files from Baidu Netdisk to 123open storage. + +## What Was Added + +### 1. 
Detailed Logging Throughout the Upload Process + +The code now logs every critical step: + +- **Transfer initialization**: Source/destination storage info, file metadata +- **MD5 computation**: When it starts, how long it takes, final hash +- **Upload session creation**: API response details (Reuse, FileID, PreuploadID) +- **Chunk uploads**: Each chunk's progress, MD5, timing, and status +- **Complete polling**: **Most important** - Every poll attempt with full API response + +### 2. Critical Data Points Captured + +The most important logs to watch are the `complete()` polling responses: + +```log +[123open] File: - Complete poll #N: Code=, Message=, Completed=, FileID= +``` + +This will show: +- What error codes the 123open API actually returns (like the mysterious 20103) +- Whether `Completed` stays false or changes +- Whether `FileID` is set or remains 0 +- Any error messages in the response + +## How to Use + +### Step 1: Enable Logging + +Make sure your OpenList configuration has logging enabled. Set log level to at least INFO (DEBUG is even better for detailed chunk logs). + +### Step 2: Reproduce the Issue + +Copy a large file (>100MB recommended) from Baidu Netdisk to 123open storage. + +### Step 3: Collect the Logs + +The logs will show the entire flow. Filter for the most relevant entries: + +```bash +grep "\[123open\]\|\[Transfer\]" /path/to/openlist.log > debug_output.log +``` + +### Step 4: Analyze the Complete() Responses + +Look at the complete polling section. You'll see 60 attempts (or until success). Example: + +```log +[123open] File: largefile.zip - Starting complete() polling (max 60 attempts) +[123open] File: largefile.zip - Complete poll #1: Code=0, Message="", Completed=false, FileID=0 +[123open] File: largefile.zip - Complete poll #2: Code=0, Message="", Completed=false, FileID=0 +... +[123open] File: largefile.zip - Complete poll #60: Code=0, Message="", Completed=false, FileID=0 +[123open] File: largefile.zip - Upload timeout after 60 polls (60s total) +``` + +## What to Look For + +### Scenario 1: Normal Processing Delay +```log +Complete poll #1-5: Completed=false, FileID=0 +Complete poll #6: Completed=true, FileID=12345 +``` +**Meaning**: Server just needed time to process. Solution: Increase polling count or interval. + +### Scenario 2: Unknown Error Code +```log +Complete poll #1-60: Code=20103, Message="Unknown error", Completed=false +``` +**Meaning**: Server is returning an error, possibly validation failure. Need to investigate error code meaning. + +### Scenario 3: Stuck State +```log +Complete poll #1-60: Code=0, Message="", Completed=false, FileID=0 +``` +**Meaning**: Server stuck without giving feedback. Possible reasons: +- Data integrity issue (MD5 mismatch) +- Server-side processing failure +- Missing chunks + +### Scenario 4: Partial Success +```log +Complete poll #1-30: Completed=false, FileID=0 +Complete poll #31-60: Code=404, Message="Upload session not found" +``` +**Meaning**: Upload session expired or was cleaned up. Timing issue. + +## Comparison Test + +To isolate the issue, also test: + +### Direct Upload (Should Work) +Upload the same file size directly to 123open (not from Baidu): +``` +Local file → 123open +``` + +Compare the logs: +1. MD5 computation time +2. Upload duration +3. Complete() response pattern +4. Time from "upload completed" to first complete poll + +### Different Source (Should Work) +Upload from Aliyun instead of Baidu: +``` +Aliyun → 123open +``` + +Compare to identify Baidu-specific issues. 
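+
+If the comparison points at Scenario 1 — the server does eventually report `Completed=true`, just not within 60 polls — the fix direction would be a larger polling budget rather than the fixed 1-second × 60 loop. The following is only a sketch of that shape, not shipped code: `pollOnce` stands in for the driver's `complete()` call, and the deadline/backoff values are placeholders.
+
+```go
+package main
+
+import (
+	"context"
+	"fmt"
+	"time"
+)
+
+// waitForMerge polls until pollOnce reports completion or ctx expires,
+// doubling the wait between attempts up to a cap.
+func waitForMerge(ctx context.Context, pollOnce func() (bool, error)) error {
+	delay := time.Second
+	const maxDelay = 30 * time.Second
+	for {
+		done, err := pollOnce()
+		if err == nil && done {
+			return nil
+		}
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("upload complete timeout: %w", ctx.Err())
+		case <-time.After(delay):
+		}
+		if delay < maxDelay {
+			delay *= 2
+		}
+	}
+}
+
+func main() {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+	defer cancel()
+
+	polls := 0
+	err := waitForMerge(ctx, func() (bool, error) {
+		polls++
+		return polls >= 4, nil // pretend the server finishes on the 4th poll
+	})
+	fmt.Println("polls:", polls, "err:", err)
+}
+```
+
+This is only worth pursuing if the logs ever show `Completed=true`. If all 60 polls are identical (Scenarios 2-3), a longer wait will not help and the Code/Message fields are the lead to follow.
+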
+ +## Log Locations + +Key log entries by component: + +### Transfer Task (`internal/fs/copy_move.go`) +```log +[Transfer] Source: /baidu (baidu_netdisk) -> Dest: /123open (123_open), File: test.zip, Size: 104857600 bytes +[Transfer] Link obtained for test.zip in 250ms, URL: https://... +[Transfer] Stream created for test.zip in 100ms, Size: 104857600 bytes, Hash available: true (MD5: abc123...) +[Transfer] Upload completed for test.zip in 5m30s +``` + +### Upload Session (`drivers/123_open/driver.go`) +```log +[123open] Starting upload - file: test.zip, size: 104857600 bytes +[123open] File: test.zip - Initial hash available: false, hash: +[123open] File: test.zip - Computing MD5 via CacheFullAndHash (this may take time for large files) +[123open] File: test.zip - CacheFullAndHash completed in 2m15s, MD5: abc123... +[123open] File: test.zip - Creating upload session with MD5: abc123... +[123open] File: test.zip - Create response: Reuse=false, FileID=0, PreuploadID=xyz789, SliceSize=5242880 +[123open] File: test.zip - Starting chunk upload +[123open] File: test.zip - Chunk upload completed in 3m10s +[123open] File: test.zip - Starting complete() polling (max 60 attempts) +``` + +### Chunk Upload (`drivers/123_open/upload.go`) +```log +[123open] Upload - file: test.zip, domain: https://upload.123pan.com, total size: 104857600, chunk size: 5242880 +[123open] Upload - file: test.zip, total chunks: 20, upload threads: 3 +[123open] Upload - file: test.zip, chunk 1/20 uploaded successfully in 2.5s +[123open] Upload - file: test.zip, chunk 2/20 uploaded successfully in 2.3s +... +[123open] Upload - file: test.zip, all 20 chunks uploaded successfully in 3m10s +``` + +### Complete Polling (THE MOST IMPORTANT) +```log +[123open] File: test.zip - Complete poll #1: Code=0, Message="", Completed=false, FileID=0 +[123open] File: test.zip - Complete poll #2: Code=0, Message="", Completed=false, FileID=0 +... +``` + +## Common Issues and Solutions + +### Issue: CacheFullAndHash Takes Too Long +**Log**: `CacheFullAndHash completed in 10m30s` +**Cause**: Slow download from Baidu +**Solution**: May need to optimize caching strategy + +### Issue: Chunks Upload Slowly +**Log**: `chunk N/M uploaded in 30s` (consistently slow) +**Cause**: Network issues or rate limiting +**Solution**: Check network, adjust thread count + +### Issue: Complete Always Returns False +**Log**: All 60 polls show `Completed=false, FileID=0` +**Cause**: Most likely the actual bug - need to check error codes and messages +**Solution**: Analyze the Code and Message fields in complete responses + +## Next Steps After Analysis + +Once you have the logs, look for: + +1. **What error code/message does complete() actually return?** + - This is the key to understanding the failure + +2. **How long does each phase take?** + - MD5 computation + - Chunk upload + - Time between last chunk and first complete poll + +3. **Are all chunks uploaded successfully?** + - Check for any chunk upload failures + - Verify chunk count matches expected + +4. **Does the stream from Baidu have any issues?** + - Link expiration time + - Stream size correctness + - MD5 availability + +## Reporting Results + +When sharing results, please include: + +1. Complete log output (filtered for `[123open]` and `[Transfer]`) +2. File size that was tested +3. How long before timeout occurred +4. Any patterns in the complete() responses +5. 
Comparison with direct upload (if tested) + +## Technical Details + +For full technical analysis, see: `ANALYSIS_123OPEN_TIMEOUT.md` + +This includes: +- Complete code flow diagrams +- 5 detailed root cause hypotheses +- Expected findings scenarios +- In-depth analysis methodology diff --git a/PR_SUMMARY.md b/PR_SUMMARY.md new file mode 100644 index 000000000..2d992df6f --- /dev/null +++ b/PR_SUMMARY.md @@ -0,0 +1,186 @@ +# 123open Upload Timeout Analysis - Summary + +## Overview + +This PR adds comprehensive logging and analysis tools to investigate the timeout issue when copying large files from Baidu Netdisk to 123open storage. + +## Problem Statement + +**Symptoms:** +- ❌ Baidu Netdisk → 123open (large files): Always times out with "upload complete timeout" +- ✅ Baidu Netdisk → Aliyun: Works perfectly +- ✅ Direct upload → 123open: No issues +- ✅ Baidu Netdisk → 123open (quick upload/秒传): Works fine + +**Key Observations:** +- Issue persists even with concurrency reduced to 1 +- Issue persists even with timeout extended to 2 hours +- Suggests this is not a simple timeout configuration problem + +## What This PR Provides + +### 1. Enhanced Logging (Code Changes) + +Three files modified to add detailed logging: + +#### `drivers/123_open/driver.go` +- Upload lifecycle tracking (start, MD5 computation, creation, upload, polling) +- **Most critical**: Every `complete()` poll attempt logs full API response +- Timing metrics for each phase +- Success/failure status with context + +#### `drivers/123_open/upload.go` +- Chunk-by-chunk upload progress +- Per-chunk MD5 calculation and timing +- HTTP request/response details +- Thread pool utilization +- Total upload metrics + +#### `internal/fs/copy_move.go` +- Source/destination storage information +- Stream characteristics (size, hash availability) +- Link acquisition timing +- Complete transfer lifecycle + +### 2. Analysis Documentation (New Files) + +#### `ANALYSIS_123OPEN_TIMEOUT.md` - Technical Analysis +**Content:** +- Complete code flow diagrams for all scenarios +- 5 detailed root cause hypotheses: + 1. Stream data incompleteness from Baidu + 2. MD5 hash mismatch during streaming + 3. Server-side processing delay with unclear API response + 4. Chunk upload timing race condition + 5. 
Baidu download rate limiting +- Expected log patterns for each scenario +- In-depth technical analysis methodology + +#### `DEBUG_GUIDE.md` - User Guide +**Content:** +- Quick start instructions +- How to enable and collect logs +- What to look for in each log section +- Common failure scenarios explained +- Log filtering commands +- Reporting template + +#### `TESTING_SCENARIOS.md` - Testing Plan +**Content:** +- 7 specific test cases with expected results +- Detailed test steps and procedures +- Data collection templates +- Comparison metrics between scenarios +- Analysis checklist +- Automated testing suggestions + +## The Most Critical Log Entry + +The key to solving this issue is in the `complete()` API response logs: + +```log +[123open] File: - Complete poll #N: Code=, Message=, Completed=, FileID= +``` + +This will reveal: +- ✅ What error codes the 123open API actually returns (including the mysterious error code 20103 mentioned in comments) +- ✅ What error messages are provided (if any) +- ✅ Whether `Completed` ever becomes `true` or stays `false` forever +- ✅ Whether `FileID` gets assigned or remains `0` +- ✅ If there are patterns or changes in responses over time + +## How to Use This + +### Step 1: Build and Deploy +```bash +go build -tags jsoniter -o openlist . +``` + +### Step 2: Reproduce Issue +Copy a large file (>100MB) from Baidu Netdisk to 123open storage. + +### Step 3: Collect Logs +```bash +# Filter for relevant entries +grep "\[123open\]\|\[Transfer\]" /path/to/openlist.log > debug_output.log +``` + +### Step 4: Analyze Complete() Responses +Look at all 60 polling attempts and identify the pattern: + +**Scenario A - Stuck Without Error:** +``` +Poll #1-60: Code=0, Message="", Completed=false, FileID=0 +``` +→ Server processing stuck, no feedback + +**Scenario B - Error Code Returned:** +``` +Poll #1-60: Code=20103, Message="...", Completed=false, FileID=0 +``` +→ Server validation/processing error + +**Scenario C - Processing Then Success:** +``` +Poll #1-5: Completed=false, FileID=0 +Poll #6: Completed=true, FileID=12345 +``` +→ Normal delay, just need more time + +### Step 5: Compare with Direct Upload +Upload the same file size directly to 123open (not from Baidu) and compare: +- MD5 computation time +- Upload duration +- `complete()` response pattern + +## Expected Outcome + +After running with these logs, you should be able to: + +1. **Identify the exact failure mode** - What does the API actually return? +2. **Determine the root cause** - Is it: + - A timeout configuration issue? + - An API error we're not handling? + - A data integrity problem? + - A Baidu-specific streaming issue? +3. **Plan the fix** - Based on the logs, determine the correct solution + +## Why Not Fix It Now? + +As requested in the problem statement: **"不要修复问题,这个问题应该被自己研究"** (Don't fix the problem, this problem should be researched by yourself) + +This PR provides the tools and analysis framework to understand the issue, but leaves the actual fix to be determined based on empirical evidence from the logs. + +## Files Changed + +``` +drivers/123_open/driver.go - Enhanced with detailed logging +drivers/123_open/upload.go - Enhanced with chunk-level logging +internal/fs/copy_move.go - Enhanced with transfer tracking +ANALYSIS_123OPEN_TIMEOUT.md - NEW: Technical analysis document +DEBUG_GUIDE.md - NEW: User-friendly debugging guide +TESTING_SCENARIOS.md - NEW: Systematic testing plan +``` + +## Build Status + +✅ Code compiles successfully with `go build -tags jsoniter` + +## Next Steps + +1. 
Deploy this version +2. Run the test scenarios from `TESTING_SCENARIOS.md` +3. Collect logs following `DEBUG_GUIDE.md` +4. Analyze the `complete()` API responses +5. Identify the root cause based on evidence +6. Implement the appropriate fix + +## References + +- Technical deep dive: See `ANALYSIS_123OPEN_TIMEOUT.md` +- User guide: See `DEBUG_GUIDE.md` +- Testing: See `TESTING_SCENARIOS.md` + +--- + +**Remember**: The goal of this PR is not to fix the issue, but to provide comprehensive visibility into what's actually happening during the upload process, especially what the 123open API is returning when the timeout occurs. diff --git a/TESTING_SCENARIOS.md b/TESTING_SCENARIOS.md new file mode 100644 index 000000000..41ddcc001 --- /dev/null +++ b/TESTING_SCENARIOS.md @@ -0,0 +1,248 @@ +# Testing Scenarios for 123open Upload Issue + +## Test Matrix + +| Test # | Source | Destination | File Size | Expected Result | Purpose | +|--------|--------|-------------|-----------|-----------------|---------| +| 1 | Baidu | 123open | 50 MB | ❌ Timeout? | Establish baseline - when does it fail? | +| 2 | Baidu | 123open | 150 MB | ❌ Timeout | Confirm large file issue | +| 3 | Baidu | 123open | 500 MB | ❌ Timeout | Test with very large file | +| 4 | Local | 123open | 150 MB | ✅ Success | Verify 123open can handle the size | +| 5 | Aliyun | 123open | 150 MB | ✅ Success? | Test with different source | +| 6 | Baidu | 123open | 150 MB (exists) | ✅ Quick Upload | Verify quick upload works | +| 7 | Baidu | Aliyun | 150 MB | ✅ Success | Verify Baidu download works | + +## Detailed Test Steps + +### Test 1: Small File from Baidu to 123open +**Purpose**: Determine minimum file size that triggers the issue + +**Steps**: +1. Find or create a 50 MB file in Baidu Netdisk +2. Copy to 123open via OpenList +3. Check logs for completion status + +**Expected Logs**: +```log +[123open] File: test-50mb.zip - CacheFullAndHash completed in ~1m +[123open] Upload - all N chunks uploaded successfully in ~2m +[123open] File: test-50mb.zip - Complete poll #X: Completed=true, FileID=12345 +``` + +**If it fails**: Issue affects files smaller than expected. +**If it succeeds**: Issue is size-dependent or timing-dependent. + +--- + +### Test 2: Large File from Baidu to 123open (Expected Failure) +**Purpose**: Reproduce the reported issue + +**Steps**: +1. Use a 150 MB file in Baidu Netdisk +2. Copy to 123open via OpenList +3. Wait for timeout (up to 60 seconds after upload completes) +4. Capture complete logs + +**Expected Logs**: +```log +[123open] File: test-150mb.zip - CacheFullAndHash completed in ~3m +[123open] Upload - all N chunks uploaded successfully in ~5m +[123open] File: test-150mb.zip - Starting complete() polling (max 60 attempts) +[123open] File: test-150mb.zip - Complete poll #1-60: Completed=false +[123open] File: test-150mb.zip - Upload timeout after 60 polls +``` + +**Focus on**: What do the complete() responses actually contain? + +--- + +### Test 3: Direct Upload to 123open (Expected Success) +**Purpose**: Prove that 123open can handle the file size and format + +**Steps**: +1. Upload the same 150 MB file directly to 123open (not from another cloud storage) +2. Use local file or direct HTTP upload +3. 
Compare logs with Test 2 + +**Expected Logs**: +```log +[123open] File: test-150mb.zip - CacheFullAndHash completed in <30s +[123open] Upload - all N chunks uploaded successfully in ~3m +[123open] File: test-150mb.zip - Complete poll #5: Completed=true, FileID=12345 +``` + +**Compare with Test 2**: +- MD5 computation time (should be much faster) +- Upload time (might be similar) +- Complete response (should succeed) + +--- + +### Test 4: Quick Upload Test (Expected Success) +**Purpose**: Verify that the 123open API works when file already exists + +**Steps**: +1. Manually upload a file to 123open first (via web interface) +2. Try to copy the same file from Baidu to 123open +3. Should trigger quick upload (秒传) + +**Expected Logs**: +```log +[123open] File: test-150mb.zip - Create response: Reuse=true, FileID=12345 +[123open] File: test-150mb.zip - Quick upload successful (reuse) +``` + +**Result**: Should complete immediately without timeout. + +--- + +### Test 5: Baidu to Aliyun (Expected Success) +**Purpose**: Verify that the issue is specific to 123open as destination + +**Steps**: +1. Copy the same 150 MB file from Baidu to Aliyun +2. Should complete successfully according to problem description + +**Expected Result**: Success without timeout. + +--- + +## Data Collection Template + +For each test, record: + +```markdown +### Test N: [Description] + +**File**: [filename and size] +**Source**: [storage name and driver] +**Destination**: [storage name and driver] + +**Timing**: +- CacheFullAndHash: [duration] +- Chunk upload: [duration] +- Total time before timeout/success: [duration] + +**Complete() Responses** (first 5 and last 5): +Poll #1: Code=[?], Message=[?], Completed=[?], FileID=[?] +Poll #2: Code=[?], Message=[?], Completed=[?], FileID=[?] +... +Poll #60: Code=[?], Message=[?], Completed=[?], FileID=[?] + +**Result**: [Success/Timeout/Error] + +**Notes**: [Any observations] +``` + +## Key Metrics to Compare + +| Metric | Baidu→123open | Local→123open | Baidu→Aliyun | +|--------|---------------|---------------|--------------| +| MD5 computation time | ? | ? | ? | +| Chunk upload time | ? | ? | ? | +| Time to first complete poll | ? | ? | N/A | +| Complete poll responses | ? | ? | N/A | +| Success rate | 0% | ? | 100% | + +## Analysis Checklist + +After running tests, answer these questions: + +- [ ] What error codes/messages appear in complete() responses? +- [ ] Is the timeout duration the issue, or does complete() never return true? +- [ ] Does file size affect the failure rate? +- [ ] How does MD5 computation time compare between direct and cross-storage? +- [ ] Are all chunks uploaded successfully before timeout? +- [ ] Is there a pattern in the timing (e.g., always fails at 60 seconds)? +- [ ] Does the stream from Baidu have any observable issues? + +## Automated Test Script (Optional) + +If you want to automate testing, you could create a script: + +```bash +#!/bin/bash +# test_upload.sh + +SIZES=(50 100 150 200) +SOURCE="Baidu" +DEST="123open" + +for SIZE in "${SIZES[@]}"; do + echo "Testing ${SIZE}MB file" + # Your OpenList API call here to trigger copy + # Wait for completion or timeout + # Parse logs + # Record results +done +``` + +## Expected Outcome + +After running these tests, you should be able to: + +1. **Confirm the exact failure scenario** + - File size threshold (if any) + - Source/destination combination + +2. **Identify the API response pattern** + - What complete() actually returns + - Whether it's a timeout issue or API error + +3. 
**Isolate the root cause** + - Baidu-specific issue? + - 123open API limitation? + - Data integrity problem? + - Timing/synchronization issue? + +4. **Determine the fix approach** + - Adjust polling strategy? + - Fix MD5 handling? + - Implement retry logic? + - Handle specific error codes? + +## Reporting Format + +When reporting results, please provide: + +```markdown +## Test Results Summary + +**Environment**: +- OpenList version: [version] +- Date: [date] +- Network: [conditions] + +**Test Results**: +[Use the data collection template for each test] + +**Key Findings**: +1. [Finding 1] +2. [Finding 2] +... + +**Complete() Response Patterns**: +[What you observed in the polling logs] + +**Conclusion**: +[Your analysis of what the root cause appears to be] + +**Attached Logs**: +- test1_50mb.log +- test2_150mb_failure.log +- test3_150mb_direct_success.log +``` + +## Next Steps + +Based on test results, the next steps will be: + +1. **If complete() returns error codes**: Research those specific error codes in 123open API docs +2. **If complete() never returns true**: Investigate why server processing fails +3. **If MD5 mismatch**: Fix the CacheFullAndHash implementation +4. **If timing issue**: Adjust polling strategy (longer timeout, exponential backoff) +5. **If Baidu stream issue**: Implement better stream handling/validation + +--- + +**Remember**: The goal is not to fix it yet, but to understand exactly what's happening through detailed logging and systematic testing. diff --git a/drivers/123_open/driver.go b/drivers/123_open/driver.go index ac75e51d7..1a6b16460 100644 --- a/drivers/123_open/driver.go +++ b/drivers/123_open/driver.go @@ -12,6 +12,7 @@ import ( "github.com/OpenListTeam/OpenList/v4/internal/op" "github.com/OpenListTeam/OpenList/v4/internal/stream" "github.com/OpenListTeam/OpenList/v4/pkg/utils" + log "github.com/sirupsen/logrus" ) type Open123 struct { @@ -156,53 +157,92 @@ func (d *Open123) Remove(ctx context.Context, obj model.Obj) error { } func (d *Open123) Put(ctx context.Context, dstDir model.Obj, file model.FileStreamer, up driver.UpdateProgress) (model.Obj, error) { + fileName := file.GetName() + fileSize := file.GetSize() + log.Infof("[123open] Starting upload - file: %s, size: %d bytes", fileName, fileSize) + // 1. 
创建文件 // parentFileID 父目录id,上传到根目录时填写 0 parentFileId, err := strconv.ParseInt(dstDir.GetID(), 10, 64) if err != nil { return nil, fmt.Errorf("parse parentFileID error: %v", err) } + // etag 文件md5 etag := file.GetHash().GetHash(utils.MD5) - if len(etag) < utils.MD5.Width { + needsCacheAndHash := len(etag) < utils.MD5.Width + log.Infof("[123open] File: %s - Initial hash available: %v, hash: %s", fileName, !needsCacheAndHash, etag) + + if needsCacheAndHash { + log.Infof("[123open] File: %s - Computing MD5 via CacheFullAndHash (this may take time for large files)", fileName) + cacheStart := time.Now() _, etag, err = stream.CacheFullAndHash(file, &up, utils.MD5) if err != nil { + log.Errorf("[123open] File: %s - CacheFullAndHash failed: %v", fileName, err) return nil, err } + log.Infof("[123open] File: %s - CacheFullAndHash completed in %v, MD5: %s", fileName, time.Since(cacheStart), etag) } - createResp, err := d.create(parentFileId, file.GetName(), etag, file.GetSize(), 2, false) + + log.Infof("[123open] File: %s - Creating upload session with MD5: %s", fileName, etag) + createResp, err := d.create(parentFileId, fileName, etag, fileSize, 2, false) if err != nil { + log.Errorf("[123open] File: %s - Create request failed: %v", fileName, err) return nil, err } + + log.Infof("[123open] File: %s - Create response: Reuse=%v, FileID=%d, PreuploadID=%s, SliceSize=%d", + fileName, createResp.Data.Reuse, createResp.Data.FileID, createResp.Data.PreuploadID, createResp.Data.SliceSize) + // 是否秒传 if createResp.Data.Reuse { // 秒传成功才会返回正确的 FileID,否则为 0 if createResp.Data.FileID != 0 { + log.Infof("[123open] File: %s - Quick upload successful (reuse), FileID: %d", fileName, createResp.Data.FileID) return File{ - FileName: file.GetName(), - Size: file.GetSize(), + FileName: fileName, + Size: fileSize, FileId: createResp.Data.FileID, Type: 2, Etag: etag, }, nil } + log.Warnf("[123open] File: %s - Reuse flag is true but FileID is 0, proceeding with upload", fileName) } // 2. 上传分片 + log.Infof("[123open] File: %s - Starting chunk upload", fileName) + uploadStart := time.Now() err = d.Upload(ctx, file, createResp, up) if err != nil { + log.Errorf("[123open] File: %s - Chunk upload failed after %v: %v", fileName, time.Since(uploadStart), err) return nil, err } + log.Infof("[123open] File: %s - Chunk upload completed in %v", fileName, time.Since(uploadStart)) - // 3. 上传完毕 - for range 60 { + // 3. 
上传完毕 - 轮询等待服务器处理 + log.Infof("[123open] File: %s - Starting complete() polling (max 60 attempts)", fileName) + pollStart := time.Now() + for i := range 60 { uploadCompleteResp, err := d.complete(createResp.Data.PreuploadID) + + // 详细记录每次轮询的结果 + if err != nil { + log.Warnf("[123open] File: %s - Complete poll #%d: API error: %v", fileName, i+1, err) + } else { + log.Infof("[123open] File: %s - Complete poll #%d: Code=%d, Message=%s, Completed=%v, FileID=%d", + fileName, i+1, uploadCompleteResp.Code, uploadCompleteResp.Message, + uploadCompleteResp.Data.Completed, uploadCompleteResp.Data.FileID) + } + // 返回错误代码未知,如:20103,文档也没有具体说 if err == nil && uploadCompleteResp.Data.Completed && uploadCompleteResp.Data.FileID != 0 { + log.Infof("[123open] File: %s - Upload completed successfully after %d polls (%v total), FileID: %d", + fileName, i+1, time.Since(pollStart), uploadCompleteResp.Data.FileID) up(100) return File{ - FileName: file.GetName(), - Size: file.GetSize(), + FileName: fileName, + Size: fileSize, FileId: uploadCompleteResp.Data.FileID, Type: 2, Etag: etag, @@ -211,7 +251,10 @@ func (d *Open123) Put(ctx context.Context, dstDir model.Obj, file model.FileStre // 若接口返回的completed为 false 时,则需间隔1秒继续轮询此接口,获取上传最终结果。 time.Sleep(time.Second) } - return nil, fmt.Errorf("upload complete timeout") + + log.Errorf("[123open] File: %s - Upload timeout after 60 polls (%v total), PreuploadID: %s", + fileName, time.Since(pollStart), createResp.Data.PreuploadID) + return nil, fmt.Errorf("upload complete timeout after 60 seconds") } func (d *Open123) GetDetails(ctx context.Context) (*model.StorageDetails, error) { diff --git a/drivers/123_open/upload.go b/drivers/123_open/upload.go index 9bfc081a7..244d5321c 100644 --- a/drivers/123_open/upload.go +++ b/drivers/123_open/upload.go @@ -20,6 +20,7 @@ import ( "github.com/OpenListTeam/OpenList/v4/pkg/utils" "github.com/avast/retry-go" "github.com/go-resty/resty/v2" + log "github.com/sirupsen/logrus" ) // 创建文件 V2 @@ -43,24 +44,33 @@ func (d *Open123) create(parentFileID int64, filename string, etag string, size // 上传分片 V2 func (d *Open123) Upload(ctx context.Context, file model.FileStreamer, createResp *UploadCreateResp, up driver.UpdateProgress) error { + fileName := file.GetName() uploadDomain := createResp.Data.Servers[0] size := file.GetSize() chunkSize := createResp.Data.SliceSize + log.Infof("[123open] Upload - file: %s, domain: %s, total size: %d, chunk size: %d", + fileName, uploadDomain, size, chunkSize) + ss, err := stream.NewStreamSectionReader(file, int(chunkSize), &up) if err != nil { + log.Errorf("[123open] Upload - file: %s, failed to create stream section reader: %v", fileName, err) return err } uploadNums := (size + chunkSize - 1) / chunkSize thread := min(int(uploadNums), d.UploadThread) + log.Infof("[123open] Upload - file: %s, total chunks: %d, upload threads: %d", fileName, uploadNums, thread) + threadG, uploadCtx := errgroup.NewOrderedGroupWithContext(ctx, thread, retry.Attempts(3), retry.Delay(time.Second), retry.DelayType(retry.BackOffDelay)) + uploadStartTime := time.Now() for partIndex := range uploadNums { if utils.IsCanceled(uploadCtx) { + log.Warnf("[123open] Upload - file: %s, context canceled at chunk %d/%d", fileName, partIndex+1, uploadNums) break } partIndex := partIndex @@ -74,17 +84,31 @@ func (d *Open123) Upload(ctx context.Context, file model.FileStreamer, createRes b := bytes.NewBuffer(make([]byte, 0, 2048)) threadG.GoWithLifecycle(errgroup.Lifecycle{ Before: func(ctx context.Context) (err error) { + chunkStart := 
time.Now() reader, err = ss.GetSectionReader(offset, size) + if err != nil { + log.Errorf("[123open] Upload - file: %s, chunk %d/%d failed to get section reader: %v", + fileName, partNumber, uploadNums, err) + } else { + log.Debugf("[123open] Upload - file: %s, chunk %d/%d section reader obtained in %v", + fileName, partNumber, uploadNums, time.Since(chunkStart)) + } return }, Do: func(ctx context.Context) (err error) { + chunkStart := time.Now() reader.Seek(0, io.SeekStart) if sliceMD5 == "" { // 把耗时的计算放在这里,避免阻塞其他协程 + md5Start := time.Now() sliceMD5, err = utils.HashReader(utils.MD5, reader) if err != nil { + log.Errorf("[123open] Upload - file: %s, chunk %d/%d MD5 calculation failed: %v", + fileName, partNumber, uploadNums, err) return err } + log.Debugf("[123open] Upload - file: %s, chunk %d/%d MD5: %s (computed in %v)", + fileName, partNumber, uploadNums, sliceMD5, time.Since(md5Start)) reader.Seek(0, io.SeekStart) } @@ -127,14 +151,21 @@ func (d *Open123) Upload(ctx context.Context, file model.FileStreamer, createRes req.Header.Add("Content-Type", w.FormDataContentType()) req.Header.Add("Platform", "open_platform") + uploadReqStart := time.Now() res, err := base.HttpClient.Do(req) if err != nil { + log.Errorf("[123open] Upload - file: %s, chunk %d/%d HTTP request failed after %v: %v", + fileName, partNumber, uploadNums, time.Since(uploadReqStart), err) return err } defer res.Body.Close() + if res.StatusCode != 200 { + log.Errorf("[123open] Upload - file: %s, chunk %d/%d HTTP status %d after %v", + fileName, partNumber, uploadNums, res.StatusCode, time.Since(uploadReqStart)) return fmt.Errorf("slice %d upload failed, status code: %d", partNumber, res.StatusCode) } + b.Reset() _, err = b.ReadFrom(res.Body) if err != nil { @@ -146,11 +177,16 @@ func (d *Open123) Upload(ctx context.Context, file model.FileStreamer, createRes return err } if resp.Code != 0 { + log.Errorf("[123open] Upload - file: %s, chunk %d/%d API error code %d: %s", + fileName, partNumber, uploadNums, resp.Code, resp.Message) return fmt.Errorf("slice %d upload failed: %s", partNumber, resp.Message) } progress := 100 * float64(threadG.Success()+1) / float64(uploadNums+1) up(progress) + + log.Debugf("[123open] Upload - file: %s, chunk %d/%d uploaded successfully in %v (total: %v)", + fileName, partNumber, uploadNums, time.Since(uploadReqStart), time.Since(chunkStart)) return nil }, After: func(err error) { @@ -160,9 +196,13 @@ func (d *Open123) Upload(ctx context.Context, file model.FileStreamer, createRes } if err := threadG.Wait(); err != nil { + log.Errorf("[123open] Upload - file: %s, upload failed after %v: %v", + fileName, time.Since(uploadStartTime), err) return err } + log.Infof("[123open] Upload - file: %s, all %d chunks uploaded successfully in %v", + fileName, uploadNums, time.Since(uploadStartTime)) return nil } diff --git a/internal/fs/copy_move.go b/internal/fs/copy_move.go index 3b6d91aec..e52e6891e 100644 --- a/internal/fs/copy_move.go +++ b/internal/fs/copy_move.go @@ -17,6 +17,7 @@ import ( "github.com/OpenListTeam/OpenList/v4/server/common" "github.com/OpenListTeam/tache" "github.com/pkg/errors" + log "github.com/sirupsen/logrus" ) type taskType uint8 @@ -180,6 +181,11 @@ func (t *FileTransferTask) RunWithNextTaskCallback(f func(nextTask *FileTransfer return errors.WithMessagef(err, "failed get src [%s] file", t.SrcActualPath) } + log.Infof("[Transfer] Source: %s (%s) -> Dest: %s (%s), File: %s, Size: %d bytes", + t.SrcStorageMp, t.SrcStorage.GetStorage().Driver, + t.DstStorageMp, 
t.DstStorage.GetStorage().Driver, + srcObj.GetName(), srcObj.GetSize()) + if srcObj.IsDir() { t.Status = "src object is dir, listing objs" objs, err := op.List(t.Ctx(), t.SrcStorage, t.SrcActualPath, model.ListArgs{}) @@ -240,11 +246,16 @@ func (t *FileTransferTask) RunWithNextTaskCallback(f func(nextTask *FileTransfer } t.Status = "getting src object link" + linkStart := time.Now() link, srcObj, err := op.Link(t.Ctx(), t.SrcStorage, t.SrcActualPath, model.LinkArgs{}) if err != nil { return errors.WithMessagef(err, "failed get [%s] link", t.SrcActualPath) } + log.Infof("[Transfer] Link obtained for %s in %v, URL: %s, Expiration: %v", + srcObj.GetName(), time.Since(linkStart), link.URL, link.Expiration) + // any link provided is seekable + streamStart := time.Now() ss, err := stream.NewSeekableStream(&stream.FileStream{ Obj: srcObj, Ctx: t.Ctx(), @@ -253,9 +264,25 @@ func (t *FileTransferTask) RunWithNextTaskCallback(f func(nextTask *FileTransfer _ = link.Close() return errors.WithMessagef(err, "failed get [%s] stream", t.SrcActualPath) } - t.SetTotalBytes(ss.GetSize()) + + // Log stream characteristics + streamSize := ss.GetSize() + streamHash := ss.GetHash() + log.Infof("[Transfer] Stream created for %s in %v, Size: %d bytes, Hash available: %v (MD5: %s)", + srcObj.GetName(), time.Since(streamStart), streamSize, + streamHash.GetHash(utils.MD5) != "", streamHash.GetHash(utils.MD5)) + + t.SetTotalBytes(streamSize) t.Status = "uploading" - return op.Put(context.WithValue(t.Ctx(), conf.SkipHookKey, struct{}{}), t.DstStorage, t.DstActualPath, ss, t.SetProgress) + + uploadStart := time.Now() + err = op.Put(context.WithValue(t.Ctx(), conf.SkipHookKey, struct{}{}), t.DstStorage, t.DstActualPath, ss, t.SetProgress) + if err != nil { + log.Errorf("[Transfer] Upload failed for %s after %v: %v", srcObj.GetName(), time.Since(uploadStart), err) + return err + } + log.Infof("[Transfer] Upload completed for %s in %v", srcObj.GetName(), time.Since(uploadStart)) + return nil } var (