Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ usage: leetcode-export [-h] [--cookies COOKIES] [--folder FOLDER]
[--problem-statement-content PROBLEM_STATEMENT_CONTENT]
[--submission-filename SUBMISSION_FILENAME]
[--only-accepted] [--only-last-submission]
[--language LANGUAGE_UNPROCESSED] [-v] [-vv] [-V]
[--language LANGUAGE_UNPROCESSED]
[--checkpoint-file CHECKPOINT_FILE] [-v] [-vv] [-V]

Export LeetCode submissions

Expand Down Expand Up @@ -144,11 +145,49 @@ options:
html, php, golang, scala, pythonml,
rust, ruby, bash, swift
example: --language=python,cpp,java
--checkpoint-file CHECKPOINT_FILE
path to checkpoint file for incremental backups (stores Unix
timestamp of newest processed submission)
-v, --verbose enable verbose logging details
-vv, --extra-verbose enable more verbose logging details
-V, --version show program's version number and exit
```

### Incremental Backups

The `--checkpoint-file` option enables incremental backups by storing the timestamp of the newest processed submission. This allows you to run the script multiple times and only download new submissions since the last run, making it much faster for regular backups.

#### How it works

1. **First run**: If the checkpoint file doesn't exist, the script will prompt you to create it and perform a full backup
2. **Subsequent runs**: The script reads the timestamp from the checkpoint file and only processes submissions newer than that timestamp
3. **Automatic updates**: The checkpoint file is updated automatically at the end of a successful run with the timestamp of the newest submission processed

#### Example usage

```bash
# First run - full backup
leetcode-export \
--folder ./submissions \
--checkpoint-file ~/.leetcode_checkpoint \
--only-accepted \
--cookies "your_cookies_here"

# Subsequent runs - only new submissions
leetcode-export \
--folder ./submissions \
--checkpoint-file ~/.leetcode_checkpoint \
--only-accepted \
--cookies "your_cookies_here"
```

#### Important notes

- The checkpoint file stores a Unix timestamp of the newest processed submission
- Only submissions that are actually written to disk (not filtered out) update the checkpoint
- If no new submissions are found, the checkpoint file remains unchanged
- The script stops early as soon as it reaches a submission whose timestamp is equal to or older than the checkpoint, making it very efficient

### Problem template arguments

#### Problem statement filename template
Expand Down
82 changes: 78 additions & 4 deletions leetcode_export/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import logging
import os
from string import Template
from typing import Set
from typing import Optional, Set

from leetcode_export._version import __version__
from leetcode_export.leetcode import LeetCode
Expand Down Expand Up @@ -95,6 +95,11 @@ def parse_args():
action="store_true",
help="enable more verbose logging details",
)
parser.add_argument(
"--checkpoint-file",
type=str,
help="path to checkpoint file for incremental backups (stores Unix timestamp of newest processed submission)",
)
parser.add_argument(
"-V",
"--version",
Expand All @@ -118,6 +123,45 @@ def parse_args():
return args


def load_checkpoint(checkpoint_file: str) -> Optional[int]:
    """
    Load timestamp from checkpoint file

    If the file does not exist, the user is asked whether it should be
    created. On confirmation a checkpoint of 0 is written and returned;
    otherwise the program exits. When no interactive stdin is available
    (e.g. the script is run from cron) the prompt is answered with the
    default "N".

    :param checkpoint_file: path to checkpoint file
    :return: Unix timestamp read from the file, or 0 if the file was just created
    :raises SystemExit: with status 1 if the user declines to create the file,
        or if the file cannot be read or does not contain an integer
    """
    if not os.path.exists(checkpoint_file):
        logging.info(f"Checkpoint file {checkpoint_file} does not exist")
        try:
            response = input(
                f"Create checkpoint file at {checkpoint_file} and start from beginning? (y/N): "
            )
        except EOFError:
            # stdin is closed or empty (non-interactive run): fall back to the
            # documented default answer instead of crashing with a traceback.
            response = ""
        if response.strip().lower() in ("y", "yes"):
            write_checkpoint(checkpoint_file, 0)
            return 0
        logging.error("Checkpoint file required for incremental backup. Exiting.")
        # SystemExit is raised directly because the exit() helper is installed
        # by the site module and is not guaranteed to be available.
        raise SystemExit(1)

    try:
        with open(checkpoint_file, "r", encoding="utf-8") as f:
            timestamp = int(f.read().strip())
    except (ValueError, OSError) as e:
        # ValueError: empty or non-numeric content; OSError: unreadable file
        # (including the file disappearing after the existence check).
        logging.error(f"Failed to read checkpoint file {checkpoint_file}: {e}")
        raise SystemExit(1)

    logging.info(f"Loaded checkpoint timestamp: {timestamp}")
    return timestamp


def write_checkpoint(checkpoint_file: str, timestamp: int) -> None:
    """
    Write timestamp to checkpoint file

    The value is first written to a temporary sibling file and then moved
    over the checkpoint with os.replace, so an interrupted run never leaves
    an empty or partially written checkpoint behind.

    Failures are logged and not raised: the function returns None either way.

    :param checkpoint_file: path to checkpoint file
    :param timestamp: Unix timestamp to write
    """
    # Same directory as the target so os.replace stays on one filesystem.
    temp_file = f"{checkpoint_file}.tmp"
    try:
        with open(temp_file, "w", encoding="utf-8") as f:
            f.write(str(timestamp))
        os.replace(temp_file, checkpoint_file)
    except OSError as e:
        logging.error(f"Failed to write checkpoint file {checkpoint_file}: {e}")
        try:
            os.remove(temp_file)
        except OSError:
            # Best-effort cleanup: the temporary file may never have been created.
            pass
        return
    logging.debug(f"Updated checkpoint to timestamp: {timestamp}")


def configure_logging(args):
logging_file_handler = logging.FileHandler("debug.log", encoding="UTF8")
logging_file_handler.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -166,15 +210,27 @@ def main():
logging.info("Output folder not found, creating it")
os.mkdir(args.folder)
os.chdir(args.folder)
base_folder = os.getcwd()

title_slug_to_problem_folder_name: dict[str, str] = dict()
title_slug_to_exported_languages: dict[str, set[str]] = dict()

last_submission_timestamp: Optional[int] = None

# Handle checkpoint functionality
checkpoint_timestamp: Optional[int] = None
newest_processed_timestamp: Optional[int] = None
submissions_processed = 0

if args.checkpoint_file:
checkpoint_timestamp = load_checkpoint(args.checkpoint_file)
logging.info(f"Using checkpoint file: {args.checkpoint_file}")
if checkpoint_timestamp > 0:
logging.info(f"Only processing submissions newer than timestamp {checkpoint_timestamp}")

print("Exporting LeetCode submissions...")

for submission in leetcode.get_submissions():
for submission in leetcode.get_submissions(since_timestamp=checkpoint_timestamp):
if (
last_submission_timestamp is not None
and submission.timestamp > last_submission_timestamp
Expand Down Expand Up @@ -220,7 +276,7 @@ def main():
problem_folder_name
)
if not os.path.exists(problem_folder_name):
os.mkdir(problem_folder_name)
os.makedirs(problem_folder_name, exist_ok=True)
os.chdir(problem_folder_name)

problem_statement_filename = problem_statement_filename_template.substitute(
Expand All @@ -241,17 +297,35 @@ def main():
submission_filename = submission_filename_template.substitute(
**submission.__dict__
)
submission_was_written = False
if not os.path.exists(submission_filename):
logging.info(f"Writing {submission.title_slug}/{submission_filename}")
sub_file = open(submission_filename, "w+")
sub_file.write(submission.code)
sub_file.close()
submission_was_written = True
else:
logging.info(
f"{submission.title_slug}/{submission_filename} already exists, skipping it"
)

os.chdir("..")
# Track processing for checkpoint updates
if submission_was_written:
submissions_processed += 1
if newest_processed_timestamp is None or submission.timestamp > newest_processed_timestamp:
newest_processed_timestamp = submission.timestamp

os.chdir(base_folder)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side note: this line, combined with the os.makedirs(problem_folder_name, exist_ok=True) above, allows users to specify problem folders that are nested.
As an example, I wanted to partition my problem folders by the difficulty (e.g. Easy, Medium, Hard top level folders)

As in, this command now works
--problem-folder-name '${difficulty}/${question_id}-${title_slug}'.

Example:
leetcode/solutions/Medium/98-validate-binary-search-tree


# Final summary and checkpoint update
if args.checkpoint_file:
if submissions_processed > 0:
# Only update checkpoint after successful completion of all processing
write_checkpoint(args.checkpoint_file, newest_processed_timestamp)
print(f"Processed {submissions_processed} new submissions")
print(f"Updated checkpoint to timestamp: {newest_processed_timestamp}")
else:
logging.info("No new submissions found since last checkpoint")


if __name__ == "__main__":
Expand Down
16 changes: 14 additions & 2 deletions leetcode_export/leetcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import datetime
import logging
from time import sleep
from typing import Dict, Iterator
from typing import Dict, Iterator, Optional

import requests

Expand Down Expand Up @@ -131,9 +131,10 @@ def get_problem_statement(self, slug: str) -> Problem:
)
return Problem.from_dict(problem_dict)

def get_submissions(self) -> Iterator[Submission]:
def get_submissions(self, since_timestamp: Optional[int] = None) -> Iterator[Submission]:
"""
Get submissions for logged user
:param since_timestamp: Only return submissions newer than this Unix timestamp
:return: Iterator[Submission], LeetCode submission
"""
if not self.is_user_logged():
Expand All @@ -152,7 +153,14 @@ def get_submissions(self) -> Iterator[Submission]:
logging.debug(response.content)
response_json = response.json()
if "submissions_dump" in response_json:
found_older_submission = False
for submission_dict in response_json["submissions_dump"]:
# Check if this submission is older than our checkpoint
if since_timestamp is not None and submission_dict["timestamp"] <= since_timestamp:
logging.info(f"Reached submissions older than checkpoint timestamp {since_timestamp}, stopping")
found_older_submission = True
break

submission_dict["runtime"] = submission_dict["runtime"].replace(
" ", ""
)
Expand All @@ -177,6 +185,10 @@ def get_submissions(self) -> Iterator[Submission]:
submission = Submission.from_dict(submission_dict)
yield submission

# If we found an older submission, stop pagination
if found_older_submission:
break

current += 20
sleep(5) # cooldown time for get request
if "detail" in response_json:
Expand Down