Skip to content

Commit fa3e24e

Browse files
Add multi-threading for problem fetching and refactor code
Introduced multi-threading to improve the efficiency of fetching and storing problems for study plans. Adapted the database schema to handle additional study plan details and added functions for fetching problem counts and category counts. Improved code readability by encapsulating the construction of objects from database rows in helper functions, and fixed incorrect problem counts for study plans.
1 parent 4c7c1bb commit fa3e24e

File tree

7 files changed

+300
-83
lines changed

7 files changed

+300
-83
lines changed

.github/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
![Linux](https://img.shields.io/badge/-Linux-FCC624?style=flat-square&logo=linux&logoColor=black)
55
![Python](https://img.shields.io/badge/-Python-3776AB?style=flat-square&logo=python&logoColor=white)
66
![PostgreSQL](https://img.shields.io/badge/-PostgreSQL-336791?style=flat-square&logo=postgresql&logoColor=white)
7+
![Supabase](https://img.shields.io/badge/-Supabase-3ECF8E?style=flat-square&logo=supabase&logoColor=white)
78

89
LeetCode Scraper is a Python-based tool designed to fetch and store details from LeetCode study plans into a PostgreSQL database. This tool leverages Docker for easy setup and environment management.
910

@@ -19,7 +20,7 @@ LeetCode Scraper is a Python-based tool designed to fetch and store details from
1920
## Features
2021

2122
- Fetches LeetCode problems and study plans
22-
- Stores data in a PostgreSQL database
23+
- Stores data in a PostgreSQL/Supabase database
2324
- Provides caching to reduce redundant requests
2425
- Handles rate limiting with retry mechanisms
2526

database/database.py

Lines changed: 134 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,54 @@
88
from leetcode.study_plan import StudyPlan
99

1010

11+
def _create_study_plan_from_result(result) -> StudyPlan:
    """
    Build a StudyPlan from a single row returned by a study-plan query.

    Columns are read by position: slug, name, description and
    expected_number_of_problems are required; the problem count and the
    category count (fifth and sixth columns) are optional and default to
    None when the row is too short to carry them.

    :param result: A sequence representing one row from the database query.
    :return: A StudyPlan object populated from that row.
    """
    column_count = len(result)

    return StudyPlan(
        slug=result[0],
        name=result[1],
        description=result[2],
        expected_number_of_problems=result[3],
        # Rows from queries that do not aggregate counts stop after column four.
        number_of_problems=result[4] if column_count > 4 else None,
        number_of_categories=result[5] if column_count > 5 else None,
    )
32+
33+
34+
def _create_problem_from_result(result) -> Problem:
    """
    Build a Problem from a single row returned by a problem query.

    Columns are read by position: id, title, content, difficulty, topics,
    companies, hints. The id is converted with int(); falsy topics,
    companies or hints values (for example NULL columns) become empty lists.

    :param result: A sequence representing one row from the database query.
    :return: A Problem object populated from that row.
    """
    return Problem(
        id=int(result[0]),
        title=result[1],
        content=result[2],
        difficulty=result[3],
        # Collection columns may come back empty/None; normalise to a list.
        topics=result[4] or [],
        companies=result[5] or [],
        hints=result[6] or [],
    )
57+
58+
1159
def execute_insert(cursor, connection, sql: str, params: Dict[str, Any]) -> Any:
1260
"""
1361
Execute an insert query and return the ID of the inserted row.
@@ -91,11 +139,12 @@ def insert_problem(self, problem: Problem) -> Any | None:
91139

92140
def insert_study_plan(self, study_plan: StudyPlan) -> Any | None:
93141
sql = """
94-
INSERT INTO leetcode.study_plans (slug, name, description)
95-
VALUES (%(slug)s, %(name)s, %(description)s)
142+
INSERT INTO leetcode.study_plans (slug, name, description, expected_number_of_problems)
143+
VALUES (%(slug)s, %(name)s, %(description)s, %(expected_number_of_problems)s)
96144
ON CONFLICT (slug) DO UPDATE
97145
SET name = EXCLUDED.name,
98-
description = EXCLUDED.description
146+
description = EXCLUDED.description,
147+
expected_number_of_problems = EXCLUDED.expected_number_of_problems
99148
RETURNING id;
100149
"""
101150
return execute_insert(self.cursor, self.connection, sql, study_plan.to_dict())
@@ -134,13 +183,19 @@ def get_problem_by_slug(self, slug: str) -> Problem | None:
134183
:return: The Problem object with the given slug, or None if not found.
135184
"""
136185
sql = """
137-
SELECT * FROM leetcode.problems WHERE slug = %(slug)s;
186+
SELECT id, title, content, difficulty, topics, companies, hints
187+
FROM leetcode.problems
188+
WHERE slug = %(slug)s;
138189
"""
139190
self.cursor.execute(sql, {"slug": slug})
140191
result = self.cursor.fetchone()
192+
141193
if result is None:
142194
return None
143-
return Problem(*result)
195+
196+
problem = _create_problem_from_result(result)
197+
198+
return problem
144199

145200
def get_study_plan_by_slug(self, slug: str) -> StudyPlan | None:
146201
"""
@@ -149,25 +204,45 @@ def get_study_plan_by_slug(self, slug: str) -> StudyPlan | None:
149204
:return: The StudyPlan object with the given slug, or None if not found.
150205
"""
151206
sql = """
152-
SELECT sp.slug, sp.name, sp.description,
207+
SELECT sp.slug, sp.name, sp.description, sp.expected_number_of_problems,
153208
COUNT(DISTINCT spp.problem_id) AS number_of_problems,
154209
COUNT(DISTINCT spp.category_name) AS number_of_categories
155210
FROM leetcode.study_plans sp
156211
LEFT JOIN leetcode.study_plan_problems spp ON sp.id = spp.study_plan_id
157212
WHERE sp.slug = %(slug)s
158-
GROUP BY sp.slug, sp.name, sp.description;
213+
GROUP BY sp.slug, sp.name, sp.description, sp.expected_number_of_problems;
159214
"""
160215
self.cursor.execute(sql, {"slug": slug})
161216
result = self.cursor.fetchone()
217+
162218
if result is None:
163219
return None
164-
return StudyPlan(
165-
slug=result[0],
166-
name=result[1],
167-
description=result[2],
168-
number_of_problems=result[3],
169-
number_of_categories=result[4],
170-
)
220+
221+
return _create_study_plan_from_result(result)
222+
223+
def get_problems_by_study_plan_slug(self, slug: str) -> list[Problem]:
    """
    Fetch every problem linked to the study plan identified by ``slug``.

    Joins problems to study plans through leetcode.study_plan_problems and
    converts each returned row with _create_problem_from_result.

    :param slug: The slug of the study plan.
    :return: A list of Problem objects; empty when no rows match.
    """
    query = """
        SELECT p.id, p.title, p.content, p.difficulty, p.topics, p.companies, p.hints
        FROM leetcode.problems p
        JOIN leetcode.study_plan_problems spp ON p.id = spp.problem_id
        JOIN leetcode.study_plans sp ON spp.study_plan_id = sp.id
        WHERE sp.slug = %(slug)s;
    """
    self.cursor.execute(query, {"slug": slug})

    return [_create_problem_from_result(row) for row in self.cursor.fetchall()]
171246

172247
def does_problem_exist(self, slug: str) -> bool:
173248
"""
@@ -220,6 +295,46 @@ def does_company_exist(self, company: str) -> bool:
220295
except Exception:
221296
return False
222297

298+
def get_problem_count_by_study_plan(self, slug: str) -> int:
    """
    Count the distinct problems attached to a study plan.

    :param slug: The slug of the study plan.
    :return: The number of distinct problem ids linked to the plan, or 0
        when the query yields no row.
    """
    query = """
        SELECT COUNT(DISTINCT spp.problem_id)
        FROM leetcode.study_plan_problems spp
        JOIN leetcode.study_plans sp ON spp.study_plan_id = sp.id
        WHERE sp.slug = %(slug)s;
    """
    self.cursor.execute(query, {"slug": slug})
    row = self.cursor.fetchone()

    # Defensive fallback for a cursor that returns no row at all.
    return 0 if row is None else row[0]
317+
318+
def get_category_count_by_study_plan(self, slug: str) -> int:
    """
    Count the distinct category names used within a study plan.

    :param slug: The slug of the study plan.
    :return: The number of distinct category names linked to the plan, or 0
        when the query yields no row.
    """
    query = """
        SELECT COUNT(DISTINCT spp.category_name)
        FROM leetcode.study_plan_problems spp
        JOIN leetcode.study_plans sp ON spp.study_plan_id = sp.id
        WHERE sp.slug = %(slug)s;
    """
    self.cursor.execute(query, {"slug": slug})
    row = self.cursor.fetchone()

    # Defensive fallback for a cursor that returns no row at all.
    return 0 if row is None else row[0]
337+
223338
def get_problems_by_company(self, company: str) -> list[Problem]:
224339
"""
225340
Get a list of problems by a specific company.
@@ -233,7 +348,11 @@ def get_problems_by_company(self, company: str) -> list[Problem]:
233348
"""
234349
self.cursor.execute(sql, {"company": company})
235350
results = self.cursor.fetchall()
236-
return [Problem(*result) for result in results]
351+
352+
if results is None:
353+
return []
354+
355+
return [_create_problem_from_result(result) for result in results]
237356

238357
def close(self):
239358
self.cursor.close()

leetcode/api/client.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def get_study_plan_details(self, plan_slug: str) -> Dict[str, Any]:
8787
Fetch study plan details from LeetCode's GraphQL API using the provided plan slug.
8888
8989
:param plan_slug: The slug of the study plan.
90-
:return: A dictionary containing the study plan details.
90+
:return: A dictionary containing the study plan details and the total number of problems.
9191
:raises Exception: If the API request fails or the response does not contain expected data.
9292
"""
9393
api_url = "https://leetcode.com/graphql"
@@ -162,7 +162,17 @@ def get_study_plan_details(self, plan_slug: str) -> Dict[str, Any]:
162162
):
163163
raise Exception("Study plan not found or invalid response format")
164164

165-
return response_data["data"]["studyPlanV2Detail"]
165+
study_plan_details = response_data["data"]["studyPlanV2Detail"]
166+
167+
# Calculate the total number of problems in the study plan
168+
total_problems = sum(
169+
group.get("questionNum", 0) for group in study_plan_details["planSubGroups"]
170+
)
171+
172+
# Add the total number of problems to the study plan details
173+
study_plan_details["totalProblems"] = total_problems
174+
175+
return study_plan_details
166176

167177
def get_recent_questions_for_company(
168178
self,

0 commit comments

Comments
 (0)