diff --git a/docs/fangraphs.md b/docs/fangraphs.md
index b1b20369..097caf9d 100644
--- a/docs/fangraphs.md
+++ b/docs/fangraphs.md
@@ -18,6 +18,7 @@
| team | str | Team to filter data by.
Specify "0,ts" to get aggregate team data.
| position | str | Position to filter data by.
Default = ALL
| max_results | int | The maximum number of results to return.
Default = 1000000 (In effect, all results)
+| legacy | bool | Whether to fetch data from the legacy leaderboard page instead of the new leaderboard API.
Default = False
## Usage
diff --git a/pybaseball/datasources/fangraphs.py b/pybaseball/datasources/fangraphs.py
index 4867a475..46c278a3 100644
--- a/pybaseball/datasources/fangraphs.py
+++ b/pybaseball/datasources/fangraphs.py
@@ -11,7 +11,8 @@
stat_list_from_str, stat_list_to_str)
from .html_table_processor import HTMLTableProcessor, RowIdFunction
-_FG_LEADERS_URL = "/leaders-legacy.aspx"
+_FG_LEADERS_URL = "/api/leaders/major-league/data"
+_LEGACY_FG_LEADERS_URL = "/leaders-legacy.aspx"
MIN_AGE = 0
MAX_AGE = 100
@@ -43,6 +44,7 @@ class FangraphsDataTable(ABC):
DATA_ROWS_XPATH: str = "({TABLE_XPATH}/tbody//tr)"
DATA_CELLS_XPATH: str = "td[position()>1]/descendant-or-self::*/text()"
QUERY_ENDPOINT: str = _FG_LEADERS_URL
+ LEGACY_QUERY_ENDPOINT: str = _LEGACY_FG_LEADERS_URL
STATS_CATEGORY: FangraphsStatsCategory = FangraphsStatsCategory.NONE
DEFAULT_STAT_COLUMNS: List[FangraphsStatColumn] = []
KNOWN_PERCENTAGES: List[str] = []
@@ -77,7 +79,7 @@ def fetch(self, start_season: int, end_season: Optional[int] = None, league: str
stat_columns: Union[str, List[str]] = 'ALL', qual: Optional[int] = None, split_seasons: bool = True,
month: str = 'ALL', on_active_roster: bool = False, minimum_age: int = MIN_AGE,
maximum_age: int = MAX_AGE, team: str = '', _filter: str = '', players: str = '',
- position: str = 'ALL', max_results: int = 1000000,) -> pd.DataFrame:
+ position: str = 'ALL', max_results: int = 1000000, legacy: bool = False) -> pd.DataFrame:
"""
Get leaderboard data from Fangraphs.
@@ -146,22 +148,26 @@ def fetch(self, start_season: int, end_season: Optional[int] = None, league: str
'age': f"{minimum_age},{maximum_age}",
'filter': _filter,
'players': players,
- 'page': f'1_{max_results}'
+ 'page': f'1_{max_results}',
+ 'pageitems': max_results # New Fangraphs Leaderboard uses pageitems to get maximum results per page
}
- return self._validate(
- self._postprocess(
- self.html_accessor.get_tabular_data_from_options(
- self.QUERY_ENDPOINT,
- query_params=url_options,
- # TODO: Remove the type: ignore after this is fixed: https://github.com/python/mypy/issues/5485
- column_name_mapper=self.COLUMN_NAME_MAPPER, # type: ignore
- known_percentages=self.KNOWN_PERCENTAGES,
- row_id_func=self.ROW_ID_FUNC,
- row_id_name=self.ROW_ID_NAME,
- )
- )
- )
+ # The `legacy` flag lets callers choose between the legacy leaderboard page and the new leaderboard API
+ tabular_data = self.html_accessor.get_tabular_data_from_options(
+ self.LEGACY_QUERY_ENDPOINT,
+ query_params=url_options,
+ # TODO: Remove the type: ignore after this is fixed: https://github.com/python/mypy/issues/5485
+ column_name_mapper=self.COLUMN_NAME_MAPPER, # type: ignore
+ known_percentages=self.KNOWN_PERCENTAGES,
+ row_id_func=self.ROW_ID_FUNC,
+ row_id_name=self.ROW_ID_NAME,
+ ) if legacy else self.html_accessor.get_tabular_data_from_api(
+ f"{self.ROOT_URL}{self.QUERY_ENDPOINT}",
+ query_params=url_options
+ )
+
+
+ return self._validate(self._postprocess(tabular_data))
class FangraphsBattingStatsTable(FangraphsDataTable):
STATS_CATEGORY: FangraphsStatsCategory = FangraphsStatsCategory.BATTING
diff --git a/pybaseball/datasources/html_table_processor.py b/pybaseball/datasources/html_table_processor.py
index 0884a072..4f972429 100644
--- a/pybaseball/datasources/html_table_processor.py
+++ b/pybaseball/datasources/html_table_processor.py
@@ -4,6 +4,8 @@
import lxml.etree
import pandas as pd
import requests
+import json
+import re
from ..datahelpers import postprocessing
from ..datahelpers.column_mapper import ColumnListMapperFunction
@@ -95,3 +97,21 @@ def get_tabular_data_from_options(self, base_url: str, query_params: Dict[str, U
row_id_func=row_id_func,
row_id_name=row_id_name,
)
+
+ def get_tabular_data_from_api(self, base_url: str, query_params: Dict[str, Union[str, int]]):
+ # The new Fangraphs leaderboard API returns HTML markup in the `Name` and `Team` columns,
+ # so the text between the first '>' and the next '<' is extracted from those cells below.
+ def extract_text_from_html(text):
+ try:
+ return re.search('>(.+?)<', text).group(1)
+ except AttributeError:  # no '>...<' span: re.search returned None, so keep the value unchanged
+ return text  # NOTE(review): a non-str cell (None/NaN) would raise TypeError above, not AttributeError - confirm
+
+ data = requests.get(base_url, query_params).content  # query_params is sent as the URL query string
+ data = json.loads(data)
+
+ df = pd.DataFrame(data['data'])  # the rows are read from the top-level 'data' key of the JSON payload
+ df['Name'] = df['Name'].apply(extract_text_from_html)
+ df['Team'] = df['Team'].apply(extract_text_from_html)
+
+ return df
diff --git a/test.ipynb b/test.ipynb
new file mode 100644
index 00000000..f54a3140
--- /dev/null
+++ b/test.ipynb
@@ -0,0 +1,786 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n", + " | IDfg | \n", + "Season | \n", + "Name | \n", + "Team | \n", + "Age | \n", + "G | \n", + "AB | \n", + "PA | \n", + "H | \n", + "1B | \n", + "... | \n", + "maxEV | \n", + "HardHit | \n", + "HardHit% | \n", + "Events | \n", + "CStr% | \n", + "CSW% | \n", + "xBA | \n", + "xSLG | \n", + "xwOBA | \n", + "L-WAR | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", + "18401 | \n", + "2023 | \n", + "Ronald Acuna Jr. | \n", + "ATL | \n", + "25 | \n", + "159 | \n", + "643 | \n", + "735 | \n", + "217 | \n", + "137 | \n", + "... | \n", + "121.2 | \n", + "309 | \n", + "0.550 | \n", + "562 | \n", + "0.137 | \n", + "0.214 | \n", + "0.356 | \n", + "0.660 | \n", + "0.463 | \n", + "8.6 | \n", + "
3 | \n", + "13611 | \n", + "2023 | \n", + "Mookie Betts | \n", + "LAD | \n", + "30 | \n", + "152 | \n", + "584 | \n", + "693 | \n", + "179 | \n", + "99 | \n", + "... | \n", + "110.1 | \n", + "232 | \n", + "0.481 | \n", + "482 | \n", + "0.205 | \n", + "0.262 | \n", + "0.295 | \n", + "0.565 | \n", + "0.411 | \n", + "8.6 | \n", + "
5 | \n", + "5361 | \n", + "2023 | \n", + "Freddie Freeman | \n", + "LAD | \n", + "33 | \n", + "161 | \n", + "637 | \n", + "730 | \n", + "211 | \n", + "121 | \n", + "... | \n", + "110.6 | \n", + "218 | \n", + "0.418 | \n", + "521 | \n", + "0.114 | \n", + "0.208 | \n", + "0.320 | \n", + "0.564 | \n", + "0.409 | \n", + "7.6 | \n", + "
4 | \n", + "14344 | \n", + "2023 | \n", + "Matt Olson | \n", + "ATL | \n", + "29 | \n", + "162 | \n", + "608 | \n", + "720 | \n", + "172 | \n", + "88 | \n", + "... | \n", + "118.6 | \n", + "245 | \n", + "0.551 | \n", + "445 | \n", + "0.131 | \n", + "0.249 | \n", + "0.263 | \n", + "0.561 | \n", + "0.394 | \n", + "6.8 | \n", + "
0 | \n", + "19755 | \n", + "2023 | \n", + "Shohei Ohtani | \n", + "LAA | \n", + "28 | \n", + "135 | \n", + "497 | \n", + "599 | \n", + "151 | \n", + "73 | \n", + "... | \n", + "118.6 | \n", + "193 | \n", + "0.533 | \n", + "362 | \n", + "0.125 | \n", + "0.265 | \n", + "0.294 | \n", + "0.634 | \n", + "0.428 | \n", + "6.6 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
132 | \n", + "15172 | \n", + "2023 | \n", + "Tim Anderson | \n", + "CHW | \n", + "30 | \n", + "123 | \n", + "493 | \n", + "524 | \n", + "121 | \n", + "100 | \n", + "... | \n", + "109.6 | \n", + "146 | \n", + "0.391 | \n", + "373 | \n", + "0.140 | \n", + "0.263 | \n", + "0.250 | \n", + "0.330 | \n", + "0.279 | \n", + "-0.8 | \n", + "
125 | \n", + "15676 | \n", + "2023 | \n", + "Jose Abreu | \n", + "HOU | \n", + "36 | \n", + "141 | \n", + "540 | \n", + "594 | \n", + "128 | \n", + "86 | \n", + "... | \n", + "113.3 | \n", + "173 | \n", + "0.416 | \n", + "416 | \n", + "0.170 | \n", + "0.291 | \n", + "0.243 | \n", + "0.400 | \n", + "0.310 | \n", + "-0.5 | \n", + "
115 | \n", + "17988 | \n", + "2023 | \n", + "Tyler Stephenson | \n", + "CIN | \n", + "26 | \n", + "142 | \n", + "465 | \n", + "517 | \n", + "113 | \n", + "78 | \n", + "... | \n", + "111.1 | \n", + "143 | \n", + "0.432 | \n", + "331 | \n", + "0.191 | \n", + "0.300 | \n", + "0.242 | \n", + "0.395 | \n", + "0.314 | \n", + "-0.8 | \n", + "
129 | \n", + "10472 | \n", + "2023 | \n", + "Enrique Hernandez | \n", + "- - - | \n", + "31 | \n", + "140 | \n", + "465 | \n", + "508 | \n", + "110 | \n", + "76 | \n", + "... | \n", + "109.2 | \n", + "125 | \n", + "0.334 | \n", + "374 | \n", + "0.177 | \n", + "0.280 | \n", + "0.222 | \n", + "0.333 | \n", + "0.271 | \n", + "-0.7 | \n", + "
116 | \n", + "10815 | \n", + "2023 | \n", + "Jurickson Profar | \n", + "- - - | \n", + "30 | \n", + "125 | \n", + "459 | \n", + "521 | \n", + "111 | \n", + "73 | \n", + "... | \n", + "108.8 | \n", + "119 | \n", + "0.317 | \n", + "375 | \n", + "0.151 | \n", + "0.236 | \n", + "0.245 | \n", + "0.344 | \n", + "0.305 | \n", + "-0.9 | \n", + "
133 rows × 320 columns
\n", + "\n", + " | Bats | \n", + "xMLBAMID | \n", + "Name | \n", + "Team | \n", + "Season | \n", + "Age | \n", + "AgeR | \n", + "SeasonMin | \n", + "SeasonMax | \n", + "G | \n", + "... | \n", + "Q | \n", + "TG | \n", + "TPA | \n", + "PlayerNameRoute | \n", + "PlayerName | \n", + "playerid | \n", + "TeamName | \n", + "TeamNameAbb | \n", + "teamid | \n", + "Pos | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "R | \n", + "660670 | \n", + "Ronald Acuna Jr. | \n", + "ATL | \n", + "2023 | \n", + "25.0 | \n", + "25 - 25 | \n", + "2023 | \n", + "2023 | \n", + "159.0 | \n", + "... | \n", + "4.5370 | \n", + "162.0 | \n", + "735.0 | \n", + "Ronald Acuna Jr. | \n", + "Ronald Acuña Jr. | \n", + "18401 | \n", + "ATL | \n", + "ATL | \n", + "16 | \n", + "0.0127 | \n", + "
1 | \n", + "R | \n", + "605141 | \n", + "Mookie Betts | \n", + "LAD | \n", + "2023 | \n", + "30.0 | \n", + "30 - 30 | \n", + "2023 | \n", + "2023 | \n", + "152.0 | \n", + "... | \n", + "4.2778 | \n", + "162.0 | \n", + "693.0 | \n", + "Mookie Betts | \n", + "Mookie Betts | \n", + "13611 | \n", + "LAD | \n", + "LAD | \n", + "22 | \n", + "0.0000 | \n", + "
2 | \n", + "L | \n", + "518692 | \n", + "Freddie Freeman | \n", + "LAD | \n", + "2023 | \n", + "33.0 | \n", + "33 - 33 | \n", + "2023 | \n", + "2023 | \n", + "161.0 | \n", + "... | \n", + "4.5062 | \n", + "162.0 | \n", + "730.0 | \n", + "Freddie Freeman | \n", + "Freddie Freeman | \n", + "5361 | \n", + "LAD | \n", + "LAD | \n", + "22 | \n", + "0.0000 | \n", + "
3 | \n", + "L | \n", + "621566 | \n", + "Matt Olson | \n", + "ATL | \n", + "2023 | \n", + "29.0 | \n", + "29 - 29 | \n", + "2023 | \n", + "2023 | \n", + "162.0 | \n", + "... | \n", + "4.4444 | \n", + "162.0 | \n", + "720.0 | \n", + "Matt Olson | \n", + "Matt Olson | \n", + "14344 | \n", + "ATL | \n", + "ATL | \n", + "16 | \n", + "0.0000 | \n", + "
4 | \n", + "L | \n", + "660271 | \n", + "Shohei Ohtani | \n", + "LAA | \n", + "2023 | \n", + "28.0 | \n", + "28 - 28 | \n", + "2023 | \n", + "2023 | \n", + "135.0 | \n", + "... | \n", + "3.6975 | \n", + "162.0 | \n", + "599.0 | \n", + "Shohei Ohtani | \n", + "Shohei Ohtani | \n", + "19755 | \n", + "LAA | \n", + "LAA | \n", + "1 | \n", + "0.8535 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
128 | \n", + "R | \n", + "641313 | \n", + "Tim Anderson | \n", + "CHW | \n", + "2023 | \n", + "30.0 | \n", + "30 - 30 | \n", + "2023 | \n", + "2023 | \n", + "123.0 | \n", + "... | \n", + "3.2346 | \n", + "162.0 | \n", + "524.0 | \n", + "Tim Anderson | \n", + "Tim Anderson | \n", + "15172 | \n", + "CHW | \n", + "CHW | \n", + "4 | \n", + "0.0000 | \n", + "
129 | \n", + "R | \n", + "547989 | \n", + "Jose Abreu | \n", + "HOU | \n", + "2023 | \n", + "36.0 | \n", + "36 - 36 | \n", + "2023 | \n", + "2023 | \n", + "141.0 | \n", + "... | \n", + "3.6667 | \n", + "162.0 | \n", + "594.0 | \n", + "Jose Abreu | \n", + "José Abreu | \n", + "15676 | \n", + "HOU | \n", + "HOU | \n", + "21 | \n", + "0.0496 | \n", + "
130 | \n", + "R | \n", + "663886 | \n", + "Tyler Stephenson | \n", + "CIN | \n", + "2023 | \n", + "26.0 | \n", + "26 - 26 | \n", + "2023 | \n", + "2023 | \n", + "142.0 | \n", + "... | \n", + "3.1914 | \n", + "162.0 | \n", + "517.0 | \n", + "Tyler Stephenson | \n", + "Tyler Stephenson | \n", + "17988 | \n", + "CIN | \n", + "CIN | \n", + "18 | \n", + "0.3007 | \n", + "
131 | \n", + "R | \n", + "571771 | \n", + "Enrique Hernandez | \n", + "- - - | \n", + "2023 | \n", + "31.0 | \n", + "31 - 31 | \n", + "2023 | \n", + "2023 | \n", + "140.0 | \n", + "... | \n", + "3.1358 | \n", + "162.0 | \n", + "508.0 | \n", + "Enrique Hernandez | \n", + "Enrique Hernández | \n", + "10472 | \n", + "- - - | \n", + "2 Tms | \n", + "0 | \n", + "0.0000 | \n", + "
132 | \n", + "B | \n", + "595777 | \n", + "Jurickson Profar | \n", + "- - - | \n", + "2023 | \n", + "30.0 | \n", + "30 - 30 | \n", + "2023 | \n", + "2023 | \n", + "125.0 | \n", + "... | \n", + "3.2160 | \n", + "162.0 | \n", + "521.0 | \n", + "Jurickson Profar | \n", + "Jurickson Profar | \n", + "10815 | \n", + "- - - | \n", + "2 Tms | \n", + "0 | \n", + "0.1488 | \n", + "
133 rows × 357 columns
\n", + "