Skip to content

Commit

Permalink
[pinterest] use Pinterest's "Web" API (#83)
Browse files Browse the repository at this point in the history
no access tokens, no user credentials of any kind ...
  • Loading branch information
mikf committed Apr 24, 2018
1 parent c6d5154 commit 55d4d23
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 67 deletions.
132 changes: 66 additions & 66 deletions gallery_dl/extractor/pinterest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,27 @@
"""Extract images from https://www.pinterest.com"""

from .common import Extractor, Message
from .. import text, util, exception
from .. import text, exception
import json


class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
filename_fmt = "{category}_{pin_id}.{extension}"
archive_fmt = "{pin_id}"
filename_fmt = "{category}_{id}.{extension}"
archive_fmt = "{id}"

def __init__(self):
Extractor.__init__(self)
self.api = PinterestAPI(self)

def data_from_pin(self, pin):
"""Get image url and metadata from a pin-object"""
img = pin["image"]["original"]
img = pin["images"]["orig"]
url = img["url"]
data = {
"pin_id": util.safe_int(pin["id"]),
"note": pin["note"],
"width": util.safe_int(img["width"]),
"height": util.safe_int(img["height"]),
}
return url, text.nameext_from_url(url, data)
pin["width"] = img["width"]
pin["height"] = img["height"]
return url, text.nameext_from_url(url, pin)


class PinterestPinExtractor(PinterestExtractor):
Expand All @@ -42,13 +39,9 @@ class PinterestPinExtractor(PinterestExtractor):
test = [
("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
"keyword": "f651cb271247f306d1d30385d49c7b82da44c2b1",
"content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
}),
("https://www.pinterest.com/pin/858146903966145188/", {
"exception": exception.StopExtraction,
}),
("https://www.pinterest.com/pin/85814690396614518/", {
"exception": exception.NotFoundError,
}),
]
Expand All @@ -68,13 +61,13 @@ def items(self):
class PinterestBoardExtractor(PinterestExtractor):
"""Extractor for images from a board from pinterest.com"""
subcategory = "board"
directory_fmt = ["{category}", "{user}", "{board}"]
directory_fmt = ["{category}", "{board[owner][username]}", "{board[name]}"]
archive_fmt = "{board[id]}_{id}"
pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.[^/]+/"
r"(?!pin/)([^/?#&]+)/([^/?#&]+)"]
test = [
("https://www.pinterest.com/g1952849/test-/", {
"url": "85911dfca313f3f7f48c2aa0bc684f539d1d80a6",
"keyword": "c54cf5aa830994f2ed4871efa7154a5fdaa1c2ce",
}),
("https://www.pinterest.com/g1952848/test/", {
"exception": exception.NotFoundError,
Expand All @@ -87,27 +80,17 @@ def __init__(self, match):

def items(self):
board = self.api.board(self.user, self.board)
data = self.data_from_board(board)
data = {"board": board, "count": board["pin_count"]}
num = data["count"]
yield Message.Version, 1
yield Message.Directory, data
for pin in self.api.board_pins(self.user, self.board):
for pin in self.api.board_pins(board["id"]):
url, pdata = self.data_from_pin(pin)
data.update(pdata)
data["num"] = num
num -= 1
yield Message.Url, url, data

def data_from_board(self, board):
"""Get metadata from a board-object"""
data = {
"user": self.user,
"board_id": util.safe_int(board["id"]),
"board": board["name"],
"count": board["counts"]["pins"],
}
return data


class PinterestPinitExtractor(PinterestExtractor):
"""Extractor for images from a pin.it URL"""
Expand Down Expand Up @@ -136,59 +119,76 @@ def items(self):


class PinterestAPI():
"""Minimal interface for the pinterest API"""

def __init__(self, extractor, access_token=None):
"""Minimal interface for the Pinterest Web API
For a better and more complete implementation in PHP, see
- https://github.com/seregazhuk/php-pinterest-bot
"""

BASE_URL = "https://uk.pinterest.com"
HEADERS = {
"Accept" : "application/json, text/javascript, "
"*/*, q=0.01",
"Accept-Language" : "en-US,en;q=0.5",
"X-Pinterest-AppState": "active",
"X-APP-VERSION" : "cb1c7f9",
"X-Requested-With" : "XMLHttpRequest",
"Origin" : BASE_URL + "/",
}

def __init__(self, extractor):
self.log = extractor.log
self.session = extractor.session
self.access_token = (
access_token or
extractor.config("access-token") or
"AfyIXxi1MJ6et0NlIl_vBchHbex-FSWylPyr2GJE2uu3W8A97QAAAAA"
)

def pin(self, pin_id, fields="id,image,note"):
def pin(self, pin_id):
"""Query information about a pin"""
endpoint = "pins/{}/".format(pin_id)
params = {"fields": fields}
return self._call(endpoint, params)["data"]
options = {"id": pin_id, "field_set_key": "detailed"}
return self._call("Pin", options)["resource_response"]["data"]

def board(self, user, board, fields="id,name,counts"):
def board(self, user, board):
"""Query information about a board"""
endpoint = "boards/{}/{}/".format(user, board)
params = {"fields": fields}
return self._call(endpoint, params)["data"]
options = {"slug": board, "username": user,
"field_set_key": "detailed"}
return self._call("Board", options)["resource_response"]["data"]

def board_pins(self, user, board, fields="id,image,note", limit=100):
def board_pins(self, board_id):
"""Yield all pins of a specific board"""
endpoint = "boards/{}/{}/pins/".format(user, board)
params = {"fields": fields, "limit": limit}
return self._pagination(endpoint, params)

def _call(self, endpoint, params):
params["access_token"] = self.access_token
url = "https://api.pinterest.com/v1/" + endpoint
options = {"board_id": board_id}
return self._pagination("BoardFeed", options)

def _call(self, resource, options):
url = "{}/resource/{}Resource/get".format(self.BASE_URL, resource)
params = {
"source_url": "",
"data": json.dumps({"options": options}),
}

response = self.session.get(url, params=params)
status = response.status_code
response = self.session.get(url, params=params, headers=self.HEADERS)
data = response.json()

if 200 <= status < 400 and data.get("data"):
if 200 <= response.status_code < 400 and "resource_response" in data:
return data

msg = data.get("message", "")
if status == 404:
try:
msg = data["resource_response"]["error"]["message"]
except KeyError:
msg = ""
if response.status_code == 404:
msg = msg.partition(" ")[0].lower()
raise exception.NotFoundError(msg)
self.log.error("API request failed: %s", msg or "")
self.log.error("API request failed: %s", msg)
raise exception.StopExtraction()

def _pagination(self, endpoint, params):
def _pagination(self, resource, options, bookmarks=None):
while True:
response = self._call(endpoint, params)
yield from response["data"]

cursor = response["page"]["cursor"]
if not cursor:
if bookmarks:
options["bookmarks"] = bookmarks
data = self._call(resource, options)
yield from data["resource_response"]["data"]

try:
bookmarks = data["resource"]["options"]["bookmarks"]
if not bookmarks or bookmarks[0] == "-end-":
return
except KeyError:
return
params["cursor"] = cursor
1 change: 0 additions & 1 deletion test/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
# temporary issues, etc.
BROKEN = {
"gomanga", # server down
"pinterest", # access tokens have been set to 10 requests per hour
"puremashiro", # online reader down
}

Expand Down

2 comments on commit 55d4d23

@Hrxn
Copy link
Contributor

@Hrxn Hrxn commented on 55d4d23 Apr 24, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some kind of internal API?

@mikf
Copy link
Owner Author

@mikf mikf commented on 55d4d23 Apr 25, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, sort of. These are the endpoints used while browsing the site in a browser.

My browser's network inspector and especially the code of https://github.com/seregazhuk/php-pinterest-bot helped to make sense out of these.

Please sign in to comment.