-
Notifications
You must be signed in to change notification settings - Fork 631
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1.0.25 - Added photobucket ripper #8
- Loading branch information
Showing
3 changed files
with
223 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
221 changes: 221 additions & 0 deletions
221
src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
package com.rarchives.ripme.ripper.rippers; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import org.apache.log4j.Logger; | ||
import org.json.JSONArray; | ||
import org.json.JSONObject; | ||
import org.jsoup.Connection.Response; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
|
||
import com.rarchives.ripme.ripper.AlbumRipper; | ||
|
||
public class PhotobucketRipper extends AlbumRipper { | ||
|
||
private static final String DOMAIN = "photobucket.com", | ||
HOST = "photobucket"; | ||
private static final Logger logger = Logger.getLogger(PhotobucketRipper.class); | ||
|
||
private Response pageResponse = null; | ||
|
||
public PhotobucketRipper(URL url) throws IOException { | ||
super(url); | ||
} | ||
|
||
@Override | ||
public String getHost() { | ||
return HOST; | ||
} | ||
|
||
public URL sanitizeURL(URL url) throws MalformedURLException { | ||
logger.info(url); | ||
String u = url.toExternalForm(); | ||
if (u.contains("?")) { | ||
u = u.substring(0, u.indexOf("?")); | ||
return new URL(u); | ||
} | ||
else { | ||
return url; | ||
} | ||
} | ||
|
||
public String getAlbumTitle(URL url) throws MalformedURLException { | ||
try { | ||
// Attempt to use album title as GID | ||
if (pageResponse == null) { | ||
pageResponse = Jsoup.connect(url.toExternalForm()).execute(); | ||
} | ||
Document albumDoc = pageResponse.parse(); | ||
Elements els = albumDoc.select("div.libraryTitle > h1"); | ||
if (els.size() == 0) { | ||
throw new IOException("Could not find libraryTitle at " + url); | ||
} | ||
return els.get(0).text(); | ||
} catch (IOException e) { | ||
// Fall back to default album naming convention | ||
} | ||
return super.getAlbumTitle(url); | ||
} | ||
|
||
@Override | ||
public String getGID(URL url) throws MalformedURLException { | ||
Pattern p; Matcher m; | ||
|
||
// http://s844.photobucket.com/user/SpazzySpizzy/library/Lady%20Gaga?sort=3&page=1 | ||
p = Pattern.compile("^https?://[a-zA-Z0-9]+\\.photobucket\\.com/user/([a-zA-Z0-9_\\-]+)/library.*$"); | ||
m = p.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
return m.group(1); | ||
} | ||
|
||
throw new MalformedURLException( | ||
"Expected photobucket.com gallery formats: " | ||
+ "http://x###.photobucket.com/username/library/..." | ||
+ " Got: " + url); | ||
} | ||
|
||
@Override | ||
public void rip() throws IOException { | ||
List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm()); | ||
|
||
List<String> subsToRip = new ArrayList<String>(), | ||
rippedSubs = new ArrayList<String>(); | ||
|
||
for (String sub : subalbums) { | ||
subsToRip.add(sub); | ||
} | ||
|
||
while (subsToRip.size() > 0 && !isStopped()) { | ||
try { | ||
Thread.sleep(1000); | ||
} catch (InterruptedException e) { | ||
break; | ||
} | ||
String nextSub = subsToRip.remove(0); | ||
rippedSubs.add(nextSub); | ||
logger.info("Attempting to rip next subalbum: " + nextSub); | ||
try { | ||
pageResponse = null; | ||
subalbums = ripAlbumAndGetSubalbums(nextSub); | ||
} catch (IOException e) { | ||
logger.error("Error while ripping " + nextSub, e); | ||
break; | ||
} | ||
for (String subalbum : subalbums) { | ||
if (!subsToRip.contains(subalbum) && !rippedSubs.contains(subalbum)) { | ||
subsToRip.add(subalbum); | ||
} | ||
} | ||
} | ||
waitForThreads(); | ||
} | ||
|
||
public List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException { | ||
int filesIndex = 0, | ||
filesTotal = 0, | ||
pageIndex = 0; | ||
String currentAlbumPath = null, | ||
url = null; | ||
|
||
while (pageIndex == 0 || filesIndex < filesTotal) { | ||
if (isStopped()) { | ||
break; | ||
} | ||
pageIndex++; | ||
if (pageIndex > 1 || pageResponse == null) { | ||
url = theUrl + String.format("?sort=3&page=", pageIndex); | ||
logger.info(" Retrieving " + url); | ||
pageResponse = Jsoup.connect(url).execute(); | ||
} | ||
Document albumDoc = pageResponse.parse(); | ||
// Retrieve JSON from request | ||
String jsonString = null; | ||
for (Element script : albumDoc.select("script[type=text/javascript]")) { | ||
String data = script.data(); | ||
if (!data.contains("libraryAlbumsPageCollectionData")) { | ||
continue; | ||
} | ||
// Ensure this chunk of javascript contains the album info | ||
// Grab the JSON | ||
Pattern p; Matcher m; | ||
p = Pattern.compile("^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL); | ||
m = p.matcher(data); | ||
if (m.matches()) { | ||
jsonString = m.group(1); | ||
break; | ||
} | ||
} | ||
if (jsonString == null) { | ||
logger.error("Unable to find JSON data at URL: " + url); | ||
break; | ||
} | ||
JSONObject json = new JSONObject(jsonString); | ||
JSONObject items = json.getJSONObject("items"); | ||
JSONArray objects = items.getJSONArray("objects"); | ||
filesTotal = items.getInt("total"); | ||
currentAlbumPath = json.getString("currentAlbumPath"); | ||
for (int i = 0; i < objects.length(); i++) { | ||
JSONObject object = objects.getJSONObject(i); | ||
String image = object.getString("fullsizeUrl"); | ||
filesIndex += 1; | ||
addURLToDownload(new URL(image), | ||
"", | ||
object.getString("location"), | ||
albumDoc.location(), | ||
pageResponse.cookies()); | ||
} | ||
} | ||
// Get subalbums | ||
if (url != null) { | ||
return getSubAlbums(url, currentAlbumPath); | ||
} else { | ||
return new ArrayList<String>(); | ||
} | ||
} | ||
|
||
private List<String> getSubAlbums(String url, String currentAlbumPath) { | ||
List<String> result = new ArrayList<String>(); | ||
String subdomain = url.substring(url.indexOf("://")+3); | ||
subdomain = subdomain.substring(0, subdomain.indexOf(".")); | ||
String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList" | ||
+ "?deferCollapsed=true" | ||
+ "&albumPath=" + currentAlbumPath // %2Falbums%2Fab10%2FSpazzySpizzy" | ||
+ "&json=1"; | ||
try { | ||
logger.info("Loading " + apiUrl); | ||
Document doc = Jsoup.connect(apiUrl) | ||
.ignoreContentType(true) | ||
.referrer(url) | ||
.get(); | ||
String jsonString = doc.body().html().replace(""", "\""); | ||
JSONObject json = new JSONObject(jsonString); | ||
JSONArray subalbums = json.getJSONObject("body").getJSONArray("subAlbums"); | ||
for (int i = 0; i < subalbums.length(); i++) { | ||
String suburl = | ||
"http://" | ||
+ subdomain | ||
+ ".photobucket.com" | ||
+ subalbums.getJSONObject(i).getString("path"); | ||
suburl = suburl.replace(" ", "%20"); | ||
result.add(suburl); | ||
} | ||
} catch (IOException e) { | ||
logger.error("Failed to get subalbums from " + apiUrl, e); | ||
} | ||
return result; | ||
} | ||
|
||
public boolean canRip(URL url) { | ||
return url.getHost().endsWith(DOMAIN); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters