1.0.25 - Added photobucket ripper #8

RipMeApp · Apr 23, 2014 · 2875ee8 · 2875ee8
1 parent a8728ec
commit 2875ee8
Show file tree

Hide file tree

Showing 3 changed files with 223 additions and 2 deletions.
diff --git a/pom.xml b/pom.xml
@@ -4,7 +4,7 @@
   <groupId>com.rarchives.ripme</groupId>
   <artifactId>ripme</artifactId>
   <packaging>jar</packaging>
-  <version>1.0.24</version>
+  <version>1.0.25</version>
   <name>ripme</name>
   <url>http://rip.rarchives.com</url>
   <properties>

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
@@ -0,0 +1,221 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection.Response;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AlbumRipper;
+
+public class PhotobucketRipper extends AlbumRipper {
+
+    /** Domain that {@link #canRip(URL)} looks for at the end of a URL's host. */
+    private static final String DOMAIN = "photobucket.com";
+    /** Short identifier returned by {@link #getHost()}. */
+    private static final String HOST = "photobucket";
+    private static final Logger logger = Logger.getLogger(PhotobucketRipper.class);
+
+    /**
+     * Most recently fetched album page. {@code getAlbumTitle} stores the page it
+     * loads here so that {@code ripAlbumAndGetSubalbums} can reuse it for page 1
+     * instead of requesting it again; {@code rip} resets it to {@code null}
+     * before moving on to each sub-album.
+     */
+    private Response pageResponse = null;
+
+    /**
+     * Creates a ripper for the given URL. All work is delegated to the
+     * superclass constructor.
+     *
+     * @param url the URL handed to the superclass
+     * @throws IOException declared for the superclass constructor call
+     */
+    public PhotobucketRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    /**
+     * Returns the short host identifier for this ripper.
+     *
+     * @return the constant {@code HOST} ("photobucket")
+     */
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        logger.info(url);
+        String u = url.toExternalForm();
+        if (u.contains("?")) {
+            u = u.substring(0, u.indexOf("?"));
+            return new URL(u);
+        }
+        else {
+            return url;
+        }
+    }
+
+    /**
+     * Returns a title for the album, preferring the heading shown on the album
+     * page itself.
+     *
+     * The page is fetched only when no response is cached in
+     * {@code pageResponse}; the fetched response is kept there for later reuse.
+     * If the page cannot be loaded or parsed, or it has no
+     * {@code div.libraryTitle > h1} element, the superclass naming is used.
+     *
+     * @param url the album URL
+     * @return the text of the first matching heading, or the superclass title
+     * @throws MalformedURLException declared for the superclass fallback call
+     */
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            if (pageResponse == null) {
+                pageResponse = Jsoup.connect(url.toExternalForm()).execute();
+            }
+            Elements headings = pageResponse.parse().select("div.libraryTitle > h1");
+            if (headings.size() > 0) {
+                return headings.get(0).text();
+            }
+            // No heading on the page: fall through to the default naming below.
+        } catch (IOException e) {
+            // Best effort only: fall back to the default album naming convention.
+        }
+        return super.getAlbumTitle(url);
+    }
+
+    /** Matches Photobucket library URLs; group 1 captures the user name. */
+    private static final Pattern LIBRARY_URL_PATTERN = Pattern.compile(
+            "^https?://[a-zA-Z0-9]+\\.photobucket\\.com/user/([a-zA-Z0-9_\\-]+)/library.*$");
+
+    /**
+     * Extracts the gallery identifier (the Photobucket user name) from a
+     * library URL.
+     *
+     * @param url a URL of the form
+     *            {@code http://x###.photobucket.com/user/<username>/library/...}
+     * @return the user name segment of the URL
+     * @throws MalformedURLException if the URL does not have the expected form
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        // e.g. http://s844.photobucket.com/user/SpazzySpizzy/library/Lady%20Gaga?sort=3&page=1
+        Matcher m = LIBRARY_URL_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+
+        // The advertised format includes the "/user/" segment the pattern requires.
+        throw new MalformedURLException(
+                "Expected photobucket.com gallery formats: "
+                        + "http://x###.photobucket.com/user/username/library/..."
+                        + " Got: " + url);
+    }
+
+    @Override
+    public void rip() throws IOException {
+        List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm());
+
+        List<String> subsToRip = new ArrayList<String>(),
+                    rippedSubs = new ArrayList<String>();
+
+        for (String sub : subalbums) {
+            subsToRip.add(sub);
+        }
+
+        while (subsToRip.size() > 0 && !isStopped()) {
+            try {
+                Thread.sleep(1000);
+            } catch (InterruptedException e) {
+                break;
+            }
+            String nextSub = subsToRip.remove(0);
+            rippedSubs.add(nextSub);
+            logger.info("Attempting to rip next subalbum: " + nextSub);
+            try {
+                pageResponse = null;
+                subalbums = ripAlbumAndGetSubalbums(nextSub);
+            } catch (IOException e) {
+                logger.error("Error while ripping " + nextSub, e);
+                break;
+            }
+            for (String subalbum : subalbums) {
+                if (!subsToRip.contains(subalbum) && !rippedSubs.contains(subalbum)) {
+                    subsToRip.add(subalbum);
+                }
+            }
+        }
+        waitForThreads();
+    }
+
+    public List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
+        int filesIndex = 0,
+            filesTotal = 0,
+            pageIndex = 0;
+        String currentAlbumPath = null,
+               url = null;
+
+        while (pageIndex == 0 || filesIndex < filesTotal) {
+            if (isStopped()) {
+                break;
+            }
+            pageIndex++;
+            if (pageIndex > 1 || pageResponse == null) {
+                url = theUrl + String.format("?sort=3&page=", pageIndex);
+                logger.info("    Retrieving " + url);
+                pageResponse = Jsoup.connect(url).execute();
+            }
+            Document albumDoc = pageResponse.parse();
+            // Retrieve JSON from request
+            String jsonString = null;
+            for (Element script : albumDoc.select("script[type=text/javascript]")) {
+                String data = script.data();
+                if (!data.contains("libraryAlbumsPageCollectionData")) {
+                    continue;
+                }
+                // Ensure this chunk of javascript contains the album info
+                // Grab the JSON
+                Pattern p; Matcher m;
+                p = Pattern.compile("^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL);
+                m = p.matcher(data);
+                if (m.matches()) {
+                    jsonString = m.group(1);
+                    break;
+                }
+            }
+            if (jsonString == null) {
+                logger.error("Unable to find JSON data at URL: " + url);
+                break;
+            }
+            JSONObject json = new JSONObject(jsonString);
+            JSONObject items = json.getJSONObject("items");
+            JSONArray objects = items.getJSONArray("objects");
+            filesTotal = items.getInt("total");
+            currentAlbumPath = json.getString("currentAlbumPath");
+            for (int i = 0; i < objects.length(); i++) {
+                JSONObject object = objects.getJSONObject(i);
+                String image = object.getString("fullsizeUrl");
+                filesIndex += 1;
+                addURLToDownload(new URL(image), 
+                        "",
+                        object.getString("location"),
+                        albumDoc.location(),
+                        pageResponse.cookies());
+            }
+        }
+        // Get subalbums
+        if (url != null) {
+            return getSubAlbums(url, currentAlbumPath);
+        } else {
+            return new ArrayList<String>();
+        }
+    }
+
+    /**
+     * Asks Photobucket's sub-album listing endpoint for the children of an
+     * album and turns each returned path into a full URL.
+     *
+     * The request goes to the same subdomain as {@code url}, with {@code url}
+     * as the referrer. An {@link IOException} during the request is logged and
+     * yields whatever was collected so far (normally an empty list).
+     *
+     * @param url              a URL of the album page; supplies the subdomain and the referrer
+     * @param currentAlbumPath the album path sent as the {@code albumPath} parameter
+     * @return the sub-album URLs with spaces replaced by {@code %20}
+     */
+    private List<String> getSubAlbums(String url, String currentAlbumPath) {
+        List<String> subalbumUrls = new ArrayList<String>();
+        // The subdomain is the host label between "://" and the first dot, e.g. "s844".
+        String afterScheme = url.substring(url.indexOf("://") + 3);
+        String subdomain = afterScheme.substring(0, afterScheme.indexOf("."));
+        String site = "http://" + subdomain + ".photobucket.com";
+        String apiUrl = site + "/component/Albums-SubalbumList"
+                + "?deferCollapsed=true"
+                + "&albumPath=" + currentAlbumPath // %2Falbums%2Fab10%2FSpazzySpizzy"
+                + "&json=1";
+        try {
+            logger.info("Loading " + apiUrl);
+            String rawBody = Jsoup.connect(apiUrl)
+                                  .ignoreContentType(true)
+                                  .referrer(url)
+                                  .get()
+                                  .body()
+                                  .html();
+            // The JSON arrives HTML-escaped; restore the quotes before parsing.
+            JSONObject json = new JSONObject(rawBody.replace("&quot;", "\""));
+            JSONArray entries = json.getJSONObject("body").getJSONArray("subAlbums");
+            for (int idx = 0; idx < entries.length(); idx++) {
+                String path = entries.getJSONObject(idx).getString("path");
+                subalbumUrls.add((site + path).replace(" ", "%20"));
+            }
+        } catch (IOException e) {
+            logger.error("Failed to get subalbums from " + apiUrl, e);
+        }
+        return subalbumUrls;
+    }
+
+    public boolean canRip(URL url) {
+        return url.getHost().endsWith(DOMAIN);
+    }
+
+}
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -19,7 +19,7 @@
 public class UpdateUtils {
 
     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.0.24";
+    private static final String DEFAULT_VERSION = "1.0.25";
     private static final String updateJsonURL = "http://rarchives.com/ripme.json";
     private static final String updateJarURL = "http://rarchives.com/ripme.jar";
     private static final String mainFileName = "ripme.jar";