diff --git a/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs index 830b3eb..317e9ce 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs @@ -252,6 +252,9 @@ protected void AddGenericPhotoUrl(string post) { foreach (string imageUrl in tumblrParser.SearchForGenericPhotoUrl(post)) { + if (tumblrParser.IsTumblrUrl(imageUrl)) + continue; + if (CheckIfSkipGif(imageUrl)) continue; @@ -263,15 +266,10 @@ protected void AddGenericVideoUrl(string post) { foreach (string videoUrl in tumblrParser.SearchForGenericVideoUrl(post)) { - string url = videoUrl; - if (url.Contains("tumblr") && shellService.Settings.VideoSize == 480) - { - int indexOfSuffix = url.LastIndexOf('.'); - if (indexOfSuffix >= 0) - url = url.Insert(indexOfSuffix, "_480"); - } - - AddToDownloadList(new VideoPost(url, Guid.NewGuid().ToString("N"))); + if (tumblrParser.IsTumblrUrl(videoUrl)) + continue; + + AddToDownloadList(new VideoPost(videoUrl, Guid.NewGuid().ToString("N"))); } } } diff --git a/src/TumblThree/TumblThree.Applications/Parser/ITumblrParser.cs b/src/TumblThree/TumblThree.Applications/Parser/ITumblrParser.cs index c8a8c59..c52527a 100644 --- a/src/TumblThree/TumblThree.Applications/Parser/ITumblrParser.cs +++ b/src/TumblThree/TumblThree.Applications/Parser/ITumblrParser.cs @@ -24,5 +24,7 @@ public interface ITumblrParser IEnumerable SearchForGenericPhotoUrl(string searchableText); IEnumerable SearchForGenericVideoUrl(string searchableText); + + bool IsTumblrUrl(string url); } } diff --git a/src/TumblThree/TumblThree.Applications/Parser/TumblrParser.cs b/src/TumblThree/TumblThree.Applications/Parser/TumblrParser.cs index 2fec2de..e359b43 100644 --- a/src/TumblThree/TumblThree.Applications/Parser/TumblrParser.cs +++ b/src/TumblThree/TumblThree.Applications/Parser/TumblrParser.cs @@ -64,5 +64,11 @@ public IEnumerable SearchForGenericVideoUrl(string searchableText) yield return videoUrl; } } + + public bool IsTumblrUrl(string url) + { + var regex = new Regex("tumblr_[\\w]*"); + return regex.IsMatch(url); + } } } diff --git a/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs b/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs index 333b49c..b6737dc 100644 --- a/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs +++ b/src/TumblThree/TumblThree.Presentation/Properties/Resources.Designer.cs @@ -2202,7 +2202,8 @@ public static string ToolTipRegExPhotos { /// /// Looks up a localized string similar to Uses regular expressions to search for images in everything TumblThree scans. - ///This will add plenty of duplicate image urls to the queue, but might gather images from websites that are currently not supported by a specifically written parser.. + ///This might add plenty of duplicate or random image urls to the queue, but could potentially + ///gather images from websites that are currently not supported by a specifically written parser.. /// public static string ToolTipRegExPhotosDescription { get { @@ -2221,7 +2222,8 @@ public static string ToolTipRegExVideos { /// /// Looks up a localized string similar to Uses regular expressions to search for videos in everything TumblThree scans. - ///This will add plenty of duplicate video urls to the queue, but might gather videos from websites that are currently not supported by a specifically written parser.. + ///This might add plenty of duplicate or random video urls to the queue, but could potentially + ///gather videos from websites that are currently not supported by a specifically written parser.. /// public static string ToolTipRegExVideosDescription { get { @@ -2257,7 +2259,7 @@ public static string ToolTipScan { } /// - /// Looks up a localized string similar to Sets the number connections used for scanning. + /// Looks up a localized string similar to Sets the number of connections used for scanning. ///Since the data is usually small, you should leave this high. ///Note: This setting has no impact if the Limit Tumblr Api Connections setting is turned on.. /// @@ -2368,7 +2370,7 @@ public static string ToolTipVideoConnections { } /// - /// Looks up a localized string similar to The vt.tumblr.com host regularly closes connections if the number is too high.. + /// Looks up a localized string similar to The v*.tumblr.com hosts regularly close connections if the number is too high.. /// public static string ToolTipVideoConnectionsDescription { get { diff --git a/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx b/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx index 0739cf8..f04c76e 100644 --- a/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx +++ b/src/TumblThree/TumblThree.Presentation/Properties/Resources.resx @@ -476,7 +476,7 @@ since the throttling algorithm is too slow to adjust and might negatively impact Number of connections used for each scan - Sets the number connections used for scanning. + Sets the number of connections used for scanning. Since the data is usually small, you should leave this high. Note: This setting has no impact if the Limit Tumblr Api Connections setting is turned on. @@ -700,7 +700,7 @@ E.g. great big car, bears searches for great big cars and bears. Number of concurrent connections to the tumblr video host - The vt.tumblr.com host regularly closes connections if the number is too high. + The v*.tumblr.com hosts regularly close connections if the number is too high. External @@ -927,13 +927,15 @@ This value determines the information refresh rate for each individual queued bl Uses regular expressions to search for images in everything TumblThree scans. -This will add plenty of duplicate image urls to the queue, but might gather images from websites that are currently not supported by a specifically written parser. +This might add plenty of duplicate or random image urls to the queue, but could potentially +gather images from websites that are currently not supported by a specifically written parser. Search for videos in the crawl data Uses regular expressions to search for videos in everything TumblThree scans. -This will add plenty of duplicate video urls to the queue, but might gather videos from websites that are currently not supported by a specifically written parser. +This might add plenty of duplicate or random video urls to the queue, but could potentially +gather videos from websites that are currently not supported by a specifically written parser. \ No newline at end of file