Skip to content
This repository has been archived by the owner on Mar 9, 2021. It is now read-only.

Commit

Permalink
#270 Improves the inline vt.tumblr.com video host search
Browse files Browse the repository at this point in the history
Currently, the inline video regex is new Regex("\"(http[A-Za-z0-9_/:.]*.com/video_file/[A-Za-z0-9_/:.]*)\""), which doesn't detect inlined Tumblr video URLs that have already been resolved (i.e. from different Tumblr video posts).
Usually, self-hosted Tumblr videos have a URL structure like the following: https://vt.tumblr.com/tumblr_ouv64tPd2n1wai44h{_rX}.mp4, which will not be picked up by the current regex.
  • Loading branch information
johanneszab committed Sep 17, 2018
1 parent 97ef627 commit bf6bbd8
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -532,11 +532,9 @@ private void AddVideoUrlToDownloadList(TumblrApiJson document)
{
if (CheckIfDownloadRebloggedPosts(post))
{
if (post.video_caption != null)
{
//var postCopy = (Post)post.Clone();
AddInlineVideoUrl(post);
}
//var postCopy = (Post)post.Clone();
AddInlineVttTumblrVideoUrl(post);
AddInlineVideoUrl(post);
}
}
}
Expand Down Expand Up @@ -769,6 +767,28 @@ private void AddInlinePhotoUrl(Post post)
}
}

/// <summary>
/// Scans the text returned by <c>InlineSearch(post)</c> for quoted links to
/// videos hosted on vtt.tumblr.com and queues one download per match.
/// </summary>
/// <param name="post">The post whose inline content is searched.</param>
private void AddInlineVttTumblrVideoUrl(Post post)
{
    // The dots of the host name are escaped so that only the literal host
    // "vtt.tumblr.com" matches; an unescaped '.' would match any character.
    // The capture stops at the first non-alphanumeric character after "tumblr_",
    // so any trailing suffix or extension in the link is dropped and the
    // suffix for the configured size is appended below instead.
    var regex = new Regex("\"(https?://vtt\\.tumblr\\.com/(tumblr_[A-Za-z0-9]*))");
    foreach (Match match in regex.Matches(InlineSearch(post)))
    {
        string videoUrl = match.Groups[1].Value;

        string fileSuffix;
        if (shellService.Settings.VideoSize == 1080)
        {
            fileSuffix = ".mp4";
        }
        else if (shellService.Settings.VideoSize == 480)
        {
            fileSuffix = "_480.mp4";
        }
        else
        {
            // Any other configured size queues nothing, as before.
            continue;
        }

        AddToDownloadList(new VideoPost(videoUrl + fileSuffix, post.id, post.unix_timestamp.ToString()));
        //AddToJsonQueue(new TumblrCrawlerXmlData(Path.ChangeExtension(videoUrl.Split('/').Last(), ".json"), post));
    }
}

private void AddInlineVideoUrl(Post post)
{
var regex = new Regex("\"(http[A-Za-z0-9_/:.]*.com/video_file/[A-Za-z0-9_/:.]*)\"");
Expand All @@ -784,7 +804,7 @@ private void AddInlineVideoUrl(Post post)
else if (shellService.Settings.VideoSize == 480)
{
AddToDownloadList(new VideoPost(
"https://vt.tumblr.com/" + videoUrl.Replace("/480", "").Split('/').Last() + "_480.mp4",
"https://vtt.tumblr.com/" + videoUrl.Replace("/480", "").Split('/').Last() + "_480.mp4",
post.id, post.unix_timestamp.ToString()));
//AddToJsonQueue(new TumblrCrawlerXmlData(Path.ChangeExtension(videoUrl.Split('/').Last(), ".json"), post));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ private void AddVideoUrlToDownloadList(TumblrJson document)
{
var postCopy = (Post)post.Clone();
postCopy.video_url = string.Empty;
AddInlineVttTumblrVideoUrl(postCopy);
AddInlineVideoUrl(postCopy);
}
}
Expand All @@ -561,7 +562,10 @@ private void AddVideoUrlToDownloadList(TumblrJson document)
if (post.type != "video" && CheckIfContainsTaggedPost(post))
{
if (CheckIfDownloadRebloggedPosts(post))
{
AddInlineVttTumblrVideoUrl(post);
AddInlineVideoUrl(post);
}
}
}
}
Expand All @@ -585,6 +589,27 @@ private void AddVideoUrl(Post post)
AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(videoUrl.Split('/').Last(), ".json"), post));
}

/// <summary>
/// Scans the post's caption for quoted links to videos hosted on
/// vtt.tumblr.com and queues one download per match.
/// </summary>
/// <param name="post">The post whose caption is searched; a null caption is skipped.</param>
private void AddInlineVttTumblrVideoUrl(Post post)
{
    if (post.caption == null)
    {
        return;
    }

    // The dots of the host name are escaped so that only the literal host
    // "vtt.tumblr.com" matches; an unescaped '.' would match any character.
    // The capture stops at the first non-alphanumeric character after "tumblr_",
    // so any trailing suffix or extension in the link is dropped and the
    // suffix for the configured size is appended below instead.
    var regex = new Regex("\"(https?://vtt\\.tumblr\\.com/(tumblr_[A-Za-z0-9]*))");
    foreach (Match match in regex.Matches(post.caption))
    {
        string videoUrl = match.Groups[1].Value;

        string fileSuffix;
        if (shellService.Settings.VideoSize == 1080)
        {
            fileSuffix = ".mp4";
        }
        else if (shellService.Settings.VideoSize == 480)
        {
            fileSuffix = "_480.mp4";
        }
        else
        {
            // Any other configured size queues nothing, as before.
            continue;
        }

        AddToDownloadList(new VideoPost(videoUrl + fileSuffix, post.id, post.timestamp.ToString()));
        //AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(videoUrl.Split('/').Last(), ".json"), post));
    }
}

private void AddInlineVideoUrl(Post post)
{
if (post.caption == null)
Expand All @@ -601,7 +626,7 @@ private void AddInlineVideoUrl(Post post)
else if (shellService.Settings.VideoSize == 480)
{
AddToDownloadList(new VideoPost(
"https://vt.tumblr.com/" + videoUrl.Replace("/480", "").Split('/').Last() + "_480.mp4",
"https://vtt.tumblr.com/" + videoUrl.Replace("/480", "").Split('/').Last() + "_480.mp4",
post.id, post.timestamp.ToString()));
//AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(videoUrl.Split('/').Last(), ".json"), post));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public Regex GetLoliSafeUrlRegex()
public string CreateLoliSafeUrl(string id, string detectedUrl, LoliSafeTypes type)
{
string url;
switch ( type)
switch (type)
{
case LoliSafeTypes.Mp4:
url = @"https://3dx.pw/" + id + ".mp4";
Expand Down

0 comments on commit bf6bbd8

Please sign in to comment.