Skip to content
This repository has been archived by the owner on Mar 9, 2021. It is now read-only.

Commit

Permalink
Cache StreamWriter instances for text downloading
Browse files Browse the repository at this point in the history
Stores StreamWriter instances in a Dictionary and reuses them for recurring text appends when downloading text posts.
This prevents massive seek I/O in large blog downloads.
  • Loading branch information
johanneszab committed Jun 21, 2020
1 parent 8d452b5 commit 7c3699a
Show file tree
Hide file tree
Showing 11 changed files with 137 additions and 11 deletions.
4 changes: 2 additions & 2 deletions src/TumblThree/SharedAssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@

[assembly: ComVisible(false)]
[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)]
[assembly: AssemblyVersion("1.0.8.73")]
[assembly: AssemblyFileVersion("1.0.8.73")]
[assembly: AssemblyVersion("1.0.8.74")]
[assembly: AssemblyFileVersion("1.0.8.74")]
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
namespace TumblThree.Applications.Controllers
{
[Export]
internal class CrawlerController
internal class CrawlerController : IDisposable
{
private readonly ICrawlerFactory crawlerFactory;
private readonly ICrawlerService crawlerService;
Expand Down Expand Up @@ -178,6 +178,7 @@ private async Task RunCrawlerTasksAsync(CancellationToken ct, PauseToken pt)

ICrawler crawler = crawlerFactory.GetCrawler(blog, ct, pt, new Progress<DownloadProgress>());
crawler.IsBlogOnlineAsync().Wait(4000);
crawler.Dispose();

if (crawlerService.ActiveItems.Any(item =>
item.Blog.Name.Equals(nextQueueItem.Blog.Name) &&
Expand Down Expand Up @@ -215,6 +216,7 @@ private async Task StartSiteSpecificDownloaderAsync(QueueListItem queueListItem,

ICrawler crawler = crawlerFactory.GetCrawler(blog, ct, pt, progress);
await crawler.CrawlAsync();
crawler.Dispose();

Monitor.Enter(lockObject);
QueueOnDispatcher.CheckBeginInvokeOnUI(() => crawlerService.RemoveActiveItem(queueListItem));
Expand All @@ -233,5 +235,19 @@ private ProgressThrottler<DownloadProgress> SetupThrottledQueueListProgress(Queu
var progressHandler = new Progress<DownloadProgress>(value => { queueListItem.Progress = value.Progress; });
return new ProgressThrottler<DownloadProgress>(progressHandler, shellService.Settings.ProgressUpdateInterval);
}

/// <summary>
/// Releases the disposable state owned by this controller.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>; when <c>false</c> this method does nothing.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // Skipped when the token source is null.
    crawlerCancellationTokenSource?.Dispose();
}

/// <summary>
/// Disposes this instance and asks the garbage collector not to run its finalizer.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ private async Task CheckStatusOfBlogsAsync(SemaphoreSlim semaphoreSlim, IBlog bl
ICrawler crawler = crawlerFactory.GetCrawler(blog, new CancellationToken(), new PauseToken(),
new Progress<DownloadProgress>());
await crawler.IsBlogOnlineAsync();
crawler.Dispose();
}
finally
{
Expand Down Expand Up @@ -645,6 +646,7 @@ private async Task UpdateMetaInformationAsync(IBlog blog)
new Progress<DownloadProgress>());

await crawler.UpdateMetaInformationAsync();
crawler.Dispose();
}

private IBlog CheckIfCrawlableBlog(string blogUrl)
Expand Down
5 changes: 3 additions & 2 deletions src/TumblThree/TumblThree.Applications/Crawler/ICrawler.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
using System.Threading.Tasks;
using System;
using System.Threading.Tasks;

namespace TumblThree.Applications.Crawler
{
public interface ICrawler
public interface ICrawler : IDisposable
{
Task CrawlAsync();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -700,5 +700,20 @@ private async Task AddExternalPhotoUrlToDownloadListAsync(Post post)

if (blog.DownloadCatBox) AddCatBoxUrl(searchableText, timestamp);
}

/// <summary>
/// Releases the disposable state owned by this crawler.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>; when <c>false</c> this method does nothing.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // The semaphore is skipped when null; the downloader is disposed unconditionally.
    semaphoreSlim?.Dispose();
    downloader.Dispose();
}

/// <summary>
/// Disposes this instance and asks the garbage collector not to run its finalizer.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -666,5 +666,20 @@ private async Task AddExternalPhotoUrlToDownloadListAsync(Post post)

if (blog.DownloadCatBox) AddCatBoxUrl(searchableText, timestamp);
}

/// <summary>
/// Releases the disposable state owned by this crawler.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>; when <c>false</c> this method does nothing.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // The semaphore is skipped when null; the downloader is disposed unconditionally.
    semaphoreSlim?.Dispose();
    downloader.Dispose();
}

/// <summary>
/// Disposes this instance and asks the garbage collector not to run its finalizer.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -255,5 +255,20 @@ private void AddVideoUrlToDownloadList(string document)
if (blog.RegExVideos)
AddGenericVideoUrl(document);
}

/// <summary>
/// Releases the disposable state owned by this crawler.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>; when <c>false</c> this method does nothing.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // The semaphore is skipped when null; the downloader is disposed unconditionally.
    semaphoreSlim?.Dispose();
    downloader.Dispose();
}

/// <summary>
/// Disposes this instance and asks the garbage collector not to run its finalizer.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -206,5 +206,20 @@ private void AddVideoUrlToDownloadList(string document)
if (blog.RegExVideos)
AddGenericVideoUrl(document);
}

/// <summary>
/// Releases the disposable state owned by this crawler.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>; when <c>false</c> this method does nothing.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // The semaphore is skipped when null; the downloader is disposed unconditionally.
    semaphoreSlim?.Dispose();
    downloader.Dispose();
}

/// <summary>
/// Disposes this instance and asks the garbage collector not to run its finalizer.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -247,5 +247,20 @@ private void AddVideoUrlToDownloadList(string document)
if (blog.RegExVideos)
AddGenericVideoUrl(document);
}

/// <summary>
/// Releases the disposable state owned by this crawler.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>; when <c>false</c> this method does nothing.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // The semaphore is skipped when null; the downloader is disposed unconditionally.
    semaphoreSlim?.Dispose();
    downloader.Dispose();
}

/// <summary>
/// Disposes this instance and asks the garbage collector not to run its finalizer.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public abstract class AbstractDownloader : IDownloader

private SemaphoreSlim concurrentConnectionsSemaphore;
private SemaphoreSlim concurrentVideoConnectionsSemaphore;
private readonly Dictionary<string, StreamWriter> streamWriters = new Dictionary<string, StreamWriter>();

protected AbstractDownloader(IShellService shellService, IManagerService managerService, CancellationToken ct,
PauseToken pt, IProgress<DownloadProgress> progress, IPostQueue<TumblrPost> postQueue, FileDownloader fileDownloader,
Expand Down Expand Up @@ -123,10 +124,8 @@ protected virtual bool AppendToTextFile(string fileLocation, string text)
{
lock (lockObjectDownload)
{
using (var sw = new StreamWriter(fileLocation, true))
{
sw.WriteLine(text);
}
StreamWriter sw = GetTextAppenderStreamWriter(fileLocation);
sw.WriteLine(text);
}

return true;
Expand All @@ -144,6 +143,18 @@ protected virtual bool AppendToTextFile(string fileLocation, string text)
}
}

/// <summary>
/// Returns the cached appending <see cref="StreamWriter"/> for the given file,
/// creating and caching a new one the first time the file is requested.
/// </summary>
/// <param name="key">Path of the text file; it is also used as the cache key.</param>
/// <returns>A writer opened in append mode on <paramref name="key"/>.</returns>
private StreamWriter GetTextAppenderStreamWriter(string key)
{
    // One dictionary lookup via TryGetValue instead of ContainsKey followed by the indexer.
    StreamWriter writer;
    if (streamWriters.TryGetValue(key, out writer))
    {
        return writer;
    }

    // Second constructor argument 'true' opens the file in append mode.
    writer = new StreamWriter(key, true);
    streamWriters.Add(key, writer);

    return writer;
}

public virtual async Task<bool> DownloadBlogAsync()
{
concurrentConnectionsSemaphore =
Expand Down Expand Up @@ -335,5 +346,25 @@ protected void CheckIfShouldPause()
if (pt.IsPaused)
pt.WaitWhilePausedWithResponseAsyc().Wait();
}

/// <summary>
/// Releases the connection semaphores and every cached text-file writer held by this downloader.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>; when <c>false</c> this method does nothing.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    // Either semaphore is skipped when null.
    concurrentConnectionsSemaphore?.Dispose();
    concurrentVideoConnectionsSemaphore?.Dispose();

    // Dispose each StreamWriter that was cached for text appends.
    foreach (StreamWriter writer in streamWriters.Values)
    {
        writer.Dispose();
    }
}

/// <summary>
/// Disposes this instance and asks the garbage collector not to run its finalizer.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
using System.Threading.Tasks;
using System;
using System.Threading.Tasks;

namespace TumblThree.Applications.Downloader
{
public interface IDownloader
public interface IDownloader : IDisposable
{
Task<bool> DownloadBlogAsync();

Expand Down

0 comments on commit 7c3699a

Please sign in to comment.