-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathConsoleApplication.cs
463 lines (389 loc) · 13.3 KB
/
ConsoleApplication.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
// ***********************************************************************
// Assembly : DataExtractNetCore5
// Author : Leo Mrozek
// Created : 09-17-2020
//
// Last Modified By : Leo Mrozek
// Last Modified On : 11-29-2021
// ***********************************************************************
// <copyright file="ConsoleApplication.cs" company="Leo Mrozek Consulting">
// Leo Mrozek
// </copyright>
// <summary></summary>
// ***********************************************************************
using DataExtract.Classes;
using DataExtract.Interfaces;
using System;
using System.Data;
using System.IO;
using System.Reflection;
using Newtonsoft.Json;
using Microsoft.Extensions.Logging;
using System.Linq;
using System.Text.RegularExpressions;
using File = System.IO.File;
#if NET5_0
using System.Net;
#endif
#if NET6_0_OR_GREATER
using System.Net.Http;
#endif
namespace DataExtract
{
/// <summary>
/// Class ConsoleApplication.
/// </summary>
public class ConsoleApplication
{
/// <summary>
/// The logger for the application
/// </summary>
private readonly ILogger<ConsoleApplication> _logger;
/// <summary>
/// The Data Access Layer (DAL) for the application
/// </summary>
private readonly IDal _dal;
/// <summary>
/// Stores CommandLineOptions passed in as formatted arguments
/// </summary>
private CommandLineOptions _opts;
/// <summary>
/// Initializes class with needed injected classes
/// </summary>
/// <param name="logger">The logger.</param>
/// <param name="dal">The dal.</param>
public ConsoleApplication(ILogger<ConsoleApplication> logger, IDal dal)
{
_logger = logger;
_dal = dal;
}
// Link to the CSV file that contains the data on world.data
private const string Url = "https://query.data.world/s/omp3uuql6d4u2cugslj7kdtbhs4q4k";
// Spreadsheet name on world.data
private const string FileName = "Covid-19-Activity.csv";
private readonly string _fileFolder = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
/// <summary>
/// Entry point of the application that download, extracts from CSV and saves to database.
/// </summary>
/// <param name="opts">CommandLineParameters passed from main</param>
public void Run(CommandLineOptions opts)
{
_logger.LogInformation("Starting application");
// Save CommandLineOptions into local variable
_opts = opts;
var drRowData = _dal.GetRowData().Tables[0].Rows[0];
var rowCounts = (int)drRowData["RowCount"];
var lastDate = drRowData["LastDataDate"].ToString();
// Set the path to the CSV File
var csvFile = _fileFolder + $@"\{FileName}";
if (opts.SkipDownload)
{
_logger.LogError("Skipping downloading CSV File");
if (!GetLatestSavedFile(csvFile))
{
if (!_opts.WaitForExit)
{
Environment.Exit(0);
}
Console.ReadKey(false);
Environment.Exit(0);
}
}
switch (opts.SkipDownload)
{
// If we were skipping Download and the file doesn't exist
case true when !File.Exists(csvFile):
// delete the existing file if it exists
if (!DeleteCsvFile(csvFile))
{
if (!_opts.WaitForExit)
{
Environment.Exit(0);
}
Console.ReadKey(false);
Environment.Exit(0);
}
break;
case false:
{
DownloadFile(csvFile);
if (!File.Exists(FileName))
{
_logger.LogError("Error Downloading CSV File");
return;
}
break;
}
}
// This works with Any CSV File to load into a data table
_logger.LogInformation("Loading CSV File to DataTable");
var dt = _dal.GetDataTableFromCsvFile(csvFile);
_logger.LogInformation("Completed loading CSV File to DataTable");
_logger.LogInformation("Get Row Counts from CSV File");
if (LogRowCounts(dt, rowCounts, lastDate)) return;
// Truncate working data table of all data
_logger.LogInformation("Truncating Working Table");
_dal.TruncateData(Dal.DataTables.Working);
// Bulk Load Data into working data table
_logger.LogInformation("Loading Data to Working Table");
if (opts.FullReload || opts.ForceReload)
{
// Bulk load full All data to staging table
_logger.LogInformation("Loading Data with Full Reload");
_dal.BulkLoadData(dt);
// Truncating Production data table of all data
_logger.LogInformation("Truncating Data in Production Table");
_dal.TruncateData(Dal.DataTables.Production);
}
else
{
// Bulk load only today's data to staging table
_logger.LogInformation("Loading Data with Today Only Load");
_dal.BulkLoadData(dt.Select($"REPORT_DATE > #{lastDate}#").CopyToDataTable());
}
// Load production data table from the staging table
_logger.LogInformation("Loading Production Table");
LoadProductionData();
_logger.LogInformation("Finished Loading Production Table");
// Reindex production tables
_logger.LogInformation(@"Re-Indexing Production Table");
_dal.RebuildIndexes();
// Clean up CSV File
if (opts.KeepDownload)
{
_logger.LogInformation("Saving CSV File.");
SaveActivityFile(csvFile);
}
_logger.LogInformation("Cleaning up downloaded file");
DeleteCsvFile(csvFile);
if (!_opts.WaitForExit) return;
_logger.LogInformation("Finished, Press any key to continue...");
Console.ReadKey(false);
}
/// <summary>
/// Download the file from world.data based on .NET version
/// </summary>
/// <param name="csvFile"></param>
private void DownloadFile(string csvFile)
{
_logger.LogInformation(" Downloading CSV File");
#if NET6_0_OR_GREATER
if (!DownloadFileNet(Url, csvFile))
{
if (!_opts.WaitForExit) return;
_logger.LogInformation("Finished, Press any key to continue...");
Console.ReadKey(false);
}
#elif NET5_0
if(!(DownloadFileNet(Url, csvFile))
if (!_opts.WaitForExit) return;
_logger.LogInformation("Finished, Press any key to continue...");
Console.ReadKey(false);
}
#else
// Code here for version <.NET 5 (Like Core 3.1, Core 3.0, etc.
#endif
_logger.LogInformation("Finished downloading CSV File");
}
/// <summary>
/// Deletes the CSV file that is downloaded (if it exists)
/// </summary>
/// <param name="csvFile">Name of the CSV file to delete</param>
private bool DeleteCsvFile(string csvFile)
{
if (!File.Exists(csvFile)) { return true; }
_logger.LogInformation(" Deleting existing Download file");
File.Delete(csvFile);
if (!File.Exists(csvFile)) { return true; }
_logger.LogError(" Error deleting existing CSV File");
return false;
}
/// <summary>
/// Saving the downloaded CSV file and giving it a time/date stamp.
/// Also allows for reusing the saved file instead of downloading a new file.
/// </summary>
/// <param name="csvFile"></param>
private void SaveActivityFile(string csvFile)
{
var regex = new Regex("_[0-9]+[.]");
var cnt = 1;
var rename = $"{csvFile.Replace(".csv", string.Empty)}_{DateTime.Now:yyyyMMdd}_{cnt}.csv";
var di = new DirectoryInfo(_fileFolder).EnumerateFiles("*_??.csv").ToList();
var file = di.OrderByDescending(f => f.Name).FirstOrDefault();
_logger.LogInformation(" Renaming csvFile to save it.");
if (file == null)
{
File.Copy(csvFile, rename);
}
else
{
var match = regex.Match(file.Name);
cnt = Convert.ToInt32(match.Value.Replace("_", "").Replace(".", "")) + 1;
rename = $"{csvFile.Replace(".csv", string.Empty)}_{DateTime.Now:yyyyMMdd}_{cnt}.csv";
File.Copy(csvFile, rename);
}
}
/// <summary>
/// Gets the latest saved file for today
/// </summary>
/// <param name="csvFile"></param>
/// <returns>true/false if the file exists and is renamed to the same name as the downloaded file.</returns>
private bool GetLatestSavedFile(string csvFile)
{
var di = new DirectoryInfo(_fileFolder).EnumerateFiles("*_??.csv").ToList();
var file = di.OrderByDescending(f => f.Name).FirstOrDefault();
_logger.LogInformation(" Getting latest Today csvFile.");
if (file != null) { File.Copy(file.FullName, csvFile); }
if (!File.Exists(csvFile)) { return true; }
_logger.LogError(" Error getting latest Today csvFile.");
return false;
}
#if NET6_0_OR_GREATER
/// <summary>
/// Using .NET 6, downloads the file needed for import
/// </summary>
/// <param name="url"></param>
/// <param name="csvFile"></param>
/// <returns></returns>
private bool DownloadFileNet(string url, string csvFile)
{
// webComTimeoutTimeout is in milliseconds
const int webComTimeoutTimeout = 1000000;
var getTask = new HttpClient().GetAsync(url);
getTask.Wait(webComTimeoutTimeout);
if (!getTask.Result.IsSuccessStatusCode)
{
_logger.LogError(" **** Error Downloading CSV File ****");
return false;
}
if (File.Exists(csvFile))
{
try
{
_logger.LogInformation(" CSV File to download exists, deleting file.");
File.Delete(csvFile);
if (File.Exists(csvFile))
{
_logger.LogError(" **** Error deleting existing CSV File ****");
return false;
}
}
catch
{
_logger.LogError(" **** Error deleting existing CSV File ****");
return false;
}
}
using var fs = new FileStream(csvFile, FileMode.CreateNew);
var responseTask = getTask.Result.Content.CopyToAsync(fs);
responseTask.Wait(webComTimeoutTimeout);
if (File.Exists(csvFile)) { return true; }
_logger.LogError(" **** Error Downloading CSV File ****");
return false;
}
#elif NET5_0
/// <summary>
/// Using .NET 5, downloads the file needed for import
/// </summary>
/// <param name="url"></param>
/// <param name="csvFile"></param>
/// <returns></returns>
private bool DownloadFileNet(string url, string csvFile)
{
using var client = new WebClient();
client.DownloadFile(url, csvFile);
if (File.Exists(csvFile)) { return true; }
_logger.LogError(" **** Error Downloading CSV File ****");
return false;
}
#else
/// Using other versions of .NET need to go here, downloads the file needed for import
/// </summary>
/// <param name="url"></param>
/// <param name="csvFile"></param>
/// <returns></returns>
private bool DownloadFileNet(string url, string csvFile)
{
// Need to code for versions < .NET 5
_logger.LogError(" **** Error Downloading CSV File ****");
return false;
}
#endif
/// <summary>
/// Logs the row counts.
/// </summary>
/// <param name="dt">The dt.</param>
/// <param name="rowCounts">The row counts.</param>
/// <param name="lastDate">The last date.</param>
/// <returns><c>true</c> if row counts are same as previous run and force is false else <c>false</c> otherwise.</returns>
private bool LogRowCounts(DataTable dt, int rowCounts, string lastDate)
{
// Get the RowCounts from the CSV DataTable
var csvCounts = dt.Rows.Count;
// This is temp code to extract the data set as JSON. The file is about 250+MB so it
// is only run from Visual Studio for when needed.
if (_opts.SaveDataToJsonFile) { SaveDataToJsonFile(dt); }
// Display stats from current data and new data
_logger.LogInformation(" **** Current Row Counts ****");
_logger.LogInformation($" Row Count: {rowCounts}");
_logger.LogInformation($" Last Date: {lastDate}");
_logger.LogInformation(" **** CSV File Row Counts ****");
_logger.LogInformation($" Row Count: {csvCounts}");
// If there is no row count change, there is no new data
if (rowCounts == csvCounts && !_opts.ForceReload)
{
_logger.LogInformation(" **** NO CHANGE IN DATA, ABORTING LOAD ****");
var csvFile = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) + $@"\{FileName}";
if (File.Exists(csvFile))
{
if (_opts.KeepDownload)
{
SaveActivityFile(csvFile);
}
File.Delete(csvFile);
}
if (!_opts.WaitForExit) return true;
_logger.LogInformation("Finished, Press any key to continue...");
Console.ReadKey(false);
Environment.Exit(0);
}
else if (rowCounts == csvCounts && _opts.ForceReload)
{
_logger.LogInformation(" **** FORCE RELOADING DATA, ROW COUNT SAME ****");
}
return false;
}
/// <summary>
/// Load production data table from the staging table
/// </summary>
private void LoadProductionData()
{
var ds = _dal.LoadCovidData();
// Using returned data, compare import count with copy count to verify all data was copied
var dt2 = ds?.Tables[0];
if (dt2 == null) return;
var dr = dt2.Rows[0];
_logger.LogInformation($" Rows imported from Data World: {dr["ImportCount"]}");
_logger.LogInformation($" Rows copied to production Table: {dr["CopyCount"]}");
if ((int)dr["ImportCount"] != (int)dr["CopyCount"])
{
_logger.LogInformation(_opts.FullReload
? " Data copied from staging to production table does not equal rows imported. Re-Run when completed or investigate why mismatch."
: " Loaded today's data to production");
}
}
/// <summary>
/// Saves data that was imported from Data.World to JSON. File is huge (250MB+ when saved).
/// </summary>
/// <param name="dt">The dt.</param>
private static void SaveDataToJsonFile(DataTable dt)
{
var json = JsonConvert.SerializeObject(dt);
var jsonFile = $@"{Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location)}\Extract.json";
using var sw = new StreamWriter(jsonFile);
sw.Write(json);
sw.Close();
sw.Dispose();
}
}
}