-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dataframe csv datetime #5834
Merged
Merged
Dataframe csv datetime #5834
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
e8720a0
Give message with line and column of CSV file if data conversion fails
derekdiamond 98d7ea2
Allow parsing of DateTime data in CSV import
derekdiamond 2777720
Delete query.json
derekdiamond 63f7c0f
Address comments
aa816b5
Verify contents on date column
bc636d9
Change to a sample date
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,314 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
|
||
namespace Microsoft.Data.Analysis | ||
{ | ||
/// <summary>
/// <see cref="DateTime"/> implementation of <see cref="IPrimitiveColumnComputation{T}"/>.
/// Only the ordering-based operations (Max, Min, CumulativeMax, CumulativeMin) are
/// implemented; every arithmetic or boolean operation throws
/// <see cref="NotSupportedException"/>.
/// </summary>
/// <remarks>
/// The code in this class reads and writes raw buffer values only; it does not consult
/// any null/validity information of the column.
/// </remarks>
internal class DateTimeComputation : IPrimitiveColumnComputation<DateTime>
{
    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Abs(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void All(PrimitiveColumnContainer<DateTime> column, out bool ret) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Any(PrimitiveColumnContainer<DateTime> column, out bool ret) => throw new NotSupportedException();

    /// <summary>
    /// Replaces every value of <paramref name="column"/>, in place, with the largest value
    /// seen so far (running maximum over all buffers in order). An empty column is left untouched.
    /// </summary>
    public void CumulativeMax(PrimitiveColumnContainer<DateTime> column)
    {
        ApplyCumulative(column, keepLargest: true);
    }

    /// <summary>
    /// Running maximum restricted to <paramref name="rows"/>: rows are visited in enumeration
    /// order and each visited row is overwritten with the largest value visited so far.
    /// </summary>
    public void CumulativeMax(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows)
    {
        ApplyCumulative(column, rows, keepLargest: true);
    }

    /// <summary>
    /// Replaces every value of <paramref name="column"/>, in place, with the smallest value
    /// seen so far (running minimum over all buffers in order). An empty column is left untouched.
    /// </summary>
    public void CumulativeMin(PrimitiveColumnContainer<DateTime> column)
    {
        ApplyCumulative(column, keepLargest: false);
    }

    /// <summary>
    /// Running minimum restricted to <paramref name="rows"/>: rows are visited in enumeration
    /// order and each visited row is overwritten with the smallest value visited so far.
    /// </summary>
    public void CumulativeMin(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows)
    {
        ApplyCumulative(column, rows, keepLargest: false);
    }

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void CumulativeProduct(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void CumulativeProduct(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void CumulativeSum(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void CumulativeSum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows) => throw new NotSupportedException();

    /// <summary>
    /// Stores the largest value of <paramref name="column"/> in <paramref name="ret"/>.
    /// For a column without any values, <paramref name="ret"/> is <c>default(DateTime)</c>.
    /// </summary>
    public void Max(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
    {
        ret = FindExtreme(column, keepLargest: true);
    }

    /// <summary>
    /// Stores the largest value among the given <paramref name="rows"/> in <paramref name="ret"/>.
    /// When <paramref name="rows"/> yields nothing, <paramref name="ret"/> is <c>default(DateTime)</c>.
    /// </summary>
    public void Max(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
    {
        ret = FindExtreme(column, rows, keepLargest: true);
    }

    /// <summary>
    /// Stores the smallest value of <paramref name="column"/> in <paramref name="ret"/>.
    /// For a column without any values, <paramref name="ret"/> is <c>default(DateTime)</c>.
    /// </summary>
    public void Min(PrimitiveColumnContainer<DateTime> column, out DateTime ret)
    {
        ret = FindExtreme(column, keepLargest: false);
    }

    /// <summary>
    /// Stores the smallest value among the given <paramref name="rows"/> in <paramref name="ret"/>.
    /// When <paramref name="rows"/> yields nothing, <paramref name="ret"/> is <c>default(DateTime)</c>.
    /// </summary>
    /// <remarks>
    /// The result is seeded from the first selected row. Seeding from <c>default(DateTime)</c>
    /// (which equals <see cref="DateTime.MinValue"/>) would make every comparison fail and the
    /// method would always report <see cref="DateTime.MinValue"/>.
    /// </remarks>
    public void Min(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret)
    {
        ret = FindExtreme(column, rows, keepLargest: false);
    }

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Product(PrimitiveColumnContainer<DateTime> column, out DateTime ret) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Product(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Sum(PrimitiveColumnContainer<DateTime> column, out DateTime ret) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Sum(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, out DateTime ret) => throw new NotSupportedException();

    /// <summary>Not supported for <see cref="DateTime"/>.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Round(PrimitiveColumnContainer<DateTime> column) => throw new NotSupportedException();

    // True when candidate should replace current as the running extreme.
    private static bool Supersedes(DateTime candidate, DateTime current, bool keepLargest)
        => keepLargest ? candidate > current : candidate < current;

    // Splits an absolute row index into the index of its buffer (return value) and the
    // offset of the row inside that buffer.
    private static int LocateRow(long row, out int offset)
    {
        long capacity = ReadOnlyDataFrameBuffer<DateTime>.MaxCapacity;
        int bufferIndex = (int)(row / capacity);
        offset = (int)(row - checked(bufferIndex * capacity));
        return bufferIndex;
    }

    // Running max/min over the whole column, written back in place buffer by buffer.
    private static void ApplyCumulative(PrimitiveColumnContainer<DateTime> column, bool keepLargest)
    {
        DateTime running = default;
        bool seeded = false; // lazy seeding: an empty column is a no-op instead of an index error

        for (int b = 0; b < column.Buffers.Count; b++)
        {
            var source = column.Buffers[b];
            var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(source);
            var target = mutableBuffer.Span;
            var values = source.ReadOnlySpan;

            for (int i = 0; i < values.Length; i++)
            {
                DateTime value = values[i];
                if (!seeded || Supersedes(value, running, keepLargest))
                {
                    running = value;
                    seeded = true;
                }

                target[i] = running;
            }

            column.Buffers[b] = mutableBuffer;
        }
    }

    // Running max/min over the selected rows only, visited in enumeration order.
    private static void ApplyCumulative(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, bool keepLargest)
    {
        Span<DateTime> span = default;
        int loadedBuffer = 0;
        DateTime running = default;
        bool seeded = false;

        // foreach disposes the enumerator once iteration ends.
        foreach (long row in rows)
        {
            int bufferIndex = LocateRow(row, out int offset);
            if (!seeded || bufferIndex != loadedBuffer)
            {
                var mutableBuffer = DataFrameBuffer<DateTime>.GetMutableBuffer(column.Buffers[bufferIndex]);
                // Store the mutable buffer back, mirroring the whole-column overload.
                // NOTE(review): presumably GetMutableBuffer can hand back a copy of a
                // read-only buffer, in which case skipping this would lose the writes - confirm.
                column.Buffers[bufferIndex] = mutableBuffer;
                span = mutableBuffer.Span;
                loadedBuffer = bufferIndex;
            }

            DateTime value = span[offset];
            if (!seeded || Supersedes(value, running, keepLargest))
            {
                running = value;
                seeded = true;
            }

            span[offset] = running;
        }
    }

    // Largest/smallest value of the whole column; default(DateTime) when there are no values.
    private static DateTime FindExtreme(PrimitiveColumnContainer<DateTime> column, bool keepLargest)
    {
        DateTime best = default;
        bool seeded = false;

        for (int b = 0; b < column.Buffers.Count; b++)
        {
            var values = column.Buffers[b].ReadOnlySpan;
            for (int i = 0; i < values.Length; i++)
            {
                DateTime value = values[i];
                if (!seeded || Supersedes(value, best, keepLargest))
                {
                    best = value;
                    seeded = true;
                }
            }
        }

        return best;
    }

    // Largest/smallest value among the selected rows; default(DateTime) when rows is empty.
    private static DateTime FindExtreme(PrimitiveColumnContainer<DateTime> column, IEnumerable<long> rows, bool keepLargest)
    {
        ReadOnlySpan<DateTime> values = default;
        int loadedBuffer = 0;
        DateTime best = default;
        bool seeded = false;

        // foreach disposes the enumerator once iteration ends.
        foreach (long row in rows)
        {
            int bufferIndex = LocateRow(row, out int offset);
            if (!seeded || bufferIndex != loadedBuffer)
            {
                values = column.Buffers[bufferIndex].ReadOnlySpan;
                loadedBuffer = bufferIndex;
            }

            DateTime value = values[offset];
            // Seed from the first selected row rather than default(DateTime): the default
            // equals DateTime.MinValue, which no value can be smaller than.
            if (!seeded || Supersedes(value, best, keepLargest))
            {
                best = value;
                seeded = true;
            }
        }

        return best;
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if it is empty?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had the same thought when I first saw the PR, so I looked at what the other columns are doing. None of them check for empty here. It's not high priority IMO, so I'm thinking we can fix that for all the columns in a separate PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you log an issue for this? So we remember to do it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#5836