Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cell type enum, date/sharedStrings helpers #102

Merged
merged 2 commits into from
Apr 6, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 16 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,42 +38,38 @@ guard let file = XLSXFile(filepath: "./categories.xlsx") else {
}

for path in try file.parseWorksheetPaths() {
let ws = try file.parseWorksheet(at: path)
for row in ws.data?.rows ?? [] {
let worksheet = try file.parseWorksheet(at: path)
for row in worksheet.data?.rows ?? [] {
for c in row.cells {
print(c)
}
}
}
```

This prints every cell from every worksheet in the given XLSX file. Please refer
This prints raw cell data from every worksheet in the given XLSX file. Please refer
to the [`Worksheet`
model](https://github.com/MaxDesiatov/CoreXLSX/blob/master/Sources/CoreXLSX/Worksheet/Worksheet.swift)
for more attributes you might need to read from a parsed file.

### Shared strings

Some cells (usually with strings) have their values shared in a separate model
type, which you can get by evaluating `try file.parseSharedString()`. You can
refer to the [`SharedStrings`
model](https://github.com/MaxDesiatov/CoreXLSX/blob/master/Sources/CoreXLSX/SharedStrings.swift)
for the full list of its properties.
Strings in spreadsheet internals are frequently represented as shared strings,
so to parse a string value from a cell, using the `stringValue(_: SharedStrings)`
function is recommended, together with `try file.parseSharedStrings()` to get
the list of shared strings first:

Here's how you can get all shared strings in column "C" for example:

```swift
let sharedStrings = try file.parseSharedStrings()
let columnCStrings = ws.cells(atColumns: [ColumnReference("C")!])
// in format internals "s" stands for "shared",
// if it is used, it means the value is an index of a shared string
.filter { $0.type == "s" }
// get the value of the cell
.compactMap { $0.value }
// convert the value to a number
.compactMap { Int($0) }
// use the number as an index in the array of shared strings
.compactMap { sharedStrings.items[$0].text }
let columnCStrings = worksheet.cells(atColumns: [ColumnReference("C")!])
.compactMap { $0.stringValue(sharedStrings) }
```

To parse a date value from a cell, use the `dateValue` property on the `Cell` type:

```swift
let columnCDates = worksheet.cells(atColumns: [ColumnReference("C")!])
.compactMap { $0.dateValue }
```

### Styles
Expand Down
12 changes: 11 additions & 1 deletion Sources/CoreXLSX/Worksheet/Cell.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@
// Created by Max Desiatov on 24/11/2018.
//

/// Type of a cell as written in the `t` attribute of a cell element. Each case
/// maps to the raw string used in the file format.
///
/// [docs](http://www.datypic.com/sc/ooxml/t-ssml_ST_CellType.html)
public enum CellType: String, Codable {
  /// Raw value `"b"`.
  case bool = "b"

  /// Raw value `"n"`.
  case number = "n"

  /// Raw value `"e"`.
  case error = "e"

  /// Raw value `"s"`: the cell value is an index into the shared strings table.
  case sharedString = "s"

  /// Raw value `"str"`.
  case string = "str"

  /// Raw value `"inlineStr"`.
  case inlineStr = "inlineStr"
}

// swiftlint:disable:next line_length
/// [docs](https://wiki.ucl.ac.uk/display/~ucftpw2/2013/10/22/Using+git+for+version+control+of+Excel+spreadsheets+-+part+2+of+3)
public struct Cell: Codable, Equatable {
public let reference: CellReference
public let type: String?
public let type: CellType?

// FIXME: Attribute "s" in a cell is an index into the styles table,
// while the cell type "s" corresponds to the shared string table.
Expand Down
57 changes: 53 additions & 4 deletions Sources/CoreXLSX/Worksheet/CellQueries.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
// Created by Max Desiatov on 24/11/2018.
//

extension Worksheet {
import Foundation

public extension Worksheet {
/// Return all cells that are contained in a given worksheet and collection of
/// columns.
public func cells<T>(atColumns columns: T) -> [Cell]
func cells<T>(atColumns columns: T) -> [Cell]
where T: Collection, T.Element == ColumnReference {
return data?.rows.map {
$0.cells.filter { columns.contains($0.reference.column) }
Expand All @@ -18,15 +20,15 @@ extension Worksheet {

/// Return all cells that are contained in a given worksheet and collection of
/// rows.
public func cells<T>(atRows rows: T) -> [Cell]
func cells<T>(atRows rows: T) -> [Cell]
where T: Collection, T.Element == UInt {
return data?.rows.filter { rows.contains($0.reference) }
.reduce([]) { $0 + $1.cells } ?? []
}

/// Return all cells that are contained in a given worksheet and collections
/// of rows and columns.
public func cells<T1, T2>(atColumns columns: T1, rows: T2) -> [Cell]
func cells<T1, T2>(atColumns columns: T1, rows: T2) -> [Cell]
where T1: Collection, T1.Element == ColumnReference,
T2: Collection, T2.Element == UInt {
return data?.rows.filter { rows.contains($0.reference) }.map {
Expand All @@ -35,3 +37,50 @@ extension Worksheet {
.reduce([]) { $0 + $1 } ?? []
}
}

/// Calendar used for all date arithmetic when converting cell values to dates.
let referenceCalendar = Calendar(identifier: .gregorian)

/// Time zone in which the reference date is interpreted. The cell date format
/// carries no time zone information, so the user's current time zone is used.
let referenceTimeZone = TimeZone.autoupdatingCurrent

/// Midnight of 30 December 1899 in `referenceTimeZone`. Cell date values are
/// expressed as a (possibly fractional) number of days counted from this date.
private let referenceDate = DateComponents(
  calendar: referenceCalendar,
  timeZone: referenceTimeZone,
  year: 1899,
  month: 12,
  day: 30,
  hour: 0,
  minute: 0,
  second: 0,
  nanosecond: 0
).date

/// Number of seconds in a day (24 * 60 * 60). The fractional part of a cell date
/// value is multiplied by this constant and added to the date as `.second`
/// components, so it must be expressed in seconds, not milliseconds.
private let secondsInADay: Double = 86_400

public extension Cell {
  /// Returns a string value for this cell, potentially loading a shared string
  /// value from a given `sharedStrings` argument.
  ///
  /// - Parameter sharedStrings: table of shared strings to look the value up in
  ///   when the cell is of the `.sharedString` type.
  /// - Returns: the text of the referenced shared string when the cell is of the
  ///   `.sharedString` type and its value is a valid index into
  ///   `sharedStrings.items`; `nil` when that index is out of bounds; the raw
  ///   `value` of the cell in all other cases.
  func stringValue(_ sharedStrings: SharedStrings) -> String? {
    guard type == .sharedString, let index = value.flatMap(Int.init) else { return value }

    // A malformed file may reference a shared string that doesn't exist;
    // report that as a missing value instead of trapping on the subscript.
    guard sharedStrings.items.indices.contains(index) else { return nil }

    return sharedStrings.items[index].text
  }

  /// Returns a date value parsed from the cell in the [OLE Automation
  /// Date](https://docs.microsoft.com/en-us/dotnet/api/system.datetime.tooadate?view=netframework-4.8)
  /// format. As this format doesn't encode time zones, current user's time zone is used, which is
  /// taken from `TimeZone.autoupdatingCurrent`.
  ///
  /// Returns `nil` when the cell is of the `.sharedString` type, when its value
  /// can't be parsed as a number, when the number is not finite or too large to
  /// be expressed as a whole number of days, or when the calendar can't compute
  /// the resulting date.
  var dateValue: Date? {
    guard
      type != .sharedString,
      let intervalSinceReference = value.flatMap(Double.init),
      let referenceDate = referenceDate
    else { return nil }

    // `Double.init` accepts "nan", "inf" and magnitudes beyond `Int` range, all of
    // which trap in `Int(_:)`. `Int(exactly:)` rejects them, and the input to it is
    // already floored, so every representable whole number of days is accepted.
    guard let days = Int(exactly: floor(intervalSinceReference)) else { return nil }

    // The fractional part encodes the time of day as a fraction of 24 hours.
    let seconds = Int(
      floor(intervalSinceReference.truncatingRemainder(dividingBy: 1) * secondsInADay)
    )

    guard let addedDays = referenceCalendar.date(byAdding: .day, value: days, to: referenceDate)
    else { return nil }

    return referenceCalendar.date(byAdding: .second, value: seconds, to: addedDays)
  }
}
Binary file added Tests/CoreXLSXTests/Dates.xlsx
Binary file not shown.
5 changes: 1 addition & 4 deletions Tests/CoreXLSXTests/SharedStrings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,7 @@ final class SharedStringsTests: XCTestCase {
for path in try file.parseWorksheetPaths() {
let ws = try file.parseWorksheet(at: path)
columnCStrings = ws.cells(atColumns: [ColumnReference("C")!])
.filter { $0.type == "s" }
.compactMap { $0.value }
.compactMap { Int($0) }
.compactMap { sharedStrings.items[$0].text }
.compactMap { $0.stringValue(sharedStrings) }
}

XCTAssertEqual(columnC, columnCStrings)
Expand Down
30 changes: 30 additions & 0 deletions Tests/CoreXLSXTests/XLSXFile.swift
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,36 @@ final class CoreXLSXTests: XCTestCase {
])
}

/// Parses every cell of every worksheet in `Dates.xlsx` and verifies that the
/// cells convertible to dates yield 10 September 2019 and 11 October 2019, in
/// that order, at midnight in the reference calendar and time zone.
func testDates() throws {
  let filepath = "\(currentWorkingPath)/Dates.xlsx"
  guard let file = XLSXFile(filepath: filepath) else {
    XCTAssert(false, "failed to open the file")
    return
  }

  // Collect dates in document order: worksheet by worksheet, row by row.
  var dates = [Date]()
  for path in try file.parseWorksheetPaths() {
    let worksheet = try file.parseWorksheet(at: path)
    for row in worksheet.data?.rows ?? [] {
      for cell in row.cells {
        if let date = cell.dateValue {
          dates.append(date)
        }
      }
    }
  }

  let firstExpected = DateComponents(
    calendar: referenceCalendar,
    timeZone: referenceTimeZone,
    year: 2019,
    month: 9,
    day: 10
  ).date
  let secondExpected = DateComponents(
    calendar: referenceCalendar,
    timeZone: referenceTimeZone,
    year: 2019,
    month: 10,
    day: 11
  ).date

  XCTAssertEqual(dates, [firstExpected, secondExpected])
}

func testPublicAPI() throws {
guard let file =
XLSXFile(filepath: "\(currentWorkingPath)/categories.xlsx") else {
Expand Down