Skip to content

Commit 9c55263

Browse files
authored
Merge pull request #1057 from Kotlin/deephaven-csv-default
Deephaven csv as default
2 parents 6bf0a3f + be88531 commit 9c55263

File tree

73 files changed

+881
-1446
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

73 files changed

+881
-1446
lines changed

build.gradle.kts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ dependencies {
5555
api(project(":dataframe-excel"))
5656
api(project(":dataframe-openapi"))
5757
api(project(":dataframe-jdbc"))
58-
// TODO enable when it leaves the experimental phase
59-
// api(project(":dataframe-csv"))
58+
api(project(":dataframe-csv"))
6059

6160
kover(project(":core"))
6261
kover(project(":dataframe-arrow"))

core/api/core.api

Lines changed: 98 additions & 39 deletions
Large diffs are not rendered by default.

core/build.gradle.kts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ dependencies {
8585
testImplementation(libs.kotlin.scriptingJvm)
8686
testImplementation(libs.jsoup)
8787
testImplementation(libs.sl4jsimple)
88+
89+
// for JupyterCodegenTests and samples.api
90+
testImplementation(project(":dataframe-csv"))
8891
}
8992

9093
val samplesImplementation by configurations.getting {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
1212
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
1313
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
1414
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
15-
import org.jetbrains.kotlinx.dataframe.io.readCSV
1615
import org.jetbrains.kotlinx.dataframe.typeClass
1716
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
1817
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
@@ -27,7 +26,7 @@ import kotlin.reflect.KType
2726
* These options are used to configure how [DataColumns][DataColumn] of type [String] or [String?][String]
2827
* should be parsed.
2928
* You can always pass a [ParserOptions] object to functions that perform parsing, like [tryParse], [parse],
30-
* or even [DataFrame.readCSV][DataFrame.Companion.readCSV] to override these options.
29+
* or even [DataFrame.readCsv][DataFrame.Companion.readCsv] to override these options.
3130
*/
3231
public val DataFrame.Companion.parser: GlobalParserOptions
3332
get() = Parsers

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

Lines changed: 83 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,35 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadCsvMethod
1919
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
2020
import org.jetbrains.kotlinx.dataframe.impl.api.parse
2121
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
22+
import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV
2223
import org.jetbrains.kotlinx.dataframe.util.AS_URL
2324
import org.jetbrains.kotlinx.dataframe.util.AS_URL_IMPORT
2425
import org.jetbrains.kotlinx.dataframe.util.AS_URL_REPLACE
2526
import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV
2627
import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV_REPLACE
28+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV
29+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_OR_URL_REPLACE
30+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_REPLACE
31+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_IMPORT
32+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_STREAM_REPLACE
33+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_URL_REPLACE
34+
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM
35+
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_READER_REPLACE
36+
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_STREAM_REPLACE
37+
import org.jetbrains.kotlinx.dataframe.util.TO_CSV
38+
import org.jetbrains.kotlinx.dataframe.util.TO_CSV_IMPORT
39+
import org.jetbrains.kotlinx.dataframe.util.TO_CSV_REPLACE
40+
import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV
41+
import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_FILE_REPLACE
42+
import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_IMPORT
43+
import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_PATH_REPLACE
44+
import org.jetbrains.kotlinx.dataframe.util.WRITE_CSV_WRITER_REPLACE
2745
import org.jetbrains.kotlinx.dataframe.values
2846
import java.io.BufferedInputStream
2947
import java.io.BufferedReader
3048
import java.io.File
3149
import java.io.FileInputStream
3250
import java.io.FileWriter
33-
import java.io.IOException
3451
import java.io.InputStream
3552
import java.io.InputStreamReader
3653
import java.io.Reader
@@ -46,6 +63,7 @@ import kotlin.reflect.KType
4663
import kotlin.reflect.typeOf
4764
import kotlin.time.Duration
4865

66+
@Deprecated(message = APACHE_CSV, level = DeprecationLevel.WARNING)
4967
public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat {
5068
override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame =
5169
DataFrame.readCSV(stream = stream, delimiter = delimiter, header = header)
@@ -57,14 +75,18 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat {
5775

5876
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough
5977

60-
override val testOrder: Int = 20000
78+
override val testOrder: Int = 20_001
6179

6280
override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod {
6381
val arguments = MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter)
6482
return DefaultReadCsvMethod(pathRepresentation, arguments)
6583
}
6684
}
6785

86+
@Deprecated(
87+
message = APACHE_CSV,
88+
level = DeprecationLevel.WARNING,
89+
)
6890
public enum class CSVType(public val format: CSVFormat) {
6991
DEFAULT(
7092
CSVFormat.DEFAULT.builder()
@@ -81,12 +103,19 @@ public enum class CSVType(public val format: CSVFormat) {
81103

82104
private val defaultCharset = Charsets.UTF_8
83105

106+
@Deprecated("", level = DeprecationLevel.WARNING)
84107
// True when the text after the last '.' in [fileOrUrl] is exactly "gz" or "zip" (case-sensitive).
// When there is no '.', the whole string is compared instead.
internal fun isCompressed(fileOrUrl: String) = fileOrUrl.substringAfterLast('.') in listOf("gz", "zip")
85108

109+
@Deprecated("", level = DeprecationLevel.WARNING)
86110
// True when the extension of [file] is exactly "gz" or "zip" (case-sensitive).
internal fun isCompressed(file: File) = file.extension.let { ext -> ext == "gz" || ext == "zip" }
87111

112+
@Deprecated("", level = DeprecationLevel.WARNING)
88113
// Applies the String-based check to the path component of [url] only.
internal fun isCompressed(url: URL) =
    isCompressed(fileOrUrl = url.path)
89114

115+
@Deprecated(
116+
message = APACHE_CSV,
117+
level = DeprecationLevel.HIDDEN, // clashes with the new readDelim
118+
)
90119
@Refine
91120
@Interpretable("ReadDelimStr")
92121
public fun DataFrame.Companion.readDelimStr(
@@ -106,7 +135,7 @@ public fun DataFrame.Companion.readDelimStr(
106135

107136
@Deprecated(
108137
message = DF_READ_NO_CSV,
109-
replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE),
138+
replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE, READ_CSV_IMPORT),
110139
level = DeprecationLevel.ERROR,
111140
)
112141
public fun DataFrame.Companion.read(
@@ -118,22 +147,13 @@ public fun DataFrame.Companion.read(
118147
readLines: Int? = null,
119148
duplicate: Boolean = true,
120149
charset: Charset = Charsets.UTF_8,
121-
): DataFrame<*> =
122-
catchHttpResponse(asUrl(fileOrUrl)) {
123-
readDelim(
124-
it,
125-
delimiter,
126-
header,
127-
isCompressed(fileOrUrl),
128-
getCSVType(fileOrUrl),
129-
colTypes,
130-
skipLines,
131-
readLines,
132-
duplicate,
133-
charset,
134-
)
135-
}
150+
): DataFrame<*> = error(DF_READ_NO_CSV)
136151

152+
@Deprecated(
153+
message = READ_CSV,
154+
replaceWith = ReplaceWith(READ_CSV_FILE_OR_URL_REPLACE, READ_CSV_IMPORT),
155+
level = DeprecationLevel.WARNING,
156+
)
137157
@OptInRefine
138158
@Interpretable("ReadCSV0")
139159
public fun DataFrame.Companion.readCSV(
@@ -163,6 +183,11 @@ public fun DataFrame.Companion.readCSV(
163183
)
164184
}
165185

186+
@Deprecated(
187+
message = READ_CSV,
188+
replaceWith = ReplaceWith(READ_CSV_FILE_REPLACE, READ_CSV_IMPORT),
189+
level = DeprecationLevel.WARNING,
190+
)
166191
public fun DataFrame.Companion.readCSV(
167192
file: File,
168193
delimiter: Char = ',',
@@ -188,6 +213,11 @@ public fun DataFrame.Companion.readCSV(
188213
parserOptions,
189214
)
190215

216+
@Deprecated(
217+
message = READ_CSV,
218+
replaceWith = ReplaceWith(READ_CSV_URL_REPLACE, READ_CSV_IMPORT),
219+
level = DeprecationLevel.WARNING,
220+
)
191221
public fun DataFrame.Companion.readCSV(
192222
url: URL,
193223
delimiter: Char = ',',
@@ -212,6 +242,11 @@ public fun DataFrame.Companion.readCSV(
212242
parserOptions,
213243
)
214244

245+
@Deprecated(
246+
message = READ_CSV,
247+
replaceWith = ReplaceWith(READ_CSV_STREAM_REPLACE, READ_CSV_IMPORT),
248+
level = DeprecationLevel.WARNING,
249+
)
215250
public fun DataFrame.Companion.readCSV(
216251
stream: InputStream,
217252
delimiter: Char = ',',
@@ -238,13 +273,6 @@ public fun DataFrame.Companion.readCSV(
238273
parserOptions,
239274
)
240275

241-
private fun getCSVType(path: String): CSVType =
242-
when (path.substringAfterLast('.').lowercase()) {
243-
"csv" -> CSVType.DEFAULT
244-
"tdf" -> CSVType.TDF
245-
else -> throw IOException("Unknown file format")
246-
}
247-
248276
@Deprecated(
249277
message = AS_URL,
250278
replaceWith = ReplaceWith(AS_URL_REPLACE, AS_URL_IMPORT),
@@ -264,6 +292,11 @@ private fun getFormat(
264292
.setAllowMissingColumnNames(duplicate)
265293
.build()
266294

295+
@Deprecated(
296+
message = READ_DELIM,
297+
replaceWith = ReplaceWith(READ_DELIM_STREAM_REPLACE),
298+
level = DeprecationLevel.WARNING,
299+
)
267300
public fun DataFrame.Companion.readDelim(
268301
inStream: InputStream,
269302
delimiter: Char = ',',
@@ -343,6 +376,11 @@ public fun ColType.toKType(): KType =
343376
ColType.Char -> typeOf<Char>()
344377
}
345378

379+
@Deprecated(
380+
message = READ_DELIM,
381+
replaceWith = ReplaceWith(READ_DELIM_READER_REPLACE),
382+
level = DeprecationLevel.WARNING,
383+
)
346384
public fun DataFrame.Companion.readDelim(
347385
reader: Reader,
348386
format: CSVFormat = CSVFormat.DEFAULT.builder()
@@ -371,12 +409,27 @@ public fun DataFrame.Companion.readDelim(
371409
)
372410
}
373411

412+
@Deprecated(
413+
message = WRITE_CSV,
414+
replaceWith = ReplaceWith(WRITE_CSV_FILE_REPLACE, WRITE_CSV_IMPORT),
415+
level = DeprecationLevel.WARNING,
416+
)
374417
public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT): Unit =
375418
writeCSV(FileWriter(file), format)
376419

420+
@Deprecated(
421+
message = WRITE_CSV,
422+
replaceWith = ReplaceWith(WRITE_CSV_PATH_REPLACE, WRITE_CSV_IMPORT),
423+
level = DeprecationLevel.WARNING,
424+
)
377425
public fun AnyFrame.writeCSV(path: String, format: CSVFormat = CSVFormat.DEFAULT): Unit =
378426
writeCSV(FileWriter(path), format)
379427

428+
@Deprecated(
429+
message = WRITE_CSV,
430+
replaceWith = ReplaceWith(WRITE_CSV_WRITER_REPLACE, WRITE_CSV_IMPORT),
431+
level = DeprecationLevel.WARNING,
432+
)
380433
public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.DEFAULT) {
381434
format.print(writer).use { printer ->
382435
if (!format.skipHeaderRecord) {
@@ -395,6 +448,11 @@ public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.D
395448
}
396449
}
397450

451+
@Deprecated(
452+
message = TO_CSV,
453+
replaceWith = ReplaceWith(TO_CSV_REPLACE, TO_CSV_IMPORT),
454+
level = DeprecationLevel.WARNING,
455+
)
398456
public fun AnyFrame.toCsv(format: CSVFormat = CSVFormat.DEFAULT): String =
399457
StringWriter().use {
400458
this.writeCSV(it, format)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,23 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
55
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
66
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
77
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadTsvMethod
8+
import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV
9+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV
10+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_OR_URL_REPLACE
11+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_REPLACE
12+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_IMPORT
13+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_STREAM_REPLACE
14+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_URL_REPLACE
815
import java.io.File
916
import java.io.FileInputStream
1017
import java.io.InputStream
1118
import java.net.URL
1219
import java.nio.charset.Charset
1320

21+
@Deprecated(
22+
message = APACHE_CSV,
23+
level = DeprecationLevel.WARNING,
24+
)
1425
public class TSV : SupportedDataFrameFormat {
1526
override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame =
1627
DataFrame.readTSV(stream, header = header)
@@ -21,14 +32,19 @@ public class TSV : SupportedDataFrameFormat {
2132

2233
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough
2334

24-
override val testOrder: Int = 30000
35+
override val testOrder: Int = 30_001
2536

2637
override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
2738
DefaultReadTsvMethod(pathRepresentation)
2839
}
2940

3041
private const val TAB_CHAR = '\t'
3142

43+
@Deprecated(
44+
message = READ_TSV,
45+
replaceWith = ReplaceWith(READ_TSV_FILE_OR_URL_REPLACE, READ_TSV_IMPORT),
46+
level = DeprecationLevel.WARNING,
47+
)
3248
public fun DataFrame.Companion.readTSV(
3349
fileOrUrl: String,
3450
header: List<String> = listOf(),
@@ -55,6 +71,11 @@ public fun DataFrame.Companion.readTSV(
5571
)
5672
}
5773

74+
@Deprecated(
75+
message = READ_TSV,
76+
replaceWith = ReplaceWith(READ_TSV_FILE_REPLACE, READ_TSV_IMPORT),
77+
level = DeprecationLevel.WARNING,
78+
)
5879
public fun DataFrame.Companion.readTSV(
5980
file: File,
6081
header: List<String> = listOf(),
@@ -77,6 +98,11 @@ public fun DataFrame.Companion.readTSV(
7798
charset,
7899
)
79100

101+
@Deprecated(
102+
message = READ_TSV,
103+
replaceWith = ReplaceWith(READ_TSV_URL_REPLACE, READ_TSV_IMPORT),
104+
level = DeprecationLevel.WARNING,
105+
)
80106
public fun DataFrame.Companion.readTSV(
81107
url: URL,
82108
header: List<String> = listOf(),
@@ -99,6 +125,11 @@ public fun DataFrame.Companion.readTSV(
99125
parserOptions,
100126
)
101127

128+
@Deprecated(
129+
message = READ_TSV,
130+
replaceWith = ReplaceWith(READ_TSV_STREAM_REPLACE, READ_TSV_IMPORT),
131+
level = DeprecationLevel.WARNING,
132+
)
102133
public fun DataFrame.Companion.readTSV(
103134
stream: InputStream,
104135
header: List<String> = listOf(),

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,15 @@ internal class Integration(private val notebook: Notebook, private val options:
157157
override fun Builder.onLoaded() {
158158
if (version != null) {
159159
if (enableExperimentalCsv?.toBoolean() == true) {
160-
println("Enabling experimental CSV module: dataframe-csv")
161-
dependencies("org.jetbrains.kotlinx:dataframe-csv:$version")
160+
println("CSV module is already enabled by default now.")
162161
}
163162
if (enableExperimentalGeo?.toBoolean() == true) {
164163
println("Enabling experimental Geo module: dataframe-geo")
165164
repositories("https://repo.osgeo.org/repository/release")
166165
dependencies("org.jetbrains.kotlinx:dataframe-geo:$version")
167166
}
168167
dependencies(
168+
"org.jetbrains.kotlinx:dataframe-csv:$version",
169169
"org.jetbrains.kotlinx:dataframe-excel:$version",
170170
"org.jetbrains.kotlinx:dataframe-jdbc:$version",
171171
"org.jetbrains.kotlinx:dataframe-arrow:$version",

0 commit comments

Comments
 (0)