Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Stabilize FastDoubleParser part 1 #1040

Merged
merged 7 commits into from
Feb 12, 2025
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDateTime
import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.io.toDataFrame
import java.math.BigDecimal
import java.math.BigInteger
Expand Down Expand Up @@ -226,8 +227,8 @@ public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColum
* @include [DataColumnStringConvertToDoubleDoc]
* @param nullStrings a set of strings that should be treated as `null` values.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
* @param useFastDoubleParser whether to use [FastDoubleParser].
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
*/
@JvmName("convertToDoubleFromString")
public fun DataColumn<String>.convertToDouble(
Expand All @@ -246,8 +247,8 @@ public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColu
* @include [DataColumnStringConvertToDoubleDoc]
* @param nullStrings a set of strings that should be treated as `null` values.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
* @param useFastDoubleParser whether to use [FastDoubleParser].
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
*/
@JvmName("convertToDoubleFromStringNullable")
public fun DataColumn<String?>.convertToDouble(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.io.readCSV
import org.jetbrains.kotlinx.dataframe.typeClass
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
Expand Down Expand Up @@ -45,6 +46,12 @@ public fun <T, C> DataFrame<T>.parse(vararg columns: ColumnReference<C>, options
public fun <T, C> DataFrame<T>.parse(vararg columns: KProperty<C>, options: ParserOptions? = null): DataFrame<T> =
parse(options) { columns.toColumnSet() }

/**
* Global counterpart of [ParserOptions].
* Settings changed here will affect the defaults for all parsing operations.
*
* The default values are set by [Parsers.resetToDefault].
*/
public interface GlobalParserOptions {

public fun addDateTimePattern(pattern: String)
Expand All @@ -54,7 +61,7 @@ public interface GlobalParserOptions {
/** This function can be called to skip some types. Parsing will be attempted for all other types. */
public fun addSkipType(type: KType)

/** Whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now. */
/** Whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter. */
public var useFastDoubleParser: Boolean

public fun resetToDefault()
Expand Down Expand Up @@ -91,7 +98,7 @@ public interface GlobalParserOptions {
* `["null", "NULL", "NA", "N/A"]`.
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
* @param useFastDoubleParser whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter.
*/
public class ParserOptions(
public val locale: Locale? = null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.to
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.size
Expand All @@ -47,6 +48,7 @@ import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.Temporal
import java.time.temporal.TemporalQuery
import java.util.Locale
import kotlin.properties.Delegates
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
Expand Down Expand Up @@ -114,6 +116,13 @@ internal class StringParserWithFormat<T>(
}
}

/**
* Central implementation for [GlobalParserOptions].
*
* Can be obtained by a user by calling [DataFrame.parser][DataFrame.Companion.parser].
*
* Defaults are set by [resetToDefault].
*/
internal object Parsers : GlobalParserOptions {

private val formatters: MutableList<DateTimeFormatter> = mutableListOf()
Expand All @@ -140,7 +149,7 @@ internal object Parsers : GlobalParserOptions {
skipTypesSet.add(type)
}

override var useFastDoubleParser: Boolean = false
override var useFastDoubleParser by Delegates.notNull<Boolean>()

private var _locale: Locale? = null

Expand All @@ -165,7 +174,7 @@ internal object Parsers : GlobalParserOptions {
.toFormatter()
.let { formatters.add(it) }

useFastDoubleParser = false
useFastDoubleParser = true
_locale = null
nullStrings.addAll(listOf("null", "NULL", "NA", "N/A"))
}
Expand Down
Loading
Loading