Skip to content

Commit

Permalink
Merge pull request #1081 from Kotlin/to_dataframe_imrovements
Browse files Browse the repository at this point in the history
To dataframe improvements
  • Loading branch information
AndreiKingsley authored Mar 7, 2025
2 parents eb8ee3e + 3223e9c commit 7c090b4
Show file tree
Hide file tree
Showing 15 changed files with 755 additions and 286 deletions.
2 changes: 2 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -5399,6 +5399,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun getHasProperties (Lkotlin/reflect/KClass;)Z
public static final fun isValueType (Lkotlin/reflect/KClass;)Z
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.api.hasProperties
import org.jetbrains.kotlinx.dataframe.impl.api.isValueType
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
Expand All @@ -26,7 +28,15 @@ import kotlin.reflect.KProperty
@Interpretable("toDataFrameDefault")
public inline fun <reified T> Iterable<T>.toDataFrame(): DataFrame<T> =
    toDataFrame {
        // Exactly one branch below may populate the frame: calling `properties()` unconditionally
        // beforehand would add property-based columns even for value types.
        //
        // Check whether the element type is a value type (primitives, primitive arrays,
        // datetime types, etc.) or has no properties at all.
        if (T::class.isValueType || !T::class.hasProperties) {
            // Such elements are stored as-is in a single `value` column.
            ValueProperty<T>::value from { it }
        } else {
            // Otherwise columns are created from the properties of the element type.
            properties()
        }
    }

@Refine
Expand Down Expand Up @@ -217,99 +227,6 @@ public inline fun <reified T> Iterable<T>.toDataFrame(columnName: String): DataF

// region toDataFrame overloads for built-in types

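/*
Without these overloads, `Iterable<String>.toDataFrame()` produces an unexpected result:
the frame is built from the properties of `String` (here `length`) instead of from the strings themselves.
```
val string = listOf("aaa", "aa", null)
string.toDataFrame()
```
=>
length
0 3
1 2
2 null
*/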

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Byte] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameByte")
public inline fun <reified B : Byte?> Iterable<B>.toDataFrame(): DataFrame<ValueProperty<B>> {
    val frame = toDataFrame {
        ValueProperty<B>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Short] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameShort")
public inline fun <reified S : Short?> Iterable<S>.toDataFrame(): DataFrame<ValueProperty<S>> {
    val frame = toDataFrame {
        ValueProperty<S>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Int] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameInt")
public inline fun <reified I : Int?> Iterable<I>.toDataFrame(): DataFrame<ValueProperty<I>> {
    val frame = toDataFrame {
        ValueProperty<I>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Long] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameLong")
public inline fun <reified L : Long?> Iterable<L>.toDataFrame(): DataFrame<ValueProperty<L>> {
    val frame = toDataFrame {
        ValueProperty<L>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [String] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameString")
public inline fun <reified S : String?> Iterable<S>.toDataFrame(): DataFrame<ValueProperty<S>> {
    val frame = toDataFrame {
        ValueProperty<S>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Char] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameChar")
public inline fun <reified C : Char?> Iterable<C>.toDataFrame(): DataFrame<ValueProperty<C>> {
    val frame = toDataFrame {
        ValueProperty<C>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Boolean] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameBoolean")
public inline fun <reified B : Boolean?> Iterable<B>.toDataFrame(): DataFrame<ValueProperty<B>> {
    val frame = toDataFrame {
        ValueProperty<B>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Float] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameFloat")
public inline fun <reified F : Float?> Iterable<F>.toDataFrame(): DataFrame<ValueProperty<F>> {
    val frame = toDataFrame {
        ValueProperty<F>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [Double] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameDouble")
public inline fun <reified D : Double?> Iterable<D>.toDataFrame(): DataFrame<ValueProperty<D>> {
    val frame = toDataFrame {
        ValueProperty<D>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [UByte] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameUByte")
public inline fun <reified U : UByte?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame {
        ValueProperty<U>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [UShort] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameUShort")
public inline fun <reified U : UShort?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame {
        ValueProperty<U>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [UInt] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameUInt")
public inline fun <reified U : UInt?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame {
        ValueProperty<U>::value from { element -> element }
    }
    return frame.cast()
}

/**
 * Builds a [DataFrame] from an iterable of (nullable) [ULong] values,
 * placing every element into the single [ValueProperty.value] column.
 */
@JvmName("toDataFrameULong")
public inline fun <reified U : ULong?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame {
        ValueProperty<U>::value from { element -> element }
    }
    return frame.cast()
}

@DataSchema
public interface ValueProperty<T> {
public val value: T
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ import org.jetbrains.kotlinx.dataframe.impl.projectUpTo
import org.jetbrains.kotlinx.dataframe.impl.schema.sortWithConstructor
import java.lang.reflect.InvocationTargetException
import java.lang.reflect.Method
import java.time.temporal.Temporal
import java.time.temporal.TemporalAccessor
import java.time.temporal.TemporalAmount
import kotlin.reflect.KCallable
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
Expand All @@ -37,23 +38,51 @@ import kotlin.reflect.jvm.isAccessible
import kotlin.reflect.jvm.javaField
import kotlin.reflect.typeOf

/**
 * Classes that count as value types only when matched exactly
 * (membership is tested on the class itself, not on its supertypes).
 */
private val valueTypes: Set<KClass<*>> = setOf(
    // Kotlin built-ins
    Any::class,
    Unit::class,
    Char::class,
    UByte::class,
    UShort::class,
    UInt::class,
    ULong::class,
    String::class,
    Boolean::class,
    // kotlin.time
    kotlin.time.Duration::class,
    // kotlinx.datetime
    kotlinx.datetime.LocalDate::class,
    kotlinx.datetime.LocalDateTime::class,
    kotlinx.datetime.Instant::class,
    kotlinx.datetime.TimeZone::class,
    kotlinx.datetime.DateTimePeriod::class,
    kotlinx.datetime.DateTimeUnit::class,
)

/**
 * Checks whether this `KClass` is a value type (number, enum, datetime, string, array, etc.).
 *
 * A class qualifies when it is listed in `valueTypes` exactly, is a subclass of [Number],
 * [Enum], [TemporalAccessor] or [TemporalAmount], or is an array class.
 *
 * Should be aligned with `ConeKotlinType.isValueType()` in
 * plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/toDataFrame.kt
 */
@PublishedApi
internal val KClass<*>.isValueType: Boolean
    get() =
        this in valueTypes ||
            this.isSubclassOf(Number::class) ||
            this.isSubclassOf(Enum::class) ||
            // all java datetime types; `java.time.temporal.Temporal` extends `TemporalAccessor`,
            // so it is already covered here and needs no separate check
            this.isSubclassOf(TemporalAccessor::class) ||
            this.isSubclassOf(TemporalAmount::class) ||
            this.isArray

/**
 * Tells whether this `KClass` exposes anything that can become a column:
 * a public member property, or a public getter-like member function (pojo-like classes).
 */
@PublishedApi
internal val KClass<*>.hasProperties: Boolean
    get() {
        val hasPublicProperty = memberProperties.any { property ->
            property.visibility == KVisibility.PUBLIC
        }
        if (hasPublicProperty) return true

        // fall back to pojo-like classes that only expose getter functions
        return memberFunctions.any { function ->
            function.visibility == KVisibility.PUBLIC && function.isGetterLike()
        }
    }

internal class CreateDataFrameDslImpl<T>(
override val source: Iterable<T>,
private val clazz: KClass<*>,
Expand Down
Loading

0 comments on commit 7c090b4

Please sign in to comment.