diff --git a/graalpy/graalpy-apache-arrow-guide/README.md b/graalpy/graalpy-apache-arrow-guide/README.md index 5e329fe..6a383e2 100644 --- a/graalpy/graalpy-apache-arrow-guide/README.md +++ b/graalpy/graalpy-apache-arrow-guide/README.md @@ -34,6 +34,14 @@ Add the required dependencies for GraalPy and JArrow in the dependency section o ``` +or + +`build.gradle` +``` +implementation "org.graalvm.python:python-community:$pythonVersion" // ① +implementation "org.graalvm.python:python-embedding:$pythonVersion" // ③ +``` + ❶ The `python-community` dependency is a meta-package that transitively depends on all resources and libraries to run GraalPy. ❷ Note that the `python-community` package is not a JAR - it is simply a `pom` that declares more dependencies. @@ -56,6 +64,14 @@ Add the required dependencies for GraalPy and JArrow in the dependency section o ``` +or + +`build.gradle` +```groovy +implementation "org.apache.arrow:arrow-vector:$arrowVersion" // ① +implementation "org.apache.arrow:arrow-memory-unsafe:$arrowVersion" // ② +``` + ❶ The `arrow-vector` dependency is used for managing in-memory columnar data structures. ❷ The `arrow-memory-unsafe` data structures defined in the `arrow-vector` will be backed by `sun.misc.Unsafe` library. @@ -94,6 +110,27 @@ There is also another option `arrow-memory-netty`. You can read more about Apach ``` +or + +`build.gradle` +``` +plugins { + id 'org.graalvm.python' version '25.0.0' + // ... +} +``` + +`build.gradle` +``` +graalPy { + community = true + packages = [ // ① + 'pandas', // ② + 'pyarrow' // ③ + ] +} +``` + ❶ The `packages` section lists all Python packages optionally with [requirement specifiers](https://pip.pypa.io/en/stable/reference/requirement-specifiers/). ❷ Python packages and their versions can be specified as if used with pip. You can either install the latest version or you can specify the version e.g.`pandas==2.2.2`. 
@@ -136,20 +173,22 @@ All Python source code should be placed in `src/main/resources/org.graalvm.pytho Let's create a `data_analysis.py` file to calculate the mean and median for the Float8Vector using Pandas: ```python import pandas as pd -from polyglot.arrow import Float8Vector # ① +from polyglot.arrow import Float8Vector, enable_java_integration +enable_java_integration() # ① def calculateMean(valueVector: Float8Vector) -> float: - series = pd.Series(valueVector, dtype="float64[pyarrow]") # ② - return series.mean() + series = pd.Series(valueVector, dtype="float64[pyarrow]") # ② + return series.mean() def calculateMedian(valueVector: Float8Vector) -> float: - series = pd.Series(valueVector, dtype="float64[pyarrow]") - return series.median() + series = pd.Series(valueVector, dtype="float64[pyarrow]") + return series.median() + ``` -❶ This import is crucial. Without it zero copy memory won't be achieved. +❶ You need to call this function to enable the zero-copy integration. ❷ In pandas you need to specify that the series should be backed by pyarrow, therefore adding `[pyarrow]` to the dtype. 
@@ -240,7 +279,19 @@ To compile the application: ./mvnw package ``` +or + +```bash +./gradlew build +``` + To run the application: ```bash ./mvnw exec:java -Dexec.mainClass="com.example.Main" +``` + +or + +```bash +./gradlew run ``` \ No newline at end of file diff --git a/graalpy/graalpy-apache-arrow-guide/build.gradle b/graalpy/graalpy-apache-arrow-guide/build.gradle index 8850437..a5f796a 100644 --- a/graalpy/graalpy-apache-arrow-guide/build.gradle +++ b/graalpy/graalpy-apache-arrow-guide/build.gradle @@ -1,7 +1,7 @@ plugins { + id 'org.graalvm.python' version '25.0.0' id 'application' id 'java' - id 'org.graalvm.python' version '25.0.0' } group = 'com.example' @@ -34,17 +34,17 @@ compileJava { } tasks.withType(JavaExec) { - jvmArgs = ['--enable-preview'] + jvmArgs = ['--enable-preview', '--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED'] } dependencies { // Apache Arrow - implementation "org.apache.arrow:arrow-vector:$arrowVersion" - implementation "org.apache.arrow:arrow-memory-unsafe:$arrowVersion" + implementation "org.apache.arrow:arrow-vector:$arrowVersion" // ① + implementation "org.apache.arrow:arrow-memory-unsafe:$arrowVersion" // ② // GraalPy - implementation "org.graalvm.python:python-community:$pythonVersion" - implementation "org.graalvm.python:python-embedding:$pythonVersion" + implementation "org.graalvm.python:python-community:$pythonVersion" // ① + implementation "org.graalvm.python:python-embedding:$pythonVersion" // ③ } application { diff --git a/graalpy/graalpy-apache-arrow-guide/pom.xml b/graalpy/graalpy-apache-arrow-guide/pom.xml index c2690f0..aa88669 100644 --- a/graalpy/graalpy-apache-arrow-guide/pom.xml +++ b/graalpy/graalpy-apache-arrow-guide/pom.xml @@ -59,10 +59,6 @@ pandas pyarrow - - - .* - process-graalpy-resources diff --git a/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py 
b/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py index a7d84e3..c38a858 100644 --- a/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py +++ b/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py @@ -1,6 +1,7 @@ import pandas as pd -from polyglot.arrow import Float8Vector # ① +from polyglot.arrow import Float8Vector, enable_java_integration +enable_java_integration() # ① def calculateMean(valueVector: Float8Vector) -> float: series = pd.Series(valueVector, dtype="float64[pyarrow]")