diff --git a/graalpy/graalpy-apache-arrow-guide/README.md b/graalpy/graalpy-apache-arrow-guide/README.md
index 5e329fe..6a383e2 100644
--- a/graalpy/graalpy-apache-arrow-guide/README.md
+++ b/graalpy/graalpy-apache-arrow-guide/README.md
@@ -34,6 +34,14 @@ Add the required dependencies for GraalPy and JArrow in the dependency section o
```
+or
+
+`build.gradle`
+```
+implementation "org.graalvm.python:python-community:$pythonVersion" // ①
+implementation "org.graalvm.python:python-embedding:$pythonVersion" // ③
+```
+
❶ The `python-community` dependency is a meta-package that transitively depends on all resources and libraries to run GraalPy.
❷ Note that the `python-community` package is not a JAR - it is simply a `pom` that declares more dependencies.
@@ -56,6 +64,14 @@ Add the required dependencies for GraalPy and JArrow in the dependency section o
```
+or
+
+`build.gradle`
+```groovy
+implementation "org.apache.arrow:arrow-vector:$arrowVersion" // ①
+implementation "org.apache.arrow:arrow-memory-unsafe:$arrowVersion" // ②
+```
+
❶ The `arrow-vector` dependency is used for managing in-memory columnar data structures.
❷ The `arrow-memory-unsafe` data structures defined in the `arrow-vector` will be backed by `sun.misc.Unsafe` library.
@@ -94,6 +110,27 @@ There is also another option `arrow-memory-netty`. You can read more about Apach
```
+or
+
+`build.gradle`
+```
+plugins {
+ id 'org.graalvm.python' version '25.0.0'
+ // ...
+}
+```
+
+`build.gradle`
+```
+graalPy {
+ community = true
+ packages = [ // ①
+ 'pandas', // ②
+ 'pyarrow' // ③
+ ]
+}
+```
+
❶ The `packages` section lists all Python packages optionally with [requirement specifiers](https://pip.pypa.io/en/stable/reference/requirement-specifiers/).
❷ Python packages and their versions can be specified as if used with pip. You can either install the latest version or you can specify the version e.g.`pandas==2.2.2`.
@@ -136,20 +173,22 @@ All Python source code should be placed in `src/main/resources/org.graalvm.pytho
Let's create a `data_analysis.py` file to calculate the mean and median for the Float8Vector using Pandas:
```python
import pandas as pd
-from polyglot.arrow import Float8Vector # ①
+from polyglot.arrow import Float8Vector, enable_java_integration
+enable_java_integration() # ①
def calculateMean(valueVector: Float8Vector) -> float:
- series = pd.Series(valueVector, dtype="float64[pyarrow]") # ②
- return series.mean()
+ series = pd.Series(valueVector, dtype="float64[pyarrow]") # ②
+ return series.mean()
def calculateMedian(valueVector: Float8Vector) -> float:
- series = pd.Series(valueVector, dtype="float64[pyarrow]")
- return series.median()
+ series = pd.Series(valueVector, dtype="float64[pyarrow]")
+ return series.median()
+
```
-❶ This import is crucial. Without it zero copy memory won't be achieved.
+❶ You need to call this function to enable the zero-copy integration.
❷ In pandas you need to specify that the series should be backed by pyarrow, therefore adding `[pyarrow]` to the dtype.
@@ -240,7 +279,19 @@ To compile the application:
./mvnw package
```
+or
+
+```bash
+./gradlew build
+```
+
To run the application:
```bash
./mvnw exec:java -Dexec.mainClass="com.example.Main"
+```
+
+or
+
+```bash
+./gradlew run
```
\ No newline at end of file
diff --git a/graalpy/graalpy-apache-arrow-guide/build.gradle b/graalpy/graalpy-apache-arrow-guide/build.gradle
index 8850437..a5f796a 100644
--- a/graalpy/graalpy-apache-arrow-guide/build.gradle
+++ b/graalpy/graalpy-apache-arrow-guide/build.gradle
@@ -1,7 +1,7 @@
plugins {
+ id 'org.graalvm.python' version '25.0.0'
id 'application'
id 'java'
- id 'org.graalvm.python' version '25.0.0'
}
group = 'com.example'
@@ -34,17 +34,17 @@ compileJava {
}
tasks.withType(JavaExec) {
- jvmArgs = ['--enable-preview']
+ jvmArgs = ['--enable-preview', '--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED']
}
dependencies {
// Apache Arrow
- implementation "org.apache.arrow:arrow-vector:$arrowVersion"
- implementation "org.apache.arrow:arrow-memory-unsafe:$arrowVersion"
+ implementation "org.apache.arrow:arrow-vector:$arrowVersion" // ①
+ implementation "org.apache.arrow:arrow-memory-unsafe:$arrowVersion" // ②
// GraalPy
- implementation "org.graalvm.python:python-community:$pythonVersion"
- implementation "org.graalvm.python:python-embedding:$pythonVersion"
+ implementation "org.graalvm.python:python-community:$pythonVersion" // ①
+ implementation "org.graalvm.python:python-embedding:$pythonVersion" // ③
}
application {
diff --git a/graalpy/graalpy-apache-arrow-guide/pom.xml b/graalpy/graalpy-apache-arrow-guide/pom.xml
index c2690f0..aa88669 100644
--- a/graalpy/graalpy-apache-arrow-guide/pom.xml
+++ b/graalpy/graalpy-apache-arrow-guide/pom.xml
@@ -59,10 +59,6 @@
pandas
pyarrow
-
-
- .*
-
process-graalpy-resources
diff --git a/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py b/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py
index a7d84e3..c38a858 100644
--- a/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py
+++ b/graalpy/graalpy-apache-arrow-guide/src/main/resources/org.graalvm.python.vfs/src/data_analysis.py
@@ -1,6 +1,7 @@
import pandas as pd
-from polyglot.arrow import Float8Vector # ①
+from polyglot.arrow import Float8Vector, enable_java_integration
+enable_java_integration() # ①
def calculateMean(valueVector: Float8Vector) -> float:
series = pd.Series(valueVector, dtype="float64[pyarrow]")