Skip to content

Commit

Permalink
Add Affirm-specific PySpark-related changes
Browse files Browse the repository at this point in the history
- Update manifest
- Update .gitignore
- Update setup.py
- Update entrypoint.sh

See this [historical PR](https://github.com/Affirm/spark/pull/20/files) for details.
  • Loading branch information
mrafayaleem committed Jan 31, 2025
1 parent 16e9a22 commit d480ce7
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 1 deletion.
1 change: 1 addition & 0 deletions python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ docs/_build/
pyspark.egg-info
build/
dist/
venv/
1 change: 1 addition & 0 deletions python/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
global-exclude *.py[cod] __pycache__ .DS_Store
recursive-include deps/jars *.jar
graft deps/bin
graft deps/k8s
recursive-include deps/sbin spark-config.sh spark-daemon.sh start-history-server.sh stop-history-server.sh
recursive-include deps/data *.data *.txt
recursive-include deps/licenses *.txt
Expand Down
9 changes: 9 additions & 0 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,15 @@
USER_SCRIPTS_PATH = os.path.join(SPARK_HOME, "sbin")
DATA_PATH = os.path.join(SPARK_HOME, "data")
LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")
DOCKER_PATH = os.path.join(SPARK_HOME, "resource-managers/kubernetes/docker/src/main/dockerfiles/spark")

SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
USER_SCRIPTS_TARGET = os.path.join(TEMP_PATH, "sbin")
JARS_TARGET = os.path.join(TEMP_PATH, "jars")
EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")
DATA_TARGET = os.path.join(TEMP_PATH, "data")
LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")
DOCKER_TARGET = os.path.join(TEMP_PATH, "k8s")

# Check and see if we are under the spark path in which case we need to build the symlink farm.
# This is important because we only want to build the symlink farm while under Spark otherwise we
Expand Down Expand Up @@ -192,6 +194,7 @@ def run(self):
os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
os.symlink(DATA_PATH, DATA_TARGET)
os.symlink(LICENSES_PATH, LICENSES_TARGET)
os.symlink(DOCKER_PATH, DOCKER_TARGET)
else:
# For windows fall back to the slower copytree
copytree(JARS_PATH, JARS_TARGET)
Expand All @@ -200,6 +203,7 @@ def run(self):
copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
copytree(DATA_PATH, DATA_TARGET)
copytree(LICENSES_PATH, LICENSES_TARGET)
copytree(DOCKER_PATH, DOCKER_TARGET)
else:
# If we are not inside of SPARK_HOME verify we have the required symlink farm
if not os.path.exists(JARS_TARGET):
Expand Down Expand Up @@ -258,6 +262,7 @@ def run(self):
"pyspark.sql.connect.streaming.worker",
"pyspark.bin",
"pyspark.sbin",
'pyspark.k8s',
"pyspark.jars",
"pyspark.pandas",
"pyspark.pandas.data_type_ops",
Expand All @@ -281,6 +286,7 @@ def run(self):
package_dir={
"pyspark.jars": "deps/jars",
"pyspark.bin": "deps/bin",
'pyspark.k8s': 'deps/k8s',
"pyspark.sbin": "deps/sbin",
"pyspark.python.lib": "lib",
"pyspark.data": "deps/data",
Expand All @@ -290,6 +296,7 @@ def run(self):
package_data={
"pyspark.jars": ["*.jar"],
"pyspark.bin": ["*"],
'pyspark.k8s': ['*'],
"pyspark.sbin": [
"spark-config.sh",
"spark-daemon.sh",
Expand Down Expand Up @@ -356,11 +363,13 @@ def run(self):
os.remove(os.path.join(TEMP_PATH, "examples"))
os.remove(os.path.join(TEMP_PATH, "data"))
os.remove(os.path.join(TEMP_PATH, "licenses"))
os.remove(os.path.join(TEMP_PATH, "k8s"))
else:
rmtree(os.path.join(TEMP_PATH, "jars"))
rmtree(os.path.join(TEMP_PATH, "bin"))
rmtree(os.path.join(TEMP_PATH, "sbin"))
rmtree(os.path.join(TEMP_PATH, "examples"))
rmtree(os.path.join(TEMP_PATH, "data"))
rmtree(os.path.join(TEMP_PATH, "licenses"))
rmtree(os.path.join(TEMP_PATH, "k8s"))
os.rmdir(TEMP_PATH)
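2 changes: 1 addition & 1 deletion resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
Original file line number Diff line number Diff line change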
Expand Up @@ -115,4 +115,4 @@ case "$1" in
esac

# Execute the container CMD under tini for better hygiene
exec /usr/bin/tini -s -- "${CMD[@]}"
exec /tini -s -- "${CMD[@]}"

0 comments on commit d480ce7

Please sign in to comment.