Here is a simple example of getting started with Apache Spark on Kubernetes (k8s).
Deploy Kubernetes with Juju
$ juju add-model k8s-spark-test --credential jamesbeedy-pdl aws/us-west-2
$ juju deploy kubernetes-core
Download Spark and run spark-submit against the Kubernetes cluster you just deployed
(replace <kubernetes-master-ip> with the address of your Kubernetes master)
$ wget http://apache.claz.org/spark/spark-2.4.0/spark-2.4.0-bin-hadoop2.7.tgz
$ tar -xzvf spark-2.4.0-bin-hadoop2.7.tgz
$ ./spark-2.4.0-bin-hadoop2.7/bin/spark-submit --name spark-pi \
--master k8s://https://<kubernetes-master-ip>:6443 --deploy-mode cluster \
--class org.apache.spark.examples.SparkPi \
--conf spark.executor.instances=5 \
--conf spark.kubernetes.container.image=gcr.io/spark-operator/spark:v2.4.0 \
/opt/spark/examples/jars/spark-examples_2.11-2.4.0.jar
2018-11-18 15:50:49 INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:
pod name: spark-pi-1542585048157-driver
namespace: default
labels: spark-app-selector -> spark-c30de13508624b4baadca8f18001d37a, spark-role -> driver
pod uid: c6902021-eb8c-11e8-9353-0a4121e2f940
creation time: 2018-11-18T23:50:49Z
service account name: default
volumes: spark-local-dir-1, spark-conf-volume, default-token-qg68t
node name: N/A
start time: N/A
container images: N/A
phase: Pending
status: []
2018-11-18 15:50:49 INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:
pod name: spark-pi-1542585048157-driver
namespace: default
labels: spark-app-selector -> spark-c30de13508624b4baadca8f18001d37a, spark-role -> driver
pod uid: c6902021-eb8c-11e8-9353-0a4121e2f940
creation time: 2018-11-18T23:50:49Z
service account name: default
volumes: spark-local-dir-1, spark-conf-volume, default-token-qg68t
node name: ip-172-31-102-237
start time: N/A
container images: N/A
phase: Pending
status: []
2018-11-18 15:50:49 INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:
pod name: spark-pi-1542585048157-driver
namespace: default
labels: spark-app-selector -> spark-c30de13508624b4baadca8f18001d37a, spark-role -> driver
pod uid: c6902021-eb8c-11e8-9353-0a4121e2f940
creation time: 2018-11-18T23:50:49Z
service account name: default
volumes: spark-local-dir-1, spark-conf-volume, default-token-qg68t
node name: ip-172-31-102-237
start time: 2018-11-18T23:50:49Z
container images: gcr.io/spark-operator/spark:v2.4.0
phase: Pending
status: [ContainerStatus(containerID=null, image=gcr.io/spark-operator/spark:v2.4.0, imageID=, lastState=ContainerState(running=null, terminated=null, waiting=null, additionalProperties={}), name=spark-kubernetes-driver, ready=false, restartCount=0, state=ContainerState(running=null, terminated=null, waiting=ContainerStateWaiting(message=null, reason=ContainerCreating, additionalProperties={}), additionalProperties={}), additionalProperties={})]
2018-11-18 15:50:49 INFO Client:54 - Waiting for application spark-pi to finish...
2018-11-18 15:51:02 INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:
pod name: spark-pi-1542585048157-driver
namespace: default
labels: spark-app-selector -> spark-c30de13508624b4baadca8f18001d37a, spark-role -> driver
pod uid: c6902021-eb8c-11e8-9353-0a4121e2f940
creation time: 2018-11-18T23:50:49Z
service account name: default
volumes: spark-local-dir-1, spark-conf-volume, default-token-qg68t
node name: ip-172-31-102-237
start time: 2018-11-18T23:50:49Z
container images: gcr.io/spark-operator/spark:v2.4.0
phase: Running
status: [ContainerStatus(containerID=docker://5d9adab97e25d3320f318037134ac3271f422597e3b223c7a069c7076d0fc109, image=gcr.io/spark-operator/spark:v2.4.0, imageID=docker-pullable://gcr.io/spark-operator/spark@sha256:375cd079d14179687b37625ce850c905c452010325d62c35618b8e0c8ddc749b, lastState=ContainerState(running=null, terminated=null, waiting=null, additionalProperties={}), name=spark-kubernetes-driver, ready=true, restartCount=0, state=ContainerState(running=ContainerStateRunning(startedAt=Time(time=2018-11-18T23:51:01Z, additionalProperties={}), additionalProperties={}), terminated=null, waiting=null, additionalProperties={}), additionalProperties={})]
2018-11-18 15:51:38 INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:
pod name: spark-pi-1542585048157-driver
namespace: default
labels: spark-app-selector -> spark-c30de13508624b4baadca8f18001d37a, spark-role -> driver
pod uid: c6902021-eb8c-11e8-9353-0a4121e2f940
creation time: 2018-11-18T23:50:49Z
service account name: default
volumes: spark-local-dir-1, spark-conf-volume, default-token-qg68t
node name: ip-172-31-102-237
start time: 2018-11-18T23:50:49Z
container images: gcr.io/spark-operator/spark:v2.4.0
phase: Succeeded
status: [ContainerStatus(containerID=docker://5d9adab97e25d3320f318037134ac3271f422597e3b223c7a069c7076d0fc109, image=gcr.io/spark-operator/spark:v2.4.0, imageID=docker-pullable://gcr.io/spark-operator/spark@sha256:375cd079d14179687b37625ce850c905c452010325d62c35618b8e0c8ddc749b, lastState=ContainerState(running=null, terminated=null, waiting=null, additionalProperties={}), name=spark-kubernetes-driver, ready=false, restartCount=0, state=ContainerState(running=null, terminated=ContainerStateTerminated(containerID=docker://5d9adab97e25d3320f318037134ac3271f422597e3b223c7a069c7076d0fc109, exitCode=0, finishedAt=Time(time=2018-11-18T23:51:36Z, additionalProperties={}), message=null, reason=Completed, signal=null, startedAt=Time(time=2018-11-18T23:51:01Z, additionalProperties={}), additionalProperties={}), waiting=null, additionalProperties={}), additionalProperties={})]
2018-11-18 15:51:38 INFO LoggingPodStatusWatcherImpl:54 - Container final statuses:
Container name: spark-kubernetes-driver
Container image: gcr.io/spark-operator/spark:v2.4.0
Container state: Terminated
Exit code: 0
2018-11-18 15:51:38 INFO Client:54 - Application spark-pi finished.
2018-11-18 15:51:38 INFO ShutdownHookManager:54 - Shutdown hook called
2018-11-18 15:51:38 INFO ShutdownHookManager:54 - Deleting directory /private/var/folders/sf/msmkhyjn3rj9qtncd3wmdlrh0000gn/T/spark-cf836c49-ec57-4efa-bdc9-f299ad1de9dd