<aside> 💡

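```bash
# If this URL returns 404 (superseded releases are removed from dlcdn), use
# https://archive.apache.org/dist/spark/spark-4.0.0/spark-4.0.0-bin-hadoop3.tgz instead.
wget https://dlcdn.apache.org/spark/spark-4.0.0/spark-4.0.0-bin-hadoop3.tgz
tar -xvzf spark-4.0.0-bin-hadoop3.tgz && mv spark-4.0.0-bin-hadoop3 spark
```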

image.png

</aside>

<aside> 💡

nano ~/.bashrc

```bash
# Spark environment variables
export SPARK_HOME=$HOME/spark
export PATH=$SPARK_HOME/bin:$PATH
export PYSPARK_PYTHON=python3.12
```

Ctrl + O -> Enter -> Ctrl + X

source ~/.bashrc

image.png

</aside>

<aside> 💡

```bash
cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh
nano $SPARK_HOME/conf/spark-env.sh
```

```bash
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
```

Ctrl + O -> Enter -> Ctrl + X

spark-shell

Ctrl + C

pyspark

exit()

image.png

image.png

image.png

</aside>

<aside> 💡

Cấu hình cho Kafka

```bash
cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf
nano $SPARK_HOME/conf/spark-defaults.conf
```

spark.jars.packages org.apache.spark:spark-sql-kafka-0-10_2.13:4.0.0

Ctrl + O -> Enter -> Ctrl + X

</aside>