<aside> 💡
wget https://archive.apache.org/dist/spark/spark-4.0.0/spark-4.0.0-bin-hadoop3.tgz
tar -xvzf spark-4.0.0-bin-hadoop3.tgz && mv spark-4.0.0-bin-hadoop3 "$HOME/spark"
</aside>
<aside> 💡
nano ~/.bashrc
# Spark environment variables
export SPARK_HOME=$HOME/spark
export PATH=$SPARK_HOME/bin:$PATH
export PYSPARK_PYTHON=python3.12
Ctrl + O -> Enter -> Ctrl + X
source ~/.bashrc
</aside>
<aside> 💡
cp $SPARK_HOME/conf/spark-env.sh.template $SPARK_HOME/conf/spark-env.sh
nano $SPARK_HOME/conf/spark-env.sh
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
Ctrl + O -> Enter -> Ctrl + X
spark-shell
Ctrl + C
pyspark
exit()
</aside>
<aside> 💡
Cấu hình cho Kafka:
cp $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf
nano $SPARK_HOME/conf/spark-defaults.conf
spark.jars.packages org.apache.spark:spark-sql-kafka-0-10_2.13:4.0.0
Ctrl + O -> Enter -> Ctrl + X
</aside>