-
Notifications
You must be signed in to change notification settings - Fork 0
/
bootstrap.sh
85 lines (77 loc) · 3.4 KB
/
bootstrap.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env bash
# Bootstrap script for the VM: installs Python 3, Java, a file manager,
# Python data-science packages, Spark and Hadoop, then writes the matching
# environment variables and host names.
# NOTE(review): no command uses sudo, so this presumably runs as root —
# confirm against the provisioner configuration.

# The script is meant to run unattended, so apt/debconf must never stop to
# ask a question.
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get install -y python3 default-jre python3-pip
# Make `python` mean python3 in the vagrant user's interactive shells.
# A plain `alias` statement here would do nothing: bash does not expand
# aliases in non-interactive scripts, and the alias dies with this process.
if ! grep -qxF 'alias python=python3' /home/vagrant/.bashrc; then
  echo 'alias python=python3' >> /home/vagrant/.bashrc
fi
# -y: add the PPA without waiting for the "press Enter" confirmation
add-apt-repository -y ppa:openjdk-r/ppa
apt-get update
# -y: without it apt-get aborts (or hangs) at the confirmation prompt
apt-get install -y openjdk-11-jdk
# a simple two-panel file commander
apt-get install -y mc
# uncomment one of the following for graphical desktops
# NOTE: the graphical desktop is accessible through
# the main VirtualBox window (Show button)
#
# - minimal: wm & graphical server
# apt-get install -y icewm xinit xterm python3-tk
#
# - minimal desktop env: lxqt
# apt-get install -y xinit lxqt
#
# cd to the shared directory; abort if it is missing, because later steps
# delete and download files using paths relative to the current directory
cd /vagrant || { echo "cannot cd to /vagrant" >&2; exit 1; }
# python packages
pip3 install matplotlib pandas seaborn scikit-learn plotly scipy
# jupyter
pip3 install jupyter
# findspark is just for development purposes in standalone mode
pip3 install findspark
# uncomment and modify to remove a previously installed Spark version
# rm -rf /usr/local/spark-3.0.0-preview2-bin-hadoop2.7

#######################################
# Download a release tarball into the current directory and unpack it
# under /usr/local, unless the target directory already exists.
# Arguments: $1 - URL of the .tgz / .tar.gz archive
#            $2 - directory the archive is expected to create
# Returns:   0 if the directory exists or was installed, 1 on failure
#######################################
install_tarball() {
  local url=$1 target_dir=$2
  local archive=${url##*/}

  if [[ -d "$target_dir" ]]; then
    return 0
  fi
  # -O overwrites a stale archive left by an interrupted run; without it
  # wget would save to "<name>.1" and tar would unpack the old, partial file.
  if wget -O "$archive" "$url" && tar -C /usr/local -xvzf "$archive"; then
    rm -- "$archive"
    return 0
  fi
  echo "failed to download or unpack $url" >&2
  # Drop any half-extracted tree so that the next run retries instead of
  # seeing the directory and skipping the installation.
  rm -rf -- "$archive" "${target_dir:?}"
  return 1
}

# remove any previously downloaded file
rm -rf spark-3.*-bin-hadoop*.tgz*
# current link as of 2023-11-24:
install_tarball \
  https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz \
  /usr/local/spark-3.5.0-bin-hadoop3 || exit 1
# archive.apache.org keeps every release permanently; downloads.apache.org
# only carries the current ones, so a pinned version eventually disappears.
install_tarball \
  https://archive.apache.org/dist/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz \
  /usr/local/hadoop-3.4.0 || exit 1
#######################################
# Append a line to a file unless that exact line is already present.
# Matching is literal and whole-line (grep -xF), and quiet (-q) so that
# nothing is echoed into the provisioning log.
# Arguments: $1 - file to update (created if it does not exist)
#            $2 - line to append
# Returns:   0 if the line is present afterwards, non-zero if the write fails
#######################################
append_once() {
  local file=$1 line=$2
  if [[ -f "$file" ]] && grep -qxF -- "$line" "$file"; then
    return 0
  fi
  printf '%s\n' "$line" >> "$file"
}

bashrc=/home/vagrant/.bashrc
hadoop_dir=/usr/local/hadoop-3.4.0
spark_dir=/usr/local/spark-3.5.0-bin-hadoop3
java_dir=/usr/lib/jvm/java-11-openjdk-amd64/

append_once "$bashrc" "export HADOOP_INSTALL=$hadoop_dir"
append_once "$bashrc" "export HADOOP_HOME=$hadoop_dir"
append_once "$bashrc" "export JAVA_HOME=$java_dir"
# Single quotes: $PATH and $JAVA_HOME must stay literal in .bashrc so they are
# expanded at login. Expanding them here would freeze the provisioning
# shell's PATH and whatever JAVA_HOME holds at that moment into the file.
# JDK 11 has no jre/ subdirectory, so the binaries are in $JAVA_HOME/bin.
append_once "$bashrc" 'export PATH=$PATH:$JAVA_HOME/bin'
append_once "$hadoop_dir/etc/hadoop/hadoop-env.sh" "export JAVA_HOME=$java_dir"
append_once "$bashrc" "export HADOOP_MAPRED_HOME=$hadoop_dir"
append_once "$bashrc" "export PYSPARK_PYTHON=/usr/bin/python3"
append_once "$bashrc" "export PYSPARK_DRIVER_PYTHON=/usr/bin/python3"
append_once "$bashrc" "export SPARK_HOME=$spark_dir"
# \$PATH is escaped for the same reason as above; the Spark paths are fixed.
append_once "$bashrc" "export PATH=\$PATH:$spark_dir/bin:$spark_dir/sbin"
append_once "$bashrc" "export HADOOP_CONF_DIR=$hadoop_dir/etc/hadoop"
# setting up hostnames: /etc/hosts is replaced (not appended to) with the
# static table below
# NOTE(review): 192.168.56.0 looks like a network address rather than a host
# address, spark-slave-2/-3 appear swapped relative to their IPs, and there
# is no spark-slave-1 — confirm against the Vagrantfile before changing.
cat > /etc/hosts <<'EOF'
127.0.0.1 localhost
192.168.56.0 spark-master
192.168.56.3 spark-slave-2
192.168.56.2 spark-slave-3
EOF
printf '%s\n' "## VM configuration completed ##"