# Hadoop release to install and the directory its tarball unpacks into.
%env HADOOP_VERSION 2.9.2
%env HADOOP_PATH hadoop-2.9.2
# Download the release tarball. The mirror is tried first; if that fails
# (mirrors usually drop old releases) fall back to the Apache archive.
# -O pins the output name so a leftover or partial file is overwritten instead
# of wget saving the new download under a ".1" suffix.
!wget http://ftp.unicamp.br/pub/apache/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O hadoop-${HADOOP_VERSION}.tar.gz -q --show-progress \
|| wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O hadoop-${HADOOP_VERSION}.tar.gz -q --show-progress
# !rm ${HADOOP_PATH} -r
# Unpack quietly and delete the tarball only when extraction succeeded, so a
# failed extraction does not throw away the download.
!tar -xf hadoop-${HADOOP_VERSION}.tar.gz && rm hadoop-${HADOOP_VERSION}.tar.gz
# Print the JDK root: resolve the javac symlink, then drop its last two path
# components (normally "bin/javac").
!dirname "$(dirname "$(readlink -f "$(command -v javac)")")"
We also add it to the user's .bashrc so that it is loaded when the nodes open ssh connections.
# Make JAVA_HOME visible to processes started from this notebook.
%env JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
# Persist the same export in the user's shell start-up files and in Hadoop's
# hadoop-env.sh. Each line is appended only when it is not already present, so
# re-running this cell does not pile up duplicate exports. grep's stderr is
# silenced because the target file may not exist yet.
!grep -qxF "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" ~/.bashrc 2>/dev/null || echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> ~/.bashrc
!grep -qxF "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" ~/.profile 2>/dev/null || echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> ~/.profile
!grep -qxF "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" ${HADOOP_PATH}/etc/hadoop/hadoop-env.sh 2>/dev/null || echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> ${HADOOP_PATH}/etc/hadoop/hadoop-env.sh
# Run the bundled wordcount example on the sample text and write the result to
# ./output. No site configuration has been copied into the Hadoop directory at
# this point, so the paths are presumably resolved on the local filesystem.
!${HADOOP_PATH}/bin/hadoop jar \
${HADOOP_PATH}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar \
wordcount ./resources/examples/newyorknewyork.txt ./output
# Inspect the job output: list the directory, then print the result file.
!ls ./output/
!cat ./output/part-r-00000
Check the /binder/postBuild and /resources/configs/ssh/sshd_config files for more details.
# Start an ssh daemon using the repository's own sshd_config instead of the
# system default configuration.
!/usr/sbin/sshd -f resources/configs/ssh/sshd_config
The commands below establish ssh connections to the host names/IPs that will be used. This step avoids the yes/no host confirmation prompt later on.
# Open and immediately close one ssh session per host name so that each host
# key is accepted without an interactive prompt. Options go before the
# destination, and the remote command ("exit") is given as a plain argument:
# ssh's -C flag only enables compression, it does not introduce a command.
!ssh -o StrictHostKeyChecking=no -p 8822 "$USER@localhost" exit
!ssh -o StrictHostKeyChecking=no -p 8822 "$USER@0.0.0.0" exit
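The ssh options set below are `-p 8822` (the port the ssh commands above connect to) and `-o StrictHostKeyChecking=no` (skip the host-key confirmation prompt).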
# Extra ssh options: same port and host-key setting as the manual ssh calls.
# Presumably read by Hadoop's start-*.sh scripts when they ssh into the nodes;
# confirm against the scripts of the Hadoop version in use.
%env HADOOP_SSH_OPTS= -o StrictHostKeyChecking=no -p 8822
# Ask pdsh (if it is what launches the remote commands) to use ssh.
%env PDSH_RCMD_TYPE ssh
Check the configuration files according to the Hadoop version.
Refer to /resources/configs/hadoop/<version>.
# Install the version-specific core and HDFS site configuration into the
# unpacked Hadoop tree.
!cp resources/configs/hadoop/${HADOOP_VERSION}/core-site.xml \
resources/configs/hadoop/${HADOOP_VERSION}/hdfs-site.xml \
${HADOOP_PATH}/etc/hadoop/
# Format the NameNode; -force and -nonInteractive keep it from prompting.
!${HADOOP_PATH}/bin/hdfs namenode -format -force -nonInteractive
# Launch HDFS via its start script, then list the running Java processes to
# check what came up.
!${HADOOP_PATH}/sbin/start-dfs.sh
!jps
# Create the HDFS input directory, including its parents, in a single call.
# -p also makes the cell safe to re-run: existing directories are not an error.
!${HADOOP_PATH}/bin/hdfs dfs -mkdir -p /user/matheus/input/
# Upload the sample text; -f overwrites a copy left behind by an earlier run
# instead of failing because the file already exists.
!${HADOOP_PATH}/bin/hdfs dfs -put -f ./resources/examples/newyorknewyork.txt /user/matheus/input/
# Verify the upload: list the directory and print the file back from HDFS.
!${HADOOP_PATH}/bin/hdfs dfs -ls /user/matheus/input/
!${HADOOP_PATH}/bin/hdfs dfs -cat /user/matheus/input/newyorknewyork.txt
# Run the wordcount example with /user/matheus/input as input and
# /user/matheus/output as the output directory.
!${HADOOP_PATH}/bin/hadoop jar \
${HADOOP_PATH}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar \
wordcount /user/matheus/input /user/matheus/output
# Show what the job produced: directory listing first, then the result file.
!${HADOOP_PATH}/bin/hdfs dfs -ls /user/matheus/output/
!${HADOOP_PATH}/bin/hdfs dfs -cat /user/matheus/output/part-r-00000
# Install the version-specific MapReduce and YARN site configuration into the
# unpacked Hadoop tree.
!cp resources/configs/hadoop/${HADOOP_VERSION}/mapred-site.xml \
resources/configs/hadoop/${HADOOP_VERSION}/yarn-site.xml \
${HADOOP_PATH}/etc/hadoop/
# Launch YARN via its start script, then list the running Java processes to
# check what came up.
!${HADOOP_PATH}/sbin/start-yarn.sh
!jps
# Submit the wordcount example through the yarn launcher, reading
# /user/matheus/input and writing to a fresh directory, /user/matheus/output2.
!${HADOOP_PATH}/bin/yarn jar \
${HADOOP_PATH}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar \
wordcount /user/matheus/input /user/matheus/output2
# Show what the job produced: directory listing first, then the result file.
!${HADOOP_PATH}/bin/hdfs dfs -ls /user/matheus/output2/
!${HADOOP_PATH}/bin/hdfs dfs -cat /user/matheus/output2/part-r-00000