Apache Hadoop
The Apache Hadoop project develops open-source software for reliable, scalable, distributed computing.
The Apache Hadoop software library is a framework that allows for the distributed processing of large data sets across clusters of computers using simple programming models. It is designed to scale up from single servers to thousands of machines, each offering local computation and storage.
Install
~/tmp/hadoop-3.3.1/etc/hadoop/hadoop-env.sh
# Point the Hadoop scripts at the local JDK install; hadoop-env.sh is the
# canonical place to set this so all daemons pick it up.
export JAVA_HOME=/home/vitor/jdk-11.0.10+9
~/tmp/hadoop-3.3.1/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Default filesystem URI; clients and daemons reach the NameNode here.
       Fixed: fs.defaultFS replaces the deprecated fs.default.name key. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
</configuration>
~/tmp/hadoop-3.3.1/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Where the NameNode keeps fsimage/edits metadata.
       NOTE(review): /tmp is wiped on reboot — use a persistent path for
       anything beyond a throwaway test cluster. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/tmp/nameNode</value>
  </property>
  <!-- Fixed: the original used the non-existent key "dfs.namenode.data.dir".
       DataNode block storage is configured via dfs.datanode.data.dir; with
       the wrong key the DataNode silently falls back to its default dir. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/tmp/dataNode</value>
  </property>
  <!-- Block replication factor.
       NOTE(review): the workers list contains only localhost; a factor of 2
       cannot be satisfied on a single DataNode — confirm the intended
       cluster size or lower this to 1. -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
</configuration>
~/tmp/hadoop-3.3.1/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Execute MapReduce jobs on the YARN resource manager. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
~/tmp/hadoop-3.3.1/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<configuration>
  <!-- Disable YARN ACL checks (0 = off). -->
  <property>
    <name>yarn.acl.enable</name>
    <value>0</value>
  </property>
  <!-- Host running the ResourceManager; must resolve from every node. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <!-- Auxiliary shuffle service required by MapReduce on YARN. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
~/tmp/hadoop-3.3.1/etc/hadoop/workers (Hadoop 3.x renamed the worker-node list from "slaves" to "workers")
localhost
~/.bashrc
# ~/.bashrc additions: locate the Hadoop install and put its CLI tools (bin)
# and daemon control scripts (sbin) on PATH.
# Fixed: the two exports were fused onto one line, so $HADOOP_HOME was
# expanded before the assignment took effect and PATH picked up empty paths
# (and a stray variable literally named "export" was exported).
export HADOOP_HOME=/home/vitor/tmp/hadoop-3.3.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
Setup HDFS
# Sanity check: the Hadoop CLI tools should be present.
ls "$HADOOP_HOME/bin/"

# First report attempt fails until the "master" hostname resolves:
hdfs dfsadmin -report
# 2021-07-23 14:07:17,010 WARN fs.FileSystem: Failed to initialize fileystem hdfs://master:9000:
# java.lang.IllegalArgumentException: java.net.UnknownHostException: master
# report: java.net.UnknownHostException: master
# Fix: add "127.0.0.1 master" to /etc/hosts

# Initialize the NameNode metadata directory (destroys existing metadata).
# NOTE(review): '-format' takes no hostname argument — the trailing "master"
# is likely ignored; confirm against the hdfs namenode usage text.
hdfs namenode -format master

# Start the HDFS daemons.
hdfs --daemon start namenode
hdfs --daemon start datanode

# Start the YARN daemons.
yarn --daemon start resourcemanager
yarn --daemon start nodemanager
yarn --daemon start proxyserver

# Start the MapReduce job history server.
mapred --daemon start historyserver

# Verify the cluster is up: the report should now list the DataNode.
hdfs dfsadmin -report

# Web UIs:
# http://localhost:9870/                            (NameNode)
# http://localhost:9870/dfshealth.html#tab-overview
# http://localhost:9870/explorer.html#
# http://localhost:8088/                            (ResourceManager)
# http://localhost:8088/cluster
# http://localhost:19888/                           (JobHistory server)

# Smoke-test basic HDFS filesystem operations.
hadoop fs -ls /
hadoop fs -ls /tmp
hadoop fs -mkdir /test
hadoop fs -ls /