MoinMoin Logo
  • Comments
  • Immutable Page
  • Menu
    • Navigation
    • RecentChanges
    • FindPage
    • Local Site Map
    • Help
    • HelpContents
    • HelpOnMoinWikiSyntax
    • Display
    • Attachments
    • Info
    • Raw Text
    • Print View
    • Edit
    • Load
    • Save
  • Login

Navigation

  • Start
  • Sitemap
Revision 2 as of 2021-07-23 15:38:01
  • ApacheHadoop

Apache Hadoop

The Apache Hadoop project develops open-source software for reliable, scalable, distributed computing.

The Apache Hadoop software library is a framework that allows for the distributed processing of large data sets across clusters of computers using simple programming models. It is designed to scale up from single servers to thousands of machines, each offering local computation and storage.

Install

   1 cd ~/tmp
   2 wget https://archive.apache.org/dist/hadoop/core/hadoop-3.3.1/hadoop-3.3.1.tar.gz
   3 tar tvzf hadoop-3.3.1.tar.gz
   4 tar xvzf hadoop-3.3.1.tar.gz

~/tmp/hadoop-3.3.1/etc/hadoop/hadoop-env.sh

export JAVA_HOME=/home/vitor/jdk-11.0.10+9

~/tmp/hadoop-3.3.1/etc/hadoop/core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
</configuration>

~/tmp/hadoop-3.3.1/etc/hadoop/hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/tmp/nameNode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/tmp/dataNode</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
</configuration>

~/tmp/hadoop-3.3.1/etc/hadoop/mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

~/tmp/hadoop-3.3.1/etc/hadoop/yarn-site.xml

<?xml version="1.0"?>
<configuration>
  <property>
    <name>yarn.acl.enable</name>
    <value>0</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>

~/tmp/hadoop-3.3.1/etc/hadoop/workers (this file was named "slaves" in Hadoop 2.x)

localhost

~/.bashrc

export HADOOP_HOME=/home/vitor/tmp/hadoop-3.3.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

Set up HDFS

ls $HADOOP_HOME/bin/
hdfs dfsadmin -report 
# 2021-07-23 14:07:17,010 WARN fs.FileSystem: Failed to initialize fileystem hdfs://master:9000: 
# java.lang.IllegalArgumentException: java.net.UnknownHostException: master
# report: java.net.UnknownHostException: master
# Fix: add the line "127.0.0.1 master" to /etc/hosts so that the hostname "master" resolves
hdfs namenode -format master
hdfs --daemon start namenode
hdfs --daemon start datanode
yarn --daemon start resourcemanager
yarn --daemon start nodemanager
yarn --daemon start proxyserver
mapred --daemon start historyserver
hdfs dfsadmin -report
# http://localhost:9870/
# http://localhost:9870/dfshealth.html#tab-overview
# http://localhost:9870/explorer.html#
# http://localhost:8088/
# http://localhost:8088/cluster
# http://localhost:19888/

hadoop fs -ls /
hadoop fs -ls /tmp 
hadoop fs -mkdir /test
hadoop fs -ls /
  • MoinMoin Powered
  • Python Powered
  • GPL licensed
  • Valid HTML 4.01