Hadoop docker

来自ling
跳转至: 导航搜索

链接

https://archive.apache.org/dist/hadoop/common/

https://archive.apache.org/dist/flink/

https://archive.apache.org/dist/zookeeper/

https://repo.maven.apache.org/maven2/org/apache/flink/

https://flink.apache.org/

flink-shaded-hadoop-2-uber

https://scala-lang.org/download

hadoop镜像1

https://blog.csdn.net/lt5227/article/details/120309717

hadoop镜像2

https://blog.csdn.net/tian1345/article/details/109511799

https://www.dandelioncloud.cn/article/details/1501525340693540866

centos8-ssh

遇到 "Error: Failed to download metadata for repo ... No URLs in mirrorlist":CentOS 8 已停止维护,官方 mirrorlist 已下线,需把 repo 源改为 vault.centos.org 后再安装

centos7-ssh

# Build a CentOS 7 base image running sshd; used as the base for the Hadoop
# images below. Fixes vs. original: ensure the build-context dir exists,
# use LABEL (MAINTAINER is deprecated), and pass -N '' to ssh-keygen so host
# key generation never waits for an interactive passphrase during the build.
mkdir -p /alidata/dockerdata/dockerfiles/centos7-ssh
tee /alidata/dockerdata/dockerfiles/centos7-ssh/Dockerfile <<-'EOF'
FROM centos:7
LABEL maintainer="bo.wang"
# install sshd + client tools; disable PAM to avoid container login issues
RUN  yum -y install openssh-server vim openssh-clients
RUN  sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config

RUN echo "root:222222"  | chpasswd
RUN echo "root ALL=(ALL)  ALL"  >> /etc/sudoers
# pre-generate host keys; -N '' (empty passphrase) keeps the build non-interactive
RUN ssh-keygen -t dsa  -N '' -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -t rsa  -N '' -f /etc/ssh/ssh_host_rsa_key

# sshd needs this directory for its privilege-separation files
RUN mkdir /var/run/sshd
EXPOSE 22
CMD ["/usr/sbin/sshd","-D"]

EOF
cd /alidata/dockerdata/dockerfiles/centos7-ssh
docker build -t myos:ssh . -f Dockerfile

pipework

# Install pipework (assigns extra bridge IPs to already-running containers).
mkdir -p /alidata/dockerdata/dockerfiles/pipework
cd /alidata/dockerdata/dockerfiles/pipework
git clone https://github.com/jpetazzo/pipework.git
cd pipework/
# pipework is a single shell script; copying it onto PATH is the whole install
cp pipework  /usr/local/bin/
# brctl (used below to create the bridge) is provided by bridge-utils
yum -y install bridge-utils


创建网络

brctl addbr br1        # create bridge br1
ip link set dev br1 up    # bring the bridge device br1 up
ip addr add 192.168.1.1/24 dev br1     # assign an IP to br1
docker run -d --name=myos.ssh myos:ssh 
# attach the running container to br1 with a static address
pipework  br1 myos.ssh 192.168.1.2/24
# verify: the container should now answer on the bridge network
ping 192.168.1.2
ssh  192.168.1.2

hadoop

# Build the Hadoop base image on top of myos:ssh.
mkdir -p /alidata/dockerdata/dockerfiles/hadoop
cd /alidata/dockerdata/dockerfiles/hadoop
# Hadoop is fetched directly from the Apache archive.
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
# The Oracle JDK cannot be downloaded anonymously; upload it via rz (lrzsz).
# (The original also re-uploaded hadoop-2.7.2.tar.gz with rz — redundant after
# the wget above, so that step is dropped.)
rz jdk-8u271-linux-x64.tar.gz
tee /alidata/dockerdata/dockerfiles/hadoop/Dockerfile <<-'EOF'
FROM myos:ssh
ADD jdk-8u271-linux-x64.tar.gz  /usr/local
RUN mv /usr/local/jdk1.8.0_271   /usr/local/jdk1.8
ENV JAVA_HOME  /usr/local/jdk1.8
ENV PATH $JAVA_HOME/bin:$PATH

ADD hadoop-2.7.2.tar.gz  /usr/local
RUN mv /usr/local/hadoop-2.7.2  /usr/local/hadoop
ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $HADOOP_HOME/bin:$PATH

RUN yum -y install which sudo vim bash-completion
EOF
cd /alidata/dockerdata/dockerfiles/hadoop
docker build -t myos:hadoopbase . -f Dockerfile

启动hadoop

运行三个容器,分别给每个容器命名

容器hadoop0启动时,映射了端口号,50070和8088,是用来在浏览器中访问hadoop WEB界面的

docker run --name hadoop0 --hostname hadoop0  -d -p 50070:50070  -p 8088:8088 myos:hadoopbase # -p publishes fixed host ports (HDFS and YARN web UIs)
docker run --name hadoop1 --hostname hadoop1  -d -P myos:hadoopbase # -P maps every exposed container port to a random host port
docker run --name hadoop2 --hostname hadoop2  -d -P myos:hadoopbase

配置ssh

# open a shell in each container (one terminal per container)
docker exec -it hadoop0  /bin/bash
docker exec -it hadoop1  /bin/bash
docker exec -it hadoop2  /bin/bash
# inside each container: note its IP address for the /etc/hosts entries below
ip a

在每个容器修改/etc/hosts配置文件(IP 以上面 ip a 查到的实际地址为准,下面三行仅为示例)

vi /etc/hosts    

172.17.0.7      hadoop0
172.17.0.8      hadoop1
172.17.0.9      hadoop2


配置三台主机互相免密登录

将公钥传给包括自己的每台主机,三个容器都要做!!!确保最终每台主机都能免密访问其他主机包括自己

[root@hadoop0 /]# ssh-keygen        # 在每台主机都执行该操作
[root@hadoop0 /]# for i in hadoop{0..2}; do ssh-copy-id root@$i; done

安装配置Hadoop

# 进入容器,修改配置文件
[root@hadoop0 /]# cd /usr/local/hadoop/
[root@hadoop0 hadoop]# ls
LICENSE.txt  NOTICE.txt  README.txt  bin  etc  include	lib  libexec  sbin  share
[root@hadoop0 hadoop]# mkdir tmp hdfs
[root@hadoop0 hadoop]# mkdir hdfs/data  hdfs/name


# 修改配置文件
[root@hadoop0 hadoop]# vim etc/hadoop/core-site.xml 
在 <configuration> 块儿中添加:
    <property>
      <name>fs.defaultFS</name>
      <value>hdfs://hadoop0:9000</value>
     </property>
     <property>
       <name>hadoop.tmp.dir</name>
       <value>file:/home/hadoop/tmp</value>
     </property>
     <property>
       <name>io.file.buffer.size</name>
       <value>131702</value>
     </property>
     
     
[root@hadoop0 hadoop]# vim etc/hadoop/hdfs-site.xml 
在 <configuration> 块儿中添加:
   <property>
      <name>dfs.namenode.name.dir</name>
      <value>file:/home/hadoop/hdfs/name</value>
   </property>
   <property>
      <name>dfs.datanode.data.dir</name>
      <value>file:/home/hadoop/hdfs/data</value>
   </property>
   <property>
      <name>dfs.replication</name>
      <value>2</value>
   </property>
   <property>
      <name>dfs.namenode.secondary.http-address</name>
      <value>hadoop0:9001</value>
   </property>
   <property>
      <name>dfs.webhdfs.enabled</name>
      <value>true</value>
   </property>


[root@hadoop0 hadoop]# cp etc/hadoop/mapred-site.xml.template   etc/hadoop/mapred-site.xml          
[root@hadoop0 hadoop]# vim etc/hadoop/mapred-site.xml
在 <configuration> 块儿中添加:
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.auxservices.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>hadoop0:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>hadoop0:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>hadoop0:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>hadoop0:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>hadoop0:8088</value>
    </property>


[root@hadoop0 hadoop]# vim etc/hadoop/slaves 
删除已有内容,添加:
hadoop1
hadoop2


[root@hadoop0 hadoop]# vim etc/hadoop/hadoop-env.sh 
找到 export JAVA_HOME=${JAVA_HOME},改为自己JAVA_HOME的绝对路径
export JAVA_HOME=/usr/local/jdk1.8

# 复制 hadoop目录 到hadoop1,hadoop2
[root@hadoop0 ~]# scp -r /usr/local/hadoop  root@hadoop1:/usr/local/hadoop
[root@hadoop0 ~]# scp -r /usr/local/hadoop  root@hadoop2:/usr/local/hadoop

 scp -r /usr/local/hadoop/etc/hadoop/core-site.xml  root@hadoop1:/usr/local/hadoop/etc/hadoop/core-site.xml
 scp -r /usr/local/hadoop/etc/hadoop/hdfs-site.xml  root@hadoop1:/usr/local/hadoop/etc/hadoop/hdfs-site.xml
 scp -r /usr/local/hadoop/etc/hadoop/hadoop-env.sh   root@hadoop1:/usr/local/hadoop/etc/hadoop/hadoop-env.sh 


 scp -r /usr/local/hadoop/etc/hadoop/core-site.xml  root@hadoop2:/usr/local/hadoop/etc/hadoop/core-site.xml
 scp -r /usr/local/hadoop/etc/hadoop/hdfs-site.xml  root@hadoop2:/usr/local/hadoop/etc/hadoop/hdfs-site.xml
 scp -r /usr/local/hadoop/etc/hadoop/hadoop-env.sh   root@hadoop2:/usr/local/hadoop/etc/hadoop/hadoop-env.sh

container生成image

# save the tuned config files back into the build context on the host
docker cp hadoop0:/usr/local/hadoop/etc/hadoop/core-site.xml /alidata/dockerdata/dockerfiles/hadoop
docker cp hadoop0:/usr/local/hadoop/etc/hadoop/hdfs-site.xml /alidata/dockerdata/dockerfiles/hadoop
docker cp hadoop0:/usr/local/hadoop/etc/hadoop/hadoop-env.sh /alidata/dockerdata/dockerfiles/hadoop


# snapshot the configured containers as reusable images
# (tag typo fixed: "haoopmaster"/"haoopslave" -> "hadoopmaster"/"hadoopslave")
docker commit hadoop0 myos:hadoopmaster
docker commit hadoop1 myos:hadoopslave

启动 hadoop

在master启动hadoop,从节点会自动启动

初始化
[root@hadoop0 ~]# hdfs namenode -format
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hadoop0/172.17.0.5
************************************************************/

启动
[root@hadoop0 ~]# cd /usr/local/hadoop/sbin/
[root@hadoop0 sbin]# ./hadoop-daemon.sh start  namenode
[root@hadoop0 sbin]# ./hadoop-daemon.sh start datanode
[root@hadoop0 sbin]# ./start-dfs.sh
[root@hadoop0 sbin]# ./start-yarn.sh
[root@hadoop0 sbin]# ./mr-jobhistory-daemon.sh  start historyserver

验证

测试
# 在三台主机分别查看状态
[root@hadoop0 ~]# jps
418 DataNode
324 NameNode
1685 JobHistoryServer
1397 ResourceManager
1150 SecondaryNameNode
1758 Jps

[root@hadoop1 ~]# jps
550 Jps
312 DataNode
427 NodeManager

[root@hadoop2 ~]# jps
548 Jps
310 DataNode
425 NodeManager

浏览器中访问:

http://192.168.74.134:50070/

ps:在启动docker容器的时候已经设置了本机和容器的映射,所以此时只需要在云主机当中设置安全组放开并进行访问即可

http://192.168.74.134:8088/


可以正常访问的话,可以说明集群启动成功了,但不一定可以正常运行,还需要下面的实际验证

上传

# re-tag the image and push it to the Aliyun container registry
docker build -t centos7-ssh:1.0 . -f Dockerfile
docker login --username=102010cncger@sina.com registry.cn-hangzhou.aliyuncs.com
# replace [ImageId] with the actual id shown by `docker images`
docker tag [ImageId] registry.cn-hangzhou.aliyuncs.com/ling/centos:1.0
docker push registry.cn-hangzhou.aliyuncs.com/ling/centos:1.0

flink

https://www.ikeguang.com/article/1976

https://flink.apache.org/downloads.html

https://www.ikeguang.com/article/1985

flink docker

https://nightlies.apache.org/flink/flink-docs-release-1.15/zh/docs/deployment/resource-providers/standalone/docker/

# standalone Flink session cluster on a user-defined bridge network
FLINK_PROPERTIES="jobmanager.rpc.address: jobmanager"
docker network create flink-network
# JobManager: publishes the web UI on 8081
docker run \
   -d \
   --name=jobmanager \
   --network flink-network \
   --publish 8081:8081 \
   --env FLINK_PROPERTIES="${FLINK_PROPERTIES}" \
   flink:1.15.1-scala_2.12 jobmanager
# TaskManager: finds the JobManager via jobmanager.rpc.address above
docker run \
   -d \
   --name=taskmanager \
   --network flink-network \
   --env FLINK_PROPERTIES="${FLINK_PROPERTIES}" \
   flink:1.15.1-scala_2.12 taskmanager

http://192.168.74.134:8081/

flink yarn

https://nightlies.apache.org/flink/flink-docs-release-1.15/zh/docs/deployment/resource-providers/yarn/

docker-compose.yml

# docker-compose definition of the same Flink session cluster
version: "2.2"
services:
  jobmanager:
    image: flink:1.15.1-scala_2.12
    ports:
      - "8081:8081"  # Flink web UI
    command: jobmanager
    environment:
      - |
        FLINK_PROPERTIES=
        jobmanager.rpc.address: jobmanager        

  taskmanager:
    image: flink:1.15.1-scala_2.12
    depends_on:
      - jobmanager
    command: taskmanager
    scale: 1  # number of TaskManager replicas
    environment:
      - |
        FLINK_PROPERTIES=
        jobmanager.rpc.address: jobmanager
        taskmanager.numberOfTaskSlots: 2

问题解决

关于hadoop集群搭建后,访问Browse Directory目录的权限问题