DataSphere

来自ling
跳转至: 导航搜索

安装

https://zhuanlan.zhihu.com/p/555062985

https://zhuanlan.zhihu.com/p/556259593

docker cp /ling-cloud/apache-hive-2.3.3-bin.tar.gz 53969e6b563c:/home/hadoop
docker cp /ling-cloud/hadoop-2.7.2.tar.gz 53969e6b563c:/home/hadoop
docker cp /ling-cloud/dss_linkis_one-click_install_20220704.zip 53969e6b563c:/home/hadoop
docker cp /ling-cloud/spark-2.4.3-bin-hadoop2.7.tgz 53969e6b563c:/home/hadoop
docker cp /ling-cloud/spark-2.4.3-bin-without-hadoop.tgz 53969e6b563c:/home/hadoop

hadoop

docker run -i -t registry.cn-shanghai.aliyuncs.com/ling/centos7-base:0.3 /bin/bash

netstat -tunlp

如果没有监听22端口,可以执行 /usr/sbin/sshd -D & 启动sshd服务

root账号执行

sudo useradd hadoop

echo "hadoop  ALL=(ALL)  NOPASSWD: ALL" | sudo tee -a /etc/sudoers

ssh-keygen

ssh-copy-id -i ~/.ssh/id_rsa.pub root@127.0.0.1
配置成功后,测试下是否成功,如果不需要输入密码,证明配置成功。

ssh localhost
sudo rpm -ivh http://repo.mysql.com/yum/mysql-5.5-community/el/6/x86_64/mysql-community-release-el6-5.noarch.rpm
#sudo yum install mysql-community-client mysql-community-devel mysql-community-server php-mysql

sudo rpm -ivh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm

sudo yum install -y java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9 java-1.8.0-openjdk-devel-1.8.0.342.b07-1.el7_9 nginx dos2unix mysql initscripts

sudo systemctl enable nginx

sudo systemctl start nginx

hadoop账号执行

su hadoop

cd ~

wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz

tar xvf hadoop-2.7.2.tar.gz
sudo mkdir -p /opt/hadoop
sudo mv hadoop-2.7.2 /opt/hadoop/


sudo vim /etc/profile


export HADOOP_HOME=/opt/hadoop/hadoop-2.7.2
export HIVE_CONF_DIR=/opt/hive/apache-hive-2.3.3-bin/conf
export HIVE_AUX_JARS_PATH=/opt/hive/apache-hive-2.3.3-bin/lib
export HIVE_HOME=/opt/hive/apache-hive-2.3.3-bin
export SPARK_HOME=/opt/spark/spark-2.4.3-bin-without-hadoop
export HADOOP_CONF_DIR=/opt/hadoop/hadoop-2.7.2/etc/hadoop

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
export PATH=$JAVA_HOME/bin:$PATH:$HADOOP_HOME/bin:$HIVE_HOME/bin:$SPARK_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"

source /etc/profile


sudo vi /etc/hosts

添加 127.0.0.1 namenode

配置Hadoop

mkdir  -p /opt/hadoop/hadoop-2.7.2/hadoopinfra/hdfs/namenode
mkdir  -p /opt/hadoop/hadoop-2.7.2/hadoopinfra/hdfs/datanode

vi /opt/hadoop/hadoop-2.7.2/etc/hadoop/core-site.xml
core-site.xml修改如下

<!-- Put site-specific property overrides in this file. -->


<configuration>
    <!-- 指定HDFS中NameNode的地址 -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://127.0.0.1:9000</value>
    </property>

    <!-- 指定Hadoop运行时产生文件的存储目录 -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/hadoop-2.7.2/data/tmp</value>
    </property>

    <property>
       <name>hadoop.proxyuser.hadoop.hosts</name> 
       <value>*</value> 
     </property> 
     <property> 
       <name>hadoop.proxyuser.hadoop.groups</name> 
       <value>*</value> 
     </property>
</configuration>
修改Hadoop的hdfs目录配置

vi /opt/hadoop/hadoop-2.7.2/etc/hadoop/hdfs-site.xml
hdfs-site.xml修改如下

<configuration>
   <property> 
      <name>dfs.replication</name> 
      <value>1</value> 
   </property> 
   <property> 
      <name>dfs.name.dir</name> 
      <value>/opt/hadoop/hadoop-2.7.2/hadoopinfra/hdfs/namenode</value> 
   </property> 
   <property> 
      <name>dfs.data.dir</name>
      <value>/opt/hadoop/hadoop-2.7.2/hadoopinfra/hdfs/datanode</value> 
   </property>
</configuration>
修改Hadoop的yarn配置

vi /opt/hadoop/hadoop-2.7.2/etc/hadoop/yarn-site.xml
yarn-site.xml修改如下

<configuration>
   <property> 
      <name>yarn.nodemanager.aux-services</name> 
      <value>mapreduce_shuffle</value> 
   </property>

 <property>
   <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
    <description>Whether virtual memory limits will be enforced for containers</description>
  </property>
 <property>
   <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>4</value>
    <description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
  </property>

</configuration>
修改mapred

cp /opt/hadoop/hadoop-2.7.2/etc/hadoop/mapred-site.xml.template /opt/hadoop/hadoop-2.7.2/etc/hadoop/mapred-site.xml

vi /opt/hadoop/hadoop-2.7.2/etc/hadoop/mapred-site.xml
mapred-site.xml修改如下

<configuration>
   <property> 
      <name>mapreduce.framework.name</name> 
      <value>yarn</value> 
   </property>
</configuration>
修改Hadoop环境配置文件

vi /opt/hadoop/hadoop-2.7.2/etc/hadoop/hadoop-env.sh

修改JAVA_HOME
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64/



初始化hadoop

hdfs namenode -format

root账号执行
/opt/hadoop/hadoop-2.7.2/sbin/start-dfs.sh 
/opt/hadoop/hadoop-2.7.2/sbin/start-yarn.sh
临时关闭防火墙

sudo systemctl stop firewalld
浏览器访问Hadoop

访问hadoop的默认端口号为50070

hive

wget https://archive.apache.org/dist/hive/hive-2.3.3/apache-hive-2.3.3-bin.tar.gz

tar xvf apache-hive-2.3.3-bin.tar.gz
sudo mkdir -p /opt/hive
sudo mv apache-hive-2.3.3-bin /opt/hive/

修改配置文件

cd /opt/hive/apache-hive-2.3.3-bin/conf/
sudo cp hive-env.sh.template hive-env.sh
sudo cp hive-default.xml.template hive-site.xml
sudo cp hive-log4j2.properties.template hive-log4j2.properties
sudo cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties

在Hadoop中创建文件夹并设置权限

hadoop fs -mkdir -p /data/hive/warehouse
hadoop fs -mkdir /data/hive/tmp
hadoop fs -mkdir /data/hive/log
hadoop fs -chmod -R 777 /data/hive/warehouse
hadoop fs -chmod -R 777 /data/hive/tmp
hadoop fs -chmod -R 777 /data/hive/log
hadoop fs -mkdir -p /spark-eventlog
修改hive配置文件

sudo cp hive-site.xml hive-site.xml_backup
sudo vi hive-site.xml
配置文件如下
sudo tee /opt/hive/apache-hive-2.3.3-bin/conf/hive-site.xml <<-'EOF'
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--><configuration>
<property>
  <name>hive.exec.scratchdir</name>
  <value>hdfs://127.0.0.1:9000/data/hive/tmp</value>
</property>
<property>
   <name>hive.metastore.warehouse.dir</name>
  <value>hdfs://127.0.0.1:9000/data/hive/warehouse</value>
</property>
<property>
  <name>hive.querylog.location</name>
  <value>hdfs://127.0.0.1:9000/data/hive/log</value>
</property>

<property>
  <name>hive.metastore.schema.verification</name>
  <value>false</value>
</property>

<property>
  <name>javax.jdo.option.ConnectionURL</name>
   <value>jdbc:mysql://rm-8vbe87b5295dz08zhxo.mysql.zhangbei.rds.aliyuncs.com:3306/hive?useSSL=false</value>
</property>

<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
   <value>com.mysql.jdbc.Driver</value>
</property>

<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>lingcloud</value>
</property>

<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>Wb19831010!</value>
</property>

 <property>
    <name>system:java.io.tmpdir</name>
    <value>/tmp/hive/java</value>
  </property>
  <property>
    <name>system:user.name</name>
    <value>hadoop</value>
  </property>

 <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/opt/hive/apache-hive-2.3.3-bin/tmp/${system:user.name}</value>
    <description>Local scratch space for Hive jobs</description>
  </property>
  <property>
    <name>hive.downloaded.resources.dir</name>
    <value>/opt/hive/apache-hive-2.3.3-bin/tmp/${hive.session.id}_resources</value>
    <description>Temporary local directory for added resources in the remote file system.</description>
  </property>

<property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/opt/hive/apache-hive-2.3.3-bin/tmp/root/operation_logs</value>
    <description>Top level directory where operation logs are stored if logging functionality is enabled</description>
  </property>
</configuration>
EOF
配置hive中jdbc的MySQL驱动

cd /opt/hive/apache-hive-2.3.3-bin/lib/
wget https://downloads.mysql.com/archives/get/p/3/file/mysql-connector-java-5.1.49.tar.gz
tar xvf mysql-connector-java-5.1.49.tar.gz 
cp mysql-connector-java-5.1.49/mysql-connector-java-5.1.49.jar .
配置环境变量

sudo vi /opt/hive/apache-hive-2.3.3-bin/conf/hive-env.sh

export HADOOP_HOME=/opt/hadoop/hadoop-2.7.2
export HIVE_CONF_DIR=/opt/hive/apache-hive-2.3.3-bin/conf
export HIVE_AUX_JARS_PATH=/opt/hive/apache-hive-2.3.3-bin/lib

创建对应数据库

初始化schema

/opt/hive/apache-hive-2.3.3-bin/bin/schematool -dbType mysql -initSchema
初始化完成后修改MySQL链接信息,之后配置MySQL IP 端口以及放元数据的库名称

nohup hive --service metastore >> metastore.log 2>&1 &
nohup hive --service hiveserver2 >> hiveserver2.log 2>&1 &
验证安装

hive -e "show databases"

Spark

su hadoop

wget https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-without-hadoop.tgz

tar xvf spark-2.4.3-bin-without-hadoop.tgz
sudo mkdir -p /opt/spark
sudo mv spark-2.4.3-bin-without-hadoop /opt/spark/

配置spark环境变量以及备份配置文件

cd /opt/spark/spark-2.4.3-bin-without-hadoop/conf/
cp spark-env.sh.template spark-env.sh
cp spark-defaults.conf.template spark-defaults.conf
cp metrics.properties.template metrics.properties
cp workers.template workers
配置程序的环境变量

vi spark-env.sh

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
export HADOOP_HOME=/opt/hadoop/hadoop-2.7.2
export HADOOP_CONF_DIR=/opt/hadoop/hadoop-2.7.2/etc/hadoop
export SPARK_DIST_CLASSPATH=$(/opt/hadoop/hadoop-2.7.2/bin/hadoop classpath)
export SPARK_MASTER_HOST=127.0.0.1
export SPARK_MASTER_PORT=7077
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.retainedApplications=50 -Dspark.history.fs.logDirectory=hdfs://127.0.0.1:9000/spark-eventlog"
修改默认的配置文件

vi spark-defaults.conf

spark.master                     spark://127.0.0.1:7077
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://127.0.0.1:9000/spark-eventlog
spark.serializer                 org.apache.spark.serializer.KryoSerializer
spark.driver.memory              3g
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://127.0.0.1:9000/spark-eventlog
spark.eventLog.compress          true
配置工作节点

vi workers

127.0.0.1
配置hive

cp /opt/hive/apache-hive-2.3.3-bin/conf/hive-site.xml /opt/spark/spark-2.4.3-bin-without-hadoop/conf
验证应用程序

su root
/opt/spark/spark-2.4.3-bin-without-hadoop/sbin/start-all.sh
访问集群中的所有应用程序的默认端口号为8080

验证安装

spark-sql -e "show databases"
提示

Error: Failed to load class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.
Failed to load main class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.
You need to build Spark with -Phive and -Phive-thriftserver.
查找原因是因为没有集成hadoop的spark没有hive驱动,按网上的讲法,要么自己编译带驱动版本,要么把驱动文件直接放到jars目录。第一种太麻烦,第二种没成功,我用的第三种方法。下载对应版本集成了hadoop的spark安装包,直接覆盖原来的jars目录

wget https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz
tar xvf spark-2.4.3-bin-hadoop2.7.tgz
cp -rf spark-2.4.3-bin-hadoop2.7/jars/ /opt/spark/spark-2.4.3-bin-without-hadoop/
如果提示缺少MySQL驱动,可以将mysql-connector-java-5.1.49/mysql-connector-java-5.1.49.jar放入到spark的jars目录


如果本地没有相关驱动,执行下面脚本

cd /opt/spark/spark-2.4.3-bin-without-hadoop/jars

wget https://downloads.mysql.com/archives/get/p/3/file/mysql-connector-java-5.1.49.tar.gz
tar xvf mysql-connector-java-5.1.49.tar.gz 
cp mysql-connector-java-5.1.49/mysql-connector-java-5.1.49.jar .

DataSphere Studio

sudo rpm -ivh http://repo.mysql.com/yum/mysql-5.5-community/el/6/x86_64/mysql-community-release-el6-5.noarch.rpm
sudo yum install mysql-community-client mysql-community-devel mysql-community-server php-mysql

准备安装包
DataSphereStudio1.1.0
https://github.com/WeBankFinTech/DataSphereStudio/releases/tag/1.1.0
unzip -d dss dss_linkis_one-click_install_20220704.zip

sudo yum -y install epel-release
sudo yum install -y python-pip

#python -m pip install matplotlib
python -m pip install "matplotlib<3.0"

修改配置
用户需要对 xx/dss_linkis/conf 目录下的 config.sh 和 db.sh 进行修改

### deploy user
deployUser=hadoop

### Linkis_VERSION
LINKIS_VERSION=1.1.1

### DSS Web
DSS_NGINX_IP=127.0.0.1
DSS_WEB_PORT=8085

### DSS VERSION
DSS_VERSION=1.1.0


############## ############## linkis的其他默认配置信息 start ############## ##############
### Specifies the user workspace, which is used to store the user's script files and log files.
### Generally local directory
##file:// required
WORKSPACE_USER_ROOT_PATH=file:///tmp/linkis/ 
### User's root hdfs path
##hdfs:// required
HDFS_USER_ROOT_PATH=hdfs:///tmp/linkis 
### Path to store job ResultSet:file or hdfs path
##hdfs:// required
RESULT_SET_ROOT_PATH=hdfs:///tmp/linkis 

### Path to store started engines and engine logs, must be local
ENGINECONN_ROOT_PATH=/appcom/tmp

#ENTRANCE_CONFIG_LOG_PATH=hdfs:///tmp/linkis/ ##hdfs:// required

###HADOOP CONF DIR #/appcom/config/hadoop-config
HADOOP_CONF_DIR=/opt/hadoop/hadoop-2.7.2/etc/hadoop
###HIVE CONF DIR  #/appcom/config/hive-config
HIVE_CONF_DIR=/opt/hive/apache-hive-2.3.3-bin/conf
###SPARK CONF DIR #/appcom/config/spark-config
SPARK_CONF_DIR=/opt/spark/spark-2.4.3-bin-without-hadoop/conf
# for install
LINKIS_PUBLIC_MODULE=lib/linkis-commons/public-module

##YARN REST URL  spark engine required
YARN_RESTFUL_URL=http://127.0.0.1:8088

## Engine version conf
#SPARK_VERSION
SPARK_VERSION=2.4.3
##HIVE_VERSION
HIVE_VERSION=2.3.3
PYTHON_VERSION=python2

## LDAP is for enterprise authorization, if you just want to have a try, ignore it.
#LDAP_URL=ldap://localhost:1389/
#LDAP_BASEDN=dc=webank,dc=com
#LDAP_USER_NAME_FORMAT=cn=%s@xxx.com,OU=xxx,DC=xxx,DC=com

################### The install Configuration of all Linkis's Micro-Services #####################
#
#    NOTICE:
#       1. If you just wanna try, the following micro-service configuration can be set without any settings.
#            These services will be installed by default on this machine.
#       2. In order to get the most complete enterprise-level features, we strongly recommend that you install
#          the following microservice parameters
#

###  EUREKA install information
###  You can access it in your browser at the address below:http://${EUREKA_INSTALL_IP}:${EUREKA_PORT}
###  Microservices Service Registration Discovery Center
LINKIS_EUREKA_INSTALL_IP=127.0.0.1
LINKIS_EUREKA_PORT=9600
#LINKIS_EUREKA_PREFER_IP=true

###  Gateway install information
#LINKIS_GATEWAY_INSTALL_IP=127.0.0.1
LINKIS_GATEWAY_PORT=9001

### ApplicationManager
#LINKIS_MANAGER_INSTALL_IP=127.0.0.1
LINKIS_MANAGER_PORT=9101

### EngineManager
#LINKIS_ENGINECONNMANAGER_INSTALL_IP=127.0.0.1
LINKIS_ENGINECONNMANAGER_PORT=9102

### EnginePluginServer
#LINKIS_ENGINECONN_PLUGIN_SERVER_INSTALL_IP=127.0.0.1
LINKIS_ENGINECONN_PLUGIN_SERVER_PORT=9103

### LinkisEntrance
#LINKIS_ENTRANCE_INSTALL_IP=127.0.0.1
LINKIS_ENTRANCE_PORT=9104

###  publicservice
#LINKIS_PUBLICSERVICE_INSTALL_IP=127.0.0.1
LINKIS_PUBLICSERVICE_PORT=9105

### cs
#LINKIS_CS_INSTALL_IP=127.0.0.1
LINKIS_CS_PORT=9108

########## Linkis微服务配置完毕##### 

################### The install Configuration of all DataSphereStudio's Micro-Services #####################
#
#    NOTICE:
#       1. If you just wanna try, the following micro-service configuration can be set without any settings.
#            These services will be installed by default on this machine.
#       2. In order to get the most complete enterprise-level features, we strongly recommend that you install
#          the following microservice parameters
#

### DSS_SERVER
### This service is used to provide dss-server capability.

### project-server
#DSS_FRAMEWORK_PROJECT_SERVER_INSTALL_IP=127.0.0.1
#DSS_FRAMEWORK_PROJECT_SERVER_PORT=9002
### orchestrator-server
#DSS_FRAMEWORK_ORCHESTRATOR_SERVER_INSTALL_IP=127.0.0.1
#DSS_FRAMEWORK_ORCHESTRATOR_SERVER_PORT=9003
### apiservice-server
#DSS_APISERVICE_SERVER_INSTALL_IP=127.0.0.1
#DSS_APISERVICE_SERVER_PORT=9004
### dss-workflow-server
#DSS_WORKFLOW_SERVER_INSTALL_IP=127.0.0.1
#DSS_WORKFLOW_SERVER_PORT=9005
### dss-flow-execution-server
#DSS_FLOW_EXECUTION_SERVER_INSTALL_IP=127.0.0.1
#DSS_FLOW_EXECUTION_SERVER_PORT=9006
###dss-scriptis-server
#DSS_SCRIPTIS_SERVER_INSTALL_IP=127.0.0.1
#DSS_SCRIPTIS_SERVER_PORT=9008

###dss-data-api-server
#DSS_DATA_API_SERVER_INSTALL_IP=127.0.0.1
#DSS_DATA_API_SERVER_PORT=9208
###dss-data-governance-server
#DSS_DATA_GOVERNANCE_SERVER_INSTALL_IP=127.0.0.1
#DSS_DATA_GOVERNANCE_SERVER_PORT=9209
###dss-guide-server
#DSS_GUIDE_SERVER_INSTALL_IP=127.0.0.1
#DSS_GUIDE_SERVER_PORT=9210
########## DSS微服务配置完毕#####

############## ############## other default configuration 其他默认配置信息  ############## ##############

## java application default jvm memory
export SERVER_HEAP_SIZE="512M"


##sendemail配置,只影响DSS工作流中发邮件功能
EMAIL_HOST=smtp.163.com
EMAIL_PORT=25
EMAIL_USERNAME=xxx@163.com
EMAIL_PASSWORD=xxxxx
EMAIL_PROTOCOL=smtp

### Save the file path exported by the orchestrator service
ORCHESTRATOR_FILE_PATH=/appcom/tmp/dss
### Save DSS flow execution service log path
EXECUTION_LOG_PATH=/appcom/tmp/dss
用脚本安装

tee /home/hadoop/dss/conf/db.sh <<-'EOF'


### for DSS-Server and Eventchecker APPCONN
MYSQL_HOST=rm-8vbe87b5295dz08zhxo.mysql.zhangbei.rds.aliyuncs.com
MYSQL_PORT=3306
MYSQL_DB=dss
MYSQL_USER=lingcloud
MYSQL_PASSWORD=Wb19831010!

#主要是配合scriptis一起使用,如果不配置,会默认尝试通过$HIVE_CONF_DIR 中的配置文件获取
HIVE_META_URL=jdbc:mysql://rm-8vbe87b5295dz08zhxo.mysql.zhangbei.rds.aliyuncs.com:3306/hive?useSSL=false    # HiveMeta元数据库的URL
HIVE_META_USER=lingcloud   # HiveMeta元数据库的用户
HIVE_META_PASSWORD=Wb19831010!    # HiveMeta元数据库的密码
EOF

cd xx/dss_linkis/bin

sh install.sh
等待安装脚本执行完毕,再进到linkis目录里修改对应的配置文件

修改linkis-ps-publicservice.properties配置,否则hive数据库刷新不出来表

linkis.metadata.hive.permission.with-login-user-enabled=false
拷贝缺少的jar

cp /opt/hive/apache-hive-2.3.3-bin/lib/datanucleus-* ~/dss/linkis/lib/linkis-engineconn-plugins/hive/dist/v2.3.3/lib
cp /opt/hive/apache-hive-2.3.3-bin/lib/*jdo*  ~/dss/linkis/lib/linkis-engineconn-plugins/hive/dist/v2.3.3/lib
安装完成后启动

sh start-all.sh
启动完成后eureka注册页面

常用链接

https://github.com/WeBankFinTech/DataSphereStudio-Doc/blob/main/zh_CN/%E5%AE%89%E8%A3%85%E9%83%A8%E7%BD%B2/DSS%E5%8D%95%E6%9C%BA%E9%83%A8%E7%BD%B2%E6%96%87%E6%A1%A3.md


Linkis1.0.2 安装及使用指南 https://www.jianshu.com/p/d0e8b605c4ce

WeDataSphere 常见问题(含DSS,Linkis等)QA文档 https://docs.qq.com/doc/DSGZhdnpMV3lTUUxq

systemctl stop firewalld
systemctl stop firewalld.service #停止firewall
systemctl disable firewalld.service #禁止firewall开机启动

http://192.168.74.135:50070/

http://192.168.74.135:8080

http://192.168.74.135:20303/

http://192.168.74.135:8088

安装准备

yum -y install yum-utils
yum-config-manager --disable mysql80-community	
yum-config-manager --enable mysql57-community
yum repolist enabled | grep mysql
yum install -y  mysql-community-server
yum install -y telnet tar sed dos2unix unzip expect

http://nginx.org/en/linux_packages.html#RHEL-CentOS

touch /etc/yum.repos.d/nginx.repo
vi /etc/yum.repos.d/nginx.repo
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=1
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true

[nginx-mainline]
name=nginx mainline repo
baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/
gpgcheck=1
enabled=0
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
yum install yum-utils

yum install -y nginx
whereis nginx

perl

https://www.perl.org/get.html 项目部署#安装nginx依赖:查看是否已经安装

wget https://www.cpan.org/src/5.0/perl-5.34.1.tar.gz
tar -xzf  perl-5.34.1.tar.gz 
cd perl-5.34.1
mv /usr/bin/perl /usr/bin/perl.bak
./Configure -des -Dprefix=/usr/local/perl
make&&make install
perl -v


ln -s /usr/local/perl/bin/perl /usr/bin/perl

mysql

https://www.cnblogs.com/milton/p/15418572.html

wget -c http://dev.mysql.com/get/mysql57-community-release-el7-10.noarch.rpm
yum -y install mysql57-community-release-el7-10.noarch.rpm
yum -y install mysql-community-server

b、然后手动下载

wget http://dev.mysql.com/get/mysql-community-release-el7-5.noarch.rpm

c、然后安装该数据库的rpm包

rpm -ivh mysql-community-release-el7-5.noarch.rpm

d、开始安装mysql-server

yum install mysql-server

(2)卸载mysql-community-release-el7-5.noarch

rpm -e --nodeps mysql-community-release-el7-5.noarch

centos8的镜像包不再维护,重装centos后一切都好了

wget https://cdn.mysql.com//Downloads/MySQL-8.0/mysql-8.0.28-1.el8.x86_64.rpm-bundle.tar
tar -xvf mysql-8.0.28-1.el8.x86_64.rpm-bundle.tar
rpm -ivh mysql-community-common-8.0.28-1.el8.x86_64.rpm
rpm -ivh mysql-community-client-plugins-8.0.28-1.el8.x86_64.rpm
rpm -ivh mysql-community-libs-8.0.28-1.el8.x86_64.rpm
rpm -ivh mysql-community-client-8.0.28-1.el8.x86_64.rpm
rpm -ivh  mysql-community-icu-data-files-8.0.28-1.el8.x86_64.rpm
rpm -ivh mysql-community-server-8.0.28-1.el8.x86_64.rpm


通过 mysqld --console 查看日志后发现是data文件的问题,将data文件手动删除之后使用 mysqld --initialize-insecure 让系统自动生成data文件夹及内部文件,再使用 mysqld -install 重新安装

rm -rf /var/lib/mysql
mysqld --initialize-insecure
mysqld -install
tail -f /var/log/mysqld.log

安装hadoop2.7.2

http://192.168.74.135:50070/

http://192.168.74.135:8088/


https://blog.csdn.net/qq_44665283/article/details/121329554

mkdir /datasphere
cd /datasphere
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
 tar -zxvf hadoop-2.7.2.tar.gz -C /datasphere

vi /datasphere/hadoop-2.7.2/etc/hadoop/hadoop-env.sh

export JAVA_HOME=/datasphere/jdk1.8.0_91

vi /datasphere/hadoop-2.7.2/etc/hadoop/core-site.xml

<configuration>
    <!-- 指定HDFS老大(namenode)的通信地址 -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://192.168.74.135:9000</value>
    </property>
    <!-- 指定hadoop运行时产生文件的存储路径 -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/datasphere/hadoop-2.7.2/tmp</value>
    </property>
</configuration>

vi /datasphere/hadoop-2.7.2/etc/hadoop/hdfs-site.xml

<configuration>
    <!-- 设置hdfs副本数量 -->
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>

SSH免密登录

  1. 到 root 目录下:
cd /root
  2. 执行生成密钥命令:
ssh-keygen -t rsa
  3. 然后三个回车
  4. 然后复制公钥追加到第一台节点的公钥文件中:
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.74.135
  5. 选择 yes
  6. 输入登录第一台节点的密码(操作完成该节点公钥复制到第一台节点中)

配置环境变量

vim /etc/profile
export HADOOP_HOME=/datasphere/hadoop-2.7.2/
PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin:$MAVEN_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile

hdfs 启动与停止

第一次启动得先格式化(最好不要复制):

hdfs namenode -format

启动hdfs

start-dfs.sh

(9)开放50070端口

添加永久开放的端口

firewall-cmd --add-port=50070/tcp --permanent
firewall-cmd --reload

http://192.168.74.135:50070/

(10) 配置yarn启动

1、配置mapred-site.xml

cd /datasphere/hadoop-2.7.2/etc/hadoop/
mv mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<configuration>
    <!-- 通知框架MR使用YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>

2、配置yarn-site.xml

<configuration>
    <!-- reducer取数据的方式是mapreduce_shuffle -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>

3、启动yarn

start-yarn.sh

浏览器访问(防火墙开放8088端口):

firewall-cmd --add-port=8088/tcp --permanent
firewall-cmd --reload

http://192.168.74.135:8088/

至此,我们Hadoop的单机模式搭建成功。

Hive2.3.3的安装

https://blog.csdn.net/qq_44665283/article/details/121147347 下载地址:

http://archive.apache.org/dist/hive/hive-2.3.3/

wget http://archive.apache.org/dist/hive/hive-2.3.3/apache-hive-2.3.3-bin.tar.gz
tar -zxvf apache-hive-2.3.3-bin.tar.gz -C /datasphere
mv apache-hive-2.3.3-bin hive-2.3.3

1 解压配置环境变量

  1. 配置环境变量
sudo vi /etc/profile

末尾追加

export  HIVE_HOME=/datasphere/hive-2.3.3
export  PATH=$PATH:$HIVE_HOME/bin

重新加载环境变量使其生效

source /etc/profile

2 配置Hive文件

2.1 修改hive-env.sh

cp hive-env.sh.template hive-env.sh


# HADOOP_HOME=${bin}/../../hadoop
打开注释修改 HADOOP_HOME=/datasphere/hadoop-2.7.2
# export HIVE_CONF_DIR=
打开注释修改 HIVE_CONF_DIR=/datasphere/hive-2.3.3/conf

2.2 修改hive-log4j2.properties

修改hive的log存放日志到/datasphere/hive-2.3.3/logs

cp hive-log4j2.properties.template hive-log4j2.properties


vi hive-log4j2.properties

找到 property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}

修改 property.hive.log.dir = /datasphere/hive-2.3.3/logs

3 配置MySQL作为Metastore

默认情况下, Hive的元数据保存在了内嵌的 derby 数据库里, 但一般情况下生产环境使用 MySQL 来存放 Hive 元数据。

安装mysql,拷贝 mysql-connector-java-5.1.47.jar 放入 $HIVE_HOME/lib 下。

3.2 修改配置文件

参数配置文档:https://cwiki.apache.org/confluence/display/Hive/AdminManual+MetastoreAdmin

复制hive-default.xml.template为hive-site.xml 文件,删除掉configuration里的配置信息,重新配置 MySQL 数据库连接信息。

 cp hive-default.xml.template hive-site.xml
touch hive-site.xml
vi hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration> 
<!--Hive作业的HDFS根目录位置 --> 
<property>
    <name>hive.exec.scratchdir</name>
    <value>/user/hive/tmp</value>
</property>
<!--Hive作业的HDFS根目录创建写权限 --> 
<property>
    <name>hive.scratch.dir.permission</name>
    <value>733</value>
</property>
<!--hdfs上hive元数据存放位置 --> 
<property>  
  <name>hive.metastore.warehouse.dir</name>  
  <value>/user/hive/warehouse</value>   
</property>
<!--连接数据库地址,名称 -->  
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://rm-8vbe87b5295dz08zhxo.mysql.zhangbei.rds.aliyuncs.com:3306/hive?createDatabaseIfNotExist=true</value>  
</property>  
<!--连接数据库驱动 --> 
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>  
  <value>com.mysql.jdbc.Driver</value>
</property> 
<!--连接数据库用户名称 -->  
<property>  
  <name>javax.jdo.option.ConnectionUserName</name>  
  <value>lingcloud</value>
</property> 
<!--连接数据库用户密码 -->  
<property>  
  <name>javax.jdo.option.ConnectionPassword</name>  
  <value>Wb19831010!</value>
</property>
<!--客户端显示当前查询表的头信息 --> 
 <property>
  <name>hive.cli.print.header</name>
  <value>true</value>
</property>
<!--客户端显示当前数据库名称信息 --> 
<property>
  <name>hive.cli.print.current.db</name>
  <value>true</value>
</property> 
</configuration>

3.3 mysql创建hive用户密码

CREATE DATABASE hive; 
USE hive; 
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive';
GRANT ALL ON hive.* TO 'hive'@'localhost' IDENTIFIED BY 'hive'; 
GRANT ALL ON hive.* TO 'hive'@'%' IDENTIFIED BY 'hive'; 
FLUSH PRIVILEGES; 

4.1 初始化数据库 从Hive 2.1开始,我们需要运行下面的schematool命令作为初始化步骤。例如,这里使用“mysql”作为db类型。

schematool -dbType mysql -initSchema

执行成功后,可以使用Navicat Premium 查看元数据库 hive 是否已经创建成功。

4.2 启动 Hive 客户端

启动Hadoop服务后,使用 Hive CLI(Hive command line interface)。hive --service cli 和 hive 效果一样,可以在终端输入以下命令:

hive

安装spark

https://spark.apache.org/downloads.html

a、下载安装
 wget https://dlcdn.apache.org/spark/spark-3.0.3/spark-3.0.3-bin-hadoop2.7.tgz
b、解压安装包

 tar -zxvf spark-3.0.3-bin-hadoop2.7.tgz
c、修改spark-env.sh文件

 cp spark-env.sh.template spark-env.sh
末尾添加以下内容:

export JAVA_HOME=/datasphere/jdk1.8.0_91
export SPARK_MASTER_IP=192.168.74.135
export SPARK_WORKER_MEMORY=2g
export SPARK_WORKER_CORES=2
export SPARK_WORKER_INSTANCES=1

d、配置环境变量
vim /etc/profile

export SPARK_HOME=/datasphere/spark-3.0.3-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin

source /etc/profile
e、启动

./sbin/start-master.sh

安装dss

hadoop用户

vim /etc/profile
export JAVA_HOME=/datasphere/jdk1.8.0_91
export JRE_HOME=$JAVA_HOME/jre
export JAVA_BIN=$JAVA_HOME/bin
export JAVA_LIB=$JAVA_HOME/lib
export CLASSPATH=.:$CLASSPATH:$JAVA_LIB/tools.jar:$JAVA_LIB/dt.jar
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin:

export HADOOP_HOME=/datasphere/hadoop-2.7.2/
export PATH=$PATH:$MAVEN_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

export  HIVE_HOME=/datasphere/hive-2.3.3
export  PATH=$PATH:$HIVE_HOME/bin

export SPARK_HOME=/datasphere/spark-3.0.3-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin

export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HIVE_CONF_DIR=$HIVE_HOME/conf
export FLINK_CONF_DIR=$FLINK_HOME/conf
export FLINK_LIB_DIR=$FLINK_HOME/lib
export SPARK_CONF_DIR=$SPARK_HOME/conf
source /etc/profile


unzip -o DSS-Linkis全家桶20220223.zip -d dss

如果有问题userdel -r hadoop

adduser hadoop
passwd hadoop
usermod -a -G hadoop hadoop

cat /etc/passwd | grep hadoop

useradd hadoop -g hadoop
vi /etc/sudoers
hadoop ALL=(ALL) NOPASSWD: NOPASSWD: ALL


vim /home/hadoop/.bashrc

同profile

检查环境

ENGINECONN_ROOT_PATH为本地目录,需要用户提前创建,并且完成授权,授权命令chmod -R 777 /目录,若为 Linkis1.0.2 版本,不必提前创建与授权,会在脚本、程序中自动创建与授权。

HDFS_USER_ROOT_PATH为 HDFS 上的路径,需要提前创建,且完成授权,授权命令hadoop fs -chmod -R 777 /目录。

yum install -y gcc zlib

sh bin/checkEnv.sh

dnf install python3
alternatives --set python /usr/bin/python3
dnf install python2
alternatives --set python /usr/bin/python2
pip install --upgrade pip
python -m pip install matplotlib

如果要删除默认的python命令,请输入:alternatives --auto python

配置

vi conf/db.sh
MYSQL_HOST=rm-8vbe87b5295dz08zhxo.mysql.zhangbei.rds.aliyuncs.com
MYSQL_PORT=3306
MYSQL_DB=dss
MYSQL_USER=lingcloud
MYSQL_PASSWORD=Wb19831010!

##hive的配置
HIVE_HOST=rm-8vbe87b5295dz08zhxo.mysql.zhangbei.rds.aliyuncs.com
HIVE_PORT=3306
HIVE_DB=hive
HIVE_USER=lingcloud
HIVE_PASSWORD=Wb19831010!


vi conf/config.sh
###HADOOP CONF DIR #/appcom/config/hadoop-config
HADOOP_CONF_DIR=/datasphere/hadoop-2.7.2/etc/hadoop
###HIVE CONF DIR  #/appcom/config/hive-config
HIVE_CONF_DIR=/datasphere/hive-2.3.3/conf
###SPARK CONF DIR #/appcom/config/spark-config
SPARK_CONF_DIR=/datasphere/spark-3.0.3-bin-hadoop2.7/conf


启动

启动hadoop

start-dfs.sh

第一次运行,否则不要运行

/datasphere/dss/bin/install.sh

启动

/datasphere/dss/bin/start-all.sh

停止

/datasphere/dss/bin/stop-all.sh

单个启动

cd /datasphere/dss/dss/sbin
sh dss-daemon.sh start dss-framework-project-server
sh dss-daemon.sh start dss-framework-orchestrator-server


cd /datasphere/dss/linkis/logs
cd /datasphere/dss/dss/logs
tail -f /datasphere/dss/dss/logs/dss-framework-project-server.out
tail -f /datasphere/dss/linkis/logs/linkis-ps-publicservice.log


tail -f /datasphere/dss/dss/logs/dss-framework-orchestrator-server.out

问题处理

javafx.util.Pair

rpm -qa | grep java
rpm -e --nodeps java
 java -version

安装oracle jdk