准备
安装MySql 8+ 客户端
用于初始化数据和检查
登入方式:
mysql -h <HOST_IP> -u<USER> -p
查看版本:
select version();
安装Redis 5+ 客户端
用于初始化数据和检查。
登入方式:
redis-cli -h <HOST_IP> -p <PORT> -a <PASSWORD>
查看版本:
info
安装Kerberos 客户端
在DS集群各节点安装大数据客户端
测试客户端:
source /opt/client/bigdata_env
kinit <user>
<password>
hdfs dfs -ls / -- 检查是否能访问hdfs
验证网络和端口连通
telnet
netstat
curl
ssh -v -p <PORT> <HOST_IP>
验证JDBC
java -cp .;mysql-connector-java-8.0.28.jar JdbcTest <USER> <PASSWORD> com.mysql.cj.jdbc.Driver "jdbc:mysql://localhost:3306/sys?useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=UTC" "select sysdate()"
java -cp .;postgresql-42.2.5.jar JdbcTest <USER> <PASSWORD> org.postgresql.Driver "jdbc:postgresql://localhost:5432/postgres" "select now()"
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Statement;
// javac JdbcTest.java
public class JdbcTest {
    /**
     * Minimal JDBC smoke-test tool: connects to a data source, runs one SQL
     * query, and prints the result set as tab-separated text (header row of
     * column labels first, then one line per row).
     *
     * Usage: java -cp .:driver.jar JdbcTest &lt;username&gt; &lt;password&gt; &lt;driver-class&gt; &lt;jdbc-url&gt; &lt;sql&gt;
     *
     * @param args username, password, JDBC driver class name, JDBC URL, SQL query
     * @throws Exception any driver-loading or SQL failure is propagated so the
     *                   operator sees the full stack trace during diagnosis
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 5) {
            System.out.println("Require args: <username> <password> <driver-class> <jdbc-url> <sql>");
            return;
        }
        String user = args[0];
        String pass = args[1];
        String jdbcDriver = args[2];
        String dbUrl = args[3];
        String sql = args[4];
        // Load the driver explicitly; JDBC 4+ drivers self-register, but this
        // keeps the tool working with older driver jars as well.
        Class.forName(jdbcDriver);
        System.out.println("Connect ...");
        // try-with-resources: the original closed conn/stmt/rs only on the
        // happy path, leaking all three whenever the query or iteration threw.
        try (Connection conn = DriverManager.getConnection(dbUrl, user, pass);
                Statement stmt = conn.createStatement();
                ResultSet rs = stmt.executeQuery(sql)) {
            ResultSetMetaData rsmd = rs.getMetaData();
            int colCount = rsmd.getColumnCount();
            // Header row: tab-separated column labels.
            for (int i = 1; i <= colCount; i++) {
                System.out.print(rsmd.getColumnLabel(i));
                System.out.print("\t");
            }
            System.out.println();
            // Data rows: tab-separated values via getObject (driver default rendering).
            while (rs.next()) {
                for (int i = 1; i <= colCount; i++) {
                    System.out.print(rs.getObject(i));
                    System.out.print("\t");
                }
                System.out.println();
            }
        }
    }
}
验证连接 Hadoop,Hive,Spark
登入hive
source /opt/client/bigdata_env
kinit <user>
<password>
beeline
将 HDFS 路径中的 NameNode IP 换成集群域名 hacluster,对比以下两条命令:
spark.sql("select 1").write.mode("error").csv("hdfs://10.167.134.141:8020/user/root/data_quality_error_data/test")
spark.sql("select 1").write.mode("overwrite").csv("hdfs://hacluster/user/root/data_quality_error_data/test")
原因:和开源的访问方式不一样,因为 MRS 开启了 Kerberos 认证,并且有 token 验证,所以只能使用域名访问。
HDFS初始化问题:添加配置
<property>
<name>dfs.client.failover.proxy.provider.hacluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
找不到 mysql-connector:Spark 不会读取本地的配置,而是在 HDFS 上查找。需要把 mysql-connector 的 Jar 放到 HDFS 对应的路径下,如:/user/spark2x/jars/8.1.0/spark-archive-2x.zip。
检查 JDK 8
java -version