Preparation

Install the MySQL 8+ client

Used to initialize data and run checks.

Log in:

mysql -h <HOST_IP> -u<USER> -p

Check the version:

select version();
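The client is also what you would use for the data-initialization step. A minimal sketch, assuming the DS metadata database is named dolphinscheduler and the account is ds_user (both names are illustrative, not taken from this document):

mysql -h <HOST_IP> -u<USER> -p -e "CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8mb4;"
# hypothetical account; adjust name/host scope to your security policy
mysql -h <HOST_IP> -u<USER> -p -e "CREATE USER 'ds_user'@'%' IDENTIFIED BY '<PASSWORD>'; GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'ds_user'@'%'; FLUSH PRIVILEGES;"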

Install the Redis 5+ client

Used to initialize data and run checks.

Log in:

redis-cli -h <HOST_IP> -p <PORT> -a <PASSWORD>

Check the version:

info
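info prints many sections; to pull out just the version, filter on the redis_version field:

redis-cli -h <HOST_IP> -p <PORT> -a <PASSWORD> info server | grep redis_version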

Install the Kerberos client

Install the big data client on every node of the DS cluster.

Test the client:

source /opt/client/bigdata_env
kinit <user>
<password>    # entered at the kinit password prompt
hdfs dfs -ls /    # check that HDFS is reachable
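If kinit succeeded, klist should show a valid TGT before you try HDFS; /tmp below is just an example path in case the root listing is restricted:

klist                 # a ticket for <user> should be listed with a future expiry
hdfs dfs -ls /tmp     # any readable path works as a second check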

Verify network and port connectivity

telnet <HOST_IP> <PORT>
netstat -an | grep <PORT>
curl -v http://<HOST_IP>:<PORT>/
ssh -v -p <PORT> <HOST_IP>
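To sweep several host:port pairs at once, a minimal bash sketch using the shell's built-in /dev/tcp (the hosts and ports below are placeholders):

for hp in <MYSQL_IP>:3306 <REDIS_IP>:6379 <NAMENODE_IP>:8020; do
  host=${hp%%:*}; port=${hp##*:}
  # bash opens a TCP connection when redirecting to /dev/tcp/<host>/<port>
  (echo > /dev/tcp/$host/$port) >/dev/null 2>&1 && echo "$hp open" || echo "$hp closed"
done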

Verify JDBC

On Linux the classpath separator is ":" (use ";" on Windows):

java -cp .:mysql-connector-java-8.0.28.jar JdbcTest <USER> <PASSWORD> com.mysql.cj.jdbc.Driver "jdbc:mysql://localhost:3306/sys?useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=UTC" "select sysdate()"

java -cp .:postgresql-42.2.5.jar JdbcTest <USER> <PASSWORD> org.postgresql.Driver "jdbc:postgresql://localhost:5432/postgres" "select now()"

JdbcTest.java:
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Statement;

// Compile with: javac JdbcTest.java
// Runs an arbitrary query through an arbitrary JDBC driver and prints the result set.
public class JdbcTest {

	public static void main(String[] args) throws Exception {
		if (args == null || args.length < 5) {
			System.out.println("Require args: <username> <password> <driver-class> <jdbc-url> <sql>");
			return;
		}

		String user = args[0];
		String pass = args[1];
		String jdbcDriver = args[2];
		String dbUrl = args[3];
		String sql = args[4];

		// Load the driver class; -cp must include the driver JAR.
		Class.forName(jdbcDriver);

		System.out.println("Connect ...");
		// try-with-resources closes connection, statement and result set
		// even if the query throws.
		try (Connection conn = DriverManager.getConnection(dbUrl, user, pass);
				Statement stmt = conn.createStatement();
				ResultSet rs = stmt.executeQuery(sql)) {

			ResultSetMetaData rsmd = rs.getMetaData();
			int colCount = rsmd.getColumnCount();

			// Header row: column labels separated by tabs.
			for (int i = 1; i <= colCount; i++) {
				System.out.print(rsmd.getColumnLabel(i));
				System.out.print("\t");
			}
			System.out.println();

			// Data rows.
			while (rs.next()) {
				for (int i = 1; i <= colCount; i++) {
					System.out.print(rs.getObject(i));
					System.out.print("\t");
				}
				System.out.println();
			}
		}
	}
}

Verify connectivity to Hadoop, Hive, and Spark

Log in to Hive:

source /opt/client/bigdata_env
kinit <user>
<password>    # entered at the kinit password prompt
beeline
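Once beeline connects, a trivial query confirms Hive is actually usable; the same statements can be run non-interactively with -e:

beeline -e "show databases"
beeline -e "select 1"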

Replace the NameNode IP with the nameservice hacluster. Writing through a direct IP fails:

spark.sql("select 1").write.mode("error").csv("hdfs://10.167.134.141:8020/user/root/data_quality_error_data/test")

Writing through the nameservice succeeds:

spark.sql("select 1").write.mode("overwrite").csv("hdfs://hacluster/user/root/data_quality_error_data/test")

Reason: access differs from open-source Hadoop. MRS enables Kerberos authentication with token validation, so HDFS can only be addressed by the nameservice name, not a raw IP.
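To confirm which nameservice name the client is configured with, hdfs getconf can read it directly:

hdfs getconf -confKey dfs.nameservices    # expected: hacluster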

HDFS initialization issue: add the following property to the client's hdfs-site.xml:

<property>
    <name>dfs.client.failover.proxy.provider.hacluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
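After adding the property, verify that the client actually picks it up:

hdfs getconf -confKey dfs.client.failover.proxy.provider.hacluster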

If mysql-connector cannot be found: Spark does not look on the local classpath, it resolves jars from HDFS. Put the mysql-connector JAR into the corresponding HDFS path, e.g. into /user/spark2x/jars/8.1.0/spark-archive-2x.zip, as sketched below.
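A sketch of repacking that archive; the exact JAR name and whether MRS accepts a modified spark-archive-2x.zip are assumptions to verify against your cluster documentation:

hdfs dfs -get /user/spark2x/jars/8.1.0/spark-archive-2x.zip .
zip spark-archive-2x.zip mysql-connector-java-8.0.28.jar   # adds the JAR into the zip
hdfs dfs -put -f spark-archive-2x.zip /user/spark2x/jars/8.1.0/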

Check JDK 8

java -version
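The output should report version 1.8.x. DS startup scripts typically read JAVA_HOME as well, so it is worth checking that it points at the same JDK 8 install (that expectation is an assumption about your layout):

echo $JAVA_HOME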