本篇文章是讲在springboot项目中怎么去连接hive数据仓库,并且进行简单的增删改查操作。
1.加入相关依赖如下:
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>1.1.0</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>2.6.0</version>
- </dependency>
- <dependency>
- <groupId>org.springframework.boot</groupId>
- <artifactId>spring-boot-starter-jdbc</artifactId>
- </dependency>
- <dependency>
- <groupId>jdk.tools</groupId>
- <artifactId>jdk.tools</artifactId>
- <version>1.8</version>
- <scope>system</scope>
- <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
- </dependency>
-
如果上述依赖在加入过程中出现错误,可能是与springboot的版本冲突导致,改为相应版本即可。其中jdk.tools改为你对应的jdk版本。从这里可以看到加入了hive-jdbc依赖,该依赖是用于对hive仓库进行crud操作的。
2.配置yml文件
- hive:
- url: jdbc:hive2://ip地址:端口/;auth=noSasl
- driver-class-name: org.apache.hive.jdbc.HiveDriver
- type: com.alibaba.druid.pool.DruidDataSource
- user: 用户名
- password: 密码
- # 下面为连接池的补充设置,应用到上面所有数据源中
- # 初始化大小,最小,最大
- initialSize: 1
- minIdle: 3
- maxActive: 20
- # 配置获取连接等待超时的时间
- maxWait: 60000
- # 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒
- timeBetweenEvictionRunsMillis: 60000
- # 配置一个连接在池中最小生存的时间,单位是毫秒
- minEvictableIdleTimeMillis: 30000
- validationQuery: select 1
- testWhileIdle: true
- testOnBorrow: false
- testOnReturn: false
- # 打开PSCache,并且指定每个连接上PSCache的大小
- poolPreparedStatements: true
- maxPoolPreparedStatementPerConnectionSize: 20
- connectionErrorRetryAttempts: 0
- breakAfterAcquireFailure: true
-
注意将上述中ip地址、端口、用户名、密码等字样替换为你hive仓库相应的数据。
- import com.alibaba.druid.pool.DruidDataSource;
- import org.springframework.beans.factory.annotation.Qualifier;
- import org.springframework.boot.context.properties.ConfigurationProperties;
- import org.springframework.context.annotation.Bean;
- import org.springframework.context.annotation.Configuration;
- import org.springframework.jdbc.core.JdbcTemplate;
- import javax.sql.DataSource;
-
- /**
- * hive的相关配置类
- * @author zyl
- * @date 2019年4月1日
- */
-
- @Configuration
- @ConfigurationProperties(prefix = "hive")
- public class HiveDruidConfig {
-
- private String url;
- private String user;
- private String password;
- private String driverClassName;
- private int initialSize;
- private int minIdle;
- private int maxActive;
- private int maxWait;
- private int timeBetweenEvictionRunsMillis;
- private int minEvictableIdleTimeMillis;
- private String validationQuery;
- private boolean testWhileIdle;
- private boolean testOnBorrow;
- private boolean testOnReturn;
- private boolean poolPreparedStatements;
- private int maxPoolPreparedStatementPerConnectionSize;
- private int connectionErrorRetryAttempts;
- private boolean breakAfterAcquireFailure;
-
- @Bean(name = "hiveDruidDataSource")
- @Qualifier("hiveDruidDataSource")
- public DataSource dataSource() {
- DruidDataSource datasource = new DruidDataSource();
- datasource.setUrl(url);
- datasource.setUsername(user);
- datasource.setPassword(password);
- datasource.setDriverClassName(driverClassName);
- datasource.setInitialSize(initialSize);
- datasource.setMinIdle(minIdle);
- datasource.setMaxActive(maxActive);
- datasource.setMaxWait(maxWait);
- datasource.setTimeBetweenEvictionRunsMillis(timeBetweenEvictionRunsMillis);
- datasource.setMinEvictableIdleTimeMillis(minEvictableIdleTimeMillis);
- datasource.setValidationQuery(validationQuery);
- datasource.setTestWhileIdle(testWhileIdle);
- datasource.setTestOnBorrow(testOnBorrow);
- datasource.setTestOnReturn(testOnReturn);
- datasource.setPoolPreparedStatements(poolPreparedStatements);
- datasource.setMaxPoolPreparedStatementPerConnectionSize(maxPoolPreparedStatementPerConnectionSize);
- datasource.setConnectionErrorRetryAttempts(connectionErrorRetryAttempts);
- datasource.setBreakAfterAcquireFailure(breakAfterAcquireFailure);
- return datasource;
- }
-
- @Bean(name = "hiveDruidTemplate")
- public JdbcTemplate hiveDruidTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
- return new JdbcTemplate(dataSource);
- }
-
- public int getConnectionErrorRetryAttempts() {
- return connectionErrorRetryAttempts;
- }
-
- public void setConnectionErrorRetryAttempts(int connectionErrorRetryAttempts) {
- this.connectionErrorRetryAttempts = connectionErrorRetryAttempts;
- }
-
- public boolean isBreakAfterAcquireFailure() {
- return breakAfterAcquireFailure;
- }
-
- public void setBreakAfterAcquireFailure(boolean breakAfterAcquireFailure) {
- this.breakAfterAcquireFailure = breakAfterAcquireFailure;
- }
-
- public String getUrl() {
- return url;
- }
-
- public void setUrl(String url) {
- this.url = url;
- }
-
- public String getUser() {
- return user;
- }
-
- public void setUser(String user) {
- this.user = user;
- }
-
- public String getPassword() {
- return password;
- }
-
- public void setPassword(String password) {
- this.password = password;
- }
-
- public String getDriverClassName() {
- return driverClassName;
- }
-
- public void setDriverClassName(String driverClassName) {
- this.driverClassName = driverClassName;
- }
-
- public int getInitialSize() {
- return initialSize;
- }
-
- public void setInitialSize(int initialSize) {
- this.initialSize = initialSize;
- }
-
- public int getMinIdle() {
- return minIdle;
- }
-
- public void setMinIdle(int minIdle) {
- this.minIdle = minIdle;
- }
-
- public int getMaxActive() {
- return maxActive;
- }
-
- public void setMaxActive(int maxActive) {
- this.maxActive = maxActive;
- }
-
- public int getMaxWait() {
- return maxWait;
- }
-
- public void setMaxWait(int maxWait) {
- this.maxWait = maxWait;
- }
-
- public int getTimeBetweenEvictionRunsMillis() {
- return timeBetweenEvictionRunsMillis;
- }
-
- public void setTimeBetweenEvictionRunsMillis(int timeBetweenEvictionRunsMillis) {
- this.timeBetweenEvictionRunsMillis = timeBetweenEvictionRunsMillis;
- }
-
- public int getMinEvictableIdleTimeMillis() {
- return minEvictableIdleTimeMillis;
- }
-
- public void setMinEvictableIdleTimeMillis(int minEvictableIdleTimeMillis) {
- this.minEvictableIdleTimeMillis = minEvictableIdleTimeMillis;
- }
-
- public String getValidationQuery() {
- return validationQuery;
- }
-
- public void setValidationQuery(String validationQuery) {
- this.validationQuery = validationQuery;
- }
-
- public boolean isTestWhileIdle() {
- return testWhileIdle;
- }
-
- public void setTestWhileIdle(boolean testWhileIdle) {
- this.testWhileIdle = testWhileIdle;
- }
-
- public boolean isTestOnBorrow() {
- return testOnBorrow;
- }
-
- public void setTestOnBorrow(boolean testOnBorrow) {
- this.testOnBorrow = testOnBorrow;
- }
-
- public boolean isTestOnReturn() {
- return testOnReturn;
- }
-
- public void setTestOnReturn(boolean testOnReturn) {
- this.testOnReturn = testOnReturn;
- }
-
- public boolean isPoolPreparedStatements() {
- return poolPreparedStatements;
- }
-
- public void setPoolPreparedStatements(boolean poolPreparedStatements) {
- this.poolPreparedStatements = poolPreparedStatements;
- }
-
- public int getMaxPoolPreparedStatementPerConnectionSize() {
- return maxPoolPreparedStatementPerConnectionSize;
- }
-
- public void setMaxPoolPreparedStatementPerConnectionSize(int maxPoolPreparedStatementPerConnectionSize) {
- this.maxPoolPreparedStatementPerConnectionSize = maxPoolPreparedStatementPerConnectionSize;
- }
- }
-
该配置类实际是配置DruidDataSource和JdbcTemplate。注意@ConfigurationProperties(prefix = “hive”)这句是将你yml文件中hive节点下的值注入到同名属性中去。
使用jdbc操作hive跟使用jdbc操作mysql等关系型数据库基本一样,下面给出一个样例说明。
- import com.alibaba.fastjson.JSONArray;
- import com.alibaba.fastjson.JSONObject;
- import org.springframework.beans.factory.annotation.Autowired;
- import org.springframework.beans.factory.annotation.Qualifier;
- import javax.sql.DataSource;
- import java.sql.*;
-
- public class Test {
- @Autowired
- @Qualifier("hiveDruidDataSource")
- private DataSource druidDataSource;
-
- public static String getData(DataSource druidDataSource,String sqlString,String resultName) {
- Connection conn = null;
- Statement statement = null;
- ResultSet rs = null;
- JSONObject result = null;
- try {
- conn = druidDataSource.getConnection();
- statement = conn.createStatement();
- result = new JSONObject();
- result.put("state", "0");
- JSONArray array = new JSONArray();
- rs = statement.executeQuery(sqlString.toString());
- ResultSetMetaData metaData = rs.getMetaData();
- int columnCount = metaData.getColumnCount();
- while (rs.next()) {
- JSONObject jsonObj = new JSONObject();
- // 遍历每一列
- for (int i = 1; i <= columnCount; i++) {
- String columnName = metaData.getColumnLabel(i);
- String value = rs.getString(columnName);
- jsonObj.put(columnName, value);
- }
- array.add(jsonObj);
- }
- result.put("analysisMACResult", array.toString());
- } catch (SQLException e) {
- e.printStackTrace();
- return "{\"state\":\"1\"}";
- } finally {
- try {
- conn.close();
- statement.close();
- rs.close();
- } catch (SQLException e) {
- e.printStackTrace();
- return "{\"state\":\"1\"}";
- }
- }
- return result.toString();
- }
- }
-
仔细看一下这个操作类,其实跟jdbc操作mysql没区别。但要注意@Qualifier(“hiveDruidDataSource”)中的hiveDruidDataSource这个对象名,其实是你在配置类中定义的。上述给出的是查询操作,其他操作也类似,如果你使用过jdbc操作过关系型数据库,我想我不用多说了。介绍就到这了,如有误解欢迎指正。