kafka storm hbase-IT技术-益华网络-网站建设_小程序开发|网页设计

kafka storm hbase

发布时间：2025-05-25 00:10:07 作者：益华网络来源：undefined 浏览量（1）点赞（1）

摘要：kafka+storm+hbase实现计算WordCount。（1）表名：wc

kafka+storm+hbase实现计算WordCount。

（1）表名：wc

（2）列族：result

（3）RowKey：word

（4）Field：count

1、 解决：

（1）第一步：首先准备 kafka、storm 和 hbase 相关的 jar 包。依赖如下：

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

<artifactId>kafkaSpout</artifactId>

<version>0.0.1-SNAPSHOT</version>

<groupId>org.apache.storm</groupId>

<artifactId>storm-core</artifactId>

</dependency>

<groupId>org.apache.storm</groupId>

<artifactId>storm-kafka</artifactId>

</dependency>

<groupId>org.apache.kafka</groupId>

<artifactId>kafka_2.10</artifactId>

<groupId>org.apache.zookeeper</groupId>

<artifactId>zookeeper</artifactId>

</exclusion>

</exclusion>

</exclusions>

</dependency>

<groupId>org.apache.hbase</groupId>

<artifactId>hbase-client</artifactId>

</exclusion>

<groupId>org.apache.zookeeper</groupId>

<artifactId>zookeeper</artifactId>

</exclusion>

</exclusions>

</dependency>

<groupId>com.google.protobuf</groupId>

<artifactId>protobuf-java</artifactId>

</dependency>

<groupId>org.apache.curator</groupId>

<artifactId>curator-framework</artifactId>

</exclusion>

</exclusion>

</exclusions>

</dependency>

<groupId>jdk.tools</groupId>

<artifactId>jdk.tools</artifactId>

<scope>system</scope>

<systemPath>C:\Program Files\Java\jdk1.7.0_51\lib\tools.jar</systemPath>

</dependency>

</dependencies>

<id>central</id>

<url>http://repo1.maven.org/maven2/</url>

<enabled>false</enabled>

</snapshots>

</releases>

</repository>

<id>clojars</id>

<url>https://clojars.org/repo/</url>

</snapshots>

</releases>

</repository>

<id>scala-tools</id>

<url>http://scala-tools.org/repo-releases</url>

</snapshots>

</releases>

</repository>

<id>conjars</id>

<url>http://conjars.org/repo/</url>

</snapshots>

</releases>

</repository>

</repositories>

<build>

<groupId>org.apache.maven.plugins</groupId>

<artifactId>maven-compiler-plugin</artifactId>

</configuration>

</plugin>

<artifactId>maven-assembly-plugin</artifactId>

<descriptorRef>jar-with-dependencies</descriptorRef>

</descriptorRefs>

</manifest>

</archive>

</configuration>

<id>make-assembly</id>

<phase>package</phase>

<goals>

<goal>single</goal>

</goals>

</execution>

</executions>

</plugin>

</plugins>

</build>

</project>

（2） 将 kafka 发来的数据通过 levelSplit 的 bolt 进行分割处理，然后再发送到下一个 Bolt 中。代码如下：

package com.kafka.spout;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import backtype.storm.topology.BasicOutputCollector;

import backtype.storm.topology.OutputFieldsDeclarer;

import backtype.storm.topology.base.BaseBasicBolt;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Tuple;

import backtype.storm.tuple.Values;

public class LevelSplit extends BaseBasicBolt {

public void execute(Tuple tuple, BasicOutputCollector collector) {

String words = tuple.getString( 0 ).toString(); //the cow jumped over the moon

String []va=words.split( " " );

for (String word : va)

{

collector.emit( new Values(word));

}

public void declareOutputFields(OutputFieldsDeclarer declarer) {

declarer.declare( new Fields( "word" ));

}

（3）将 LevelSplit 这个 Bolt 发来的数据交给 LevelCount 这个 Bolt 进行计数处理，然后再发送到 HBase（Bolt）中。代码如下：

package com.kafka.spout;

import java.util.HashMap;

import java.util.Map;

import java.util.Map.Entry;

import backtype.storm.topology.BasicOutputCollector;

import backtype.storm.topology.OutputFieldsDeclarer;

import backtype.storm.topology.base.BaseBasicBolt;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Tuple;

import backtype.storm.tuple.Values;

public class LevelCount extends BaseBasicBolt {

Map<String, Integer> counts = new HashMap<String, Integer>();

public void execute(Tuple tuple, BasicOutputCollector collector) {

// TODO Auto-generated method stub

String word = tuple.getString( 0 );

Integer count = counts.get(word);

if (count == null )

count = 0 ;

count++;

counts.put(word, count);

for (Entry<String, Integer> e : counts.entrySet()) {

//sum += e.getValue();

System.out.println(e.getKey()

+ "----------->" +e.getValue());

}

collector.emit( new Values(word, count));

}

public void declareOutputFields(OutputFieldsDeclarer declarer) {

// TODO Auto-generated method stub

declarer.declare( new Fields( "word" , "count" ));

}

（4）准备连接 kafka 和 hbase 所需的配置，设置整个拓扑结构并提交拓扑。代码如下：

package com.kafka.spout;

import java.util.HashMap;

import java.util.Map;

import com.google.common.collect.Maps;

//import org.apache.storm.guava.collect.Maps;

import backtype.storm.Config;

import backtype.storm.LocalCluster;

import backtype.storm.StormSubmitter;

import backtype.storm.generated.AlreadyAliveException;

import backtype.storm.generated.InvalidTopologyException;

import backtype.storm.spout.SchemeAsMultiScheme;

import backtype.storm.topology.TopologyBuilder;

import backtype.storm.tuple.Fields;

import backtype.storm.utils.Utils;

import storm.kafka.BrokerHosts;

import storm.kafka.KafkaSpout;

import storm.kafka.SpoutConfig;

import storm.kafka.ZkHosts;

public class StormKafkaTopo {

public static void main(String[] args) {

BrokerHosts brokerHosts = new ZkHosts( "zeb,yjd,ylh" );

SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, "yjd" , "/storm" , "kafkaspout" );

Config conf = new Config();

spoutConfig.scheme = new SchemeAsMultiScheme( new MessageScheme());

SimpleHBaseMapper mapper = new SimpleHBaseMapper();

mapper.withColumnFamily( "result" );

mapper.withColumnFields( new Fields( "count" ));

mapper.withRowKeyField( "word" );

Map<String, Object> map = Maps.newTreeMap();

map.put( "hbase.rootdir" , "hdfs://zeb:9000/hbase" );

map.put( "hbase.zookeeper.quorum" , "zeb:2181,yjd:2181,ylh:2181" );

// hbase-bolt

HBaseBolt hBaseBolt = new HBaseBolt( "wc" , mapper).withConfigKey( "hbase.conf" );

conf.setDebug( true );

conf.put( "hbase.conf" , map);

TopologyBuilder builder = new TopologyBuilder();

builder.setSpout( "spout" , new KafkaSpout(spoutConfig));

builder.setBolt( "split" , new LevelSplit(), 1 ).shuffleGrouping( "spout" );

builder.setBolt( "count" , new LevelCount(), 1 ).fieldsGrouping( "split" , new Fields( "word" ));

builder.setBolt( "hbase" , hBaseBolt, 1 ).shuffleGrouping( "count" );

if (args != null && args.length > 0 ) {

//提交到集群运行

try {

StormSubmitter.submitTopology(args[ 0 ], conf, builder.createTopology());

} catch (AlreadyAliveException e) {

e.printStackTrace();

} catch (InvalidTopologyException e) {

e.printStackTrace();

}

} else {

//本地模式运行

LocalCluster cluster = new LocalCluster();

cluster.submitTopology( "Topotest1121" , conf, builder.createTopology());

Utils.sleep( 1000000 );

cluster.killTopology( "Topotest1121" );

cluster.shutdown();

}

（5） 在kafka 端用控制台生产数据，如下：

2、 运行结果截图：

3、 遇到的问题：

（1）把所有的工作做好后，提交拓扑并运行代码，发生了错误 1，如下：

解决：原来是各依赖的版本不一致导致的，将版本修改一致后，问题成功解决。

（2）发生了错误 2，如下：

解决：原来是忘记启动 hbase 中的 HMaster 和 HRegionServer。启动后问题成功解决。

http://shenzhen.offcn.com/

扫一扫，关注我们

声明：本文由【益华网络】编辑上传发布，转载此文章须经作者同意，并请附上出处【益华网络】及本页链接。如内容、图片有任何版权问题，请联系我们进行处理。

上一篇：超详细的设置Oracle用户永不被锁教程分享

下一篇：数据库软件架构，到底要设计些什么？

用网站演绎您的企业精髓！

网站首页

服务项目

解决方案

案例展示

关于我们

新闻动态

联系我们

kafka storm hbase

相关新闻

感兴趣吗？