-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
341 additions
and
1 deletion.
There are no files selected for viewing
58 changes: 58 additions & 0 deletions
58
hadoop-demo/src/main/java/com/tomshidi/hadoop/mapreduce/wordcount/WordCountDriver.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount; | ||
|
||
import com.tomshidi.hadoop.mapreduce.wordcount.partitioner.WordCountPartitioner; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Job; | ||
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; | ||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | ||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* @author TomShiDi | ||
* @since 2023/5/24 15:51 | ||
*/ | ||
public class WordCountDriver { | ||
|
||
/** | ||
* 数据输入、MapTask、分区(Partitioner)、排序、Combiner、分组、ReduceTask | ||
* @param args | ||
* @throws IOException | ||
* @throws InterruptedException | ||
* @throws ClassNotFoundException | ||
*/ | ||
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { | ||
// 1.获取job | ||
Configuration configuration = new Configuration(); | ||
Job job = Job.getInstance(configuration, "wordcount"); | ||
// 2.设置jar包路径 | ||
job.setJarByClass(WordCountDriver.class); | ||
// 3.关联mapper和reducer | ||
job.setMapperClass(WordCountMapper.class); | ||
job.setReducerClass(WordCountReducer.class); | ||
// 4.设置map输出的kv类型 | ||
job.setMapOutputKeyClass(Text.class); | ||
job.setMapOutputValueClass(IntWritable.class); | ||
// 5.设置最终输出的kv类型 | ||
job.setOutputKeyClass(Text.class); | ||
job.setOutputValueClass(IntWritable.class); | ||
// 设置文件合并输入 | ||
job.setInputFormatClass(CombineTextInputFormat.class); | ||
CombineTextInputFormat.setMaxInputSplitSize(job, 4194304); | ||
// 设置自定义分区类 | ||
job.setPartitionerClass(WordCountPartitioner.class); | ||
// 设置Reducer个数,需要与WordCountPartitioner中的分区数对应 | ||
job.setNumReduceTasks(5); | ||
// 6.设置输入路径和输出路径 | ||
FileInputFormat.setInputPaths(job, new Path("D:\\Personal-Projects\\tomshidi-springcloud-demo\\hadoop-demo\\src\\main\\resources\\input\\wordcount")); | ||
FileOutputFormat.setOutputPath(job, new Path("D:\\Personal-Projects\\tomshidi-springcloud-demo\\hadoop-demo\\src\\main\\resources\\output\\wordcount")); | ||
// 7.提交job | ||
boolean result = job.waitForCompletion(true); | ||
|
||
System.exit(result ? 0 : 1); | ||
} | ||
} |
31 changes: 31 additions & 0 deletions
31
hadoop-demo/src/main/java/com/tomshidi/hadoop/mapreduce/wordcount/WordCountMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount; | ||
|
||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.LongWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Mapper; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* @author TomShiDi | ||
* @date 2023/5/24 15:26 | ||
*/ | ||
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> { | ||
|
||
private Text outK = new Text(); | ||
|
||
private IntWritable outV = new IntWritable(); | ||
|
||
@Override | ||
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { | ||
String line = value.toString(); | ||
String[] words = line.split(" "); | ||
|
||
for (String word : words) { | ||
outK.set(word); | ||
outV.set(1); | ||
context.write(outK, outV); | ||
} | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
hadoop-demo/src/main/java/com/tomshidi/hadoop/mapreduce/wordcount/WordCountReducer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount; | ||
|
||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Reducer; | ||
|
||
import java.io.IOException; | ||
|
||
|
||
/** | ||
* @author TomShiDi | ||
* @since 2023/5/24 15:27 | ||
*/ | ||
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> { | ||
|
||
private IntWritable outV = new IntWritable(); | ||
|
||
@Override | ||
protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { | ||
int sum = 0; | ||
for (IntWritable value : values) { | ||
sum = sum + value.get(); | ||
} | ||
outV.set(sum); | ||
context.write(key, outV); | ||
} | ||
} |
26 changes: 26 additions & 0 deletions
26
...emo/src/main/java/com/tomshidi/hadoop/mapreduce/wordcount/combiner/WordCountCombiner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount.combiner; | ||
|
||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Reducer; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* @author TomShiDi | ||
* @since 2023/5/25 21:31 | ||
*/ | ||
public class WordCountCombiner extends Reducer<Text, IntWritable, Text, IntWritable> { | ||
|
||
private IntWritable outV = new IntWritable(); | ||
|
||
@Override | ||
protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { | ||
int count = 0; | ||
for (IntWritable value : values) { | ||
count = count + value.get(); | ||
} | ||
outV.set(count); | ||
context.write(key, outV); | ||
} | ||
} |
46 changes: 46 additions & 0 deletions
46
...-demo/src/main/java/com/tomshidi/hadoop/mapreduce/wordcount/combiner/WordCountDriver.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount.combiner; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Job; | ||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | ||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* @author TomShiDi | ||
* @since 2023/5/24 15:51 | ||
*/ | ||
public class WordCountDriver { | ||
|
||
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { | ||
// 1.获取job | ||
Configuration configuration = new Configuration(); | ||
Job job = Job.getInstance(configuration); | ||
// 2.设置jar包路径 | ||
job.setJarByClass(WordCountDriver.class); | ||
// 3.关联mapper和reducer | ||
job.setMapperClass(WordCountMapper.class); | ||
job.setReducerClass(WordCountReducer.class); | ||
// 4.设置map输出的kv类型 | ||
job.setMapOutputKeyClass(Text.class); | ||
job.setMapOutputValueClass(IntWritable.class); | ||
// 5.设置最终输出的kv类型 | ||
job.setOutputKeyClass(Text.class); | ||
job.setOutputValueClass(IntWritable.class); | ||
|
||
// 设置后,会在MapTask上运行一次初步合并,减轻ReducerTask压力 | ||
job.setCombinerClass(WordCountReducer.class); | ||
|
||
// 6.设置输入路径和输出路径 | ||
FileInputFormat.setInputPaths(job, new Path("D:\\Personal-Projects\\hdfsclient\\src\\main\\resources\\input.txt")); | ||
FileOutputFormat.setOutputPath(job, new Path("D:\\Personal-Projects\\hdfsclient\\src\\main\\resources\\output-combiner")); | ||
// 7.提交job | ||
boolean result = job.waitForCompletion(true); | ||
|
||
System.exit(result ? 0 : 1); | ||
} | ||
} |
31 changes: 31 additions & 0 deletions
31
...-demo/src/main/java/com/tomshidi/hadoop/mapreduce/wordcount/combiner/WordCountMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount.combiner; | ||
|
||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.LongWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Mapper; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* @author TomShiDi | ||
* @date 2023/5/24 15:26 | ||
*/ | ||
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> { | ||
|
||
private Text outK = new Text(); | ||
|
||
private IntWritable outV = new IntWritable(); | ||
|
||
@Override | ||
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { | ||
String line = value.toString(); | ||
String[] words = line.split(" "); | ||
|
||
for (String word : words) { | ||
outK.set(word); | ||
outV.set(1); | ||
context.write(outK, outV); | ||
} | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
...demo/src/main/java/com/tomshidi/hadoop/mapreduce/wordcount/combiner/WordCountReducer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount.combiner; | ||
|
||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Reducer; | ||
|
||
import java.io.IOException; | ||
|
||
|
||
/** | ||
* @author TomShiDi | ||
* @since 2023/5/24 15:27 | ||
*/ | ||
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> { | ||
|
||
private IntWritable outV = new IntWritable(); | ||
|
||
@Override | ||
protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { | ||
int sum = 0; | ||
for (IntWritable value : values) { | ||
sum = sum + value.get(); | ||
} | ||
outV.set(sum); | ||
context.write(key, outV); | ||
} | ||
} |
24 changes: 24 additions & 0 deletions
24
...c/main/java/com/tomshidi/hadoop/mapreduce/wordcount/partitioner/WordCountPartitioner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
package com.tomshidi.hadoop.mapreduce.wordcount.partitioner; | ||
|
||
import org.apache.hadoop.io.IntWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.hadoop.mapreduce.Partitioner; | ||
|
||
|
||
/** | ||
* 自定义分区逻辑 | ||
* @author TomShiDi | ||
* @since 2024/2/27 15:57 | ||
*/ | ||
public class WordCountPartitioner extends Partitioner<Text, IntWritable> { | ||
@Override | ||
public int getPartition(Text text, IntWritable intWritable, int i) { | ||
String value = text.toString(); | ||
if (value == null || value.length() == 0) { | ||
return 0; | ||
} | ||
// 根据首字母分区 | ||
char firstChar = value.charAt(0); | ||
return firstChar % 5; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Hello World | ||
This is a simple word text | ||
Hello Hadoop |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
log4j.rootLogger=INFO, Console | ||
|
||
#Console | ||
log4j.appender.Console=org.apache.log4j.ConsoleAppender | ||
log4j.appender.Console.layout=org.apache.log4j.PatternLayout | ||
log4j.appender.Console.layout.ConversionPattern=%d [%t] %-5p [%c] - %m%n | ||
|
||
|
||
# Custom tweaks | ||
#log4j.logger.com.codahale.metrics=WARN | ||
#log4j.logger.com.ryantenney=WARN | ||
#log4j.logger.com.zaxxer=WARN | ||
#log4j.logger.org.apache=WARN | ||
#log4j.logger.org.hibernate=WARN | ||
#log4j.logger.org.hibernate.engine.internal=WARN | ||
#log4j.logger.org.hibernate.validator=WARN | ||
#log4j.logger.org.springframework=WARN | ||
#log4j.logger.org.springframework.web=WARN | ||
#log4j.logger.org.springframework.security=WARN | ||
|
||
# log file | ||
#log4j.appender.D = org.apache.log4j.DailyRollingFileAppender | ||
#log4j.appender.D.File = D://log.log | ||
#log4j.appender.D.Append = true | ||
#log4j.appender.D.Threshold = DEBUG | ||
#log4j.appender.D.layout = org.apache.log4j.PatternLayout | ||
#log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters