admin管理员组

文章数量:829193

自己撸一个Wordcount

  1. 新建maven工程

  2. 修改pom文件

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns=".0.0"xmlns:xsi=""xsi:schemaLocation=".0.0 .0.0.xsd"><modelVersion>4.0.0</modelVersion><groupId>com.chen</groupId><artifactId>hadoop-hdfs-test</artifactId><version>1.0-SNAPSHOT</version><dependencies><dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>RELEASE</version></dependency><dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-core</artifactId><version>2.11.2</version></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>2.7.2</version></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>2.7.2</version></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>2.7.2</version></dependency></dependencies><build><plugins><plugin><artifactId>maven-compiler-plugin</artifactId><version>2.3.2</version><configuration><source>1.8</source><target>1.8</target></configuration></plugin><plugin><artifactId>maven-assembly-plugin </artifactId><configuration><descriptorRefs><descriptorRef>jar-with-dependencies</descriptorRef></descriptorRefs><archive><manifest><mainClass>com.chen.mapreduce.WordcountDriver</mainClass></manifest></archive></configuration><executions><execution><id>make-assembly</id><phase>package</phase><goals><goal>single</goal></goals></execution></executions></plugin></plugins></build>
    </project>
    
  3. 新建Mapper

    package com.chen.mapreduce;import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;public class WordcountMapper extends Mapper<LongWritable,Text,Text, IntWritable> {Text k = new Text();IntWritable v = new IntWritable(1);@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {String line = value.toString();String[] words = line.split(" ");for (String word: words){k.set(word);context.write(k,v);}}
    }
  4. 新建Reducer

    package com.chen.mapreduce;import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;public class WordcountReducer extends Reducer<Text, IntWritable,Text,IntWritable> {int sum = 0;IntWritable v = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {sum = 0;for (IntWritable count: values){sum += count.get();}v.set(sum);context.write(key,v);}
    }
  5. 新建Driver

    package com.chen.mapreduce;import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class WordcountDriver {public static void main(String[] args) throws Exception {Configuration configuration = new Configuration();Job job = Job.getInstance(configuration);job.setJarByClass(WordcountDriver.class);job.setMapperClass(WordcountMapper.class);job.setReducerClass(WordcountReducer.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}
    }
  6. 打包

  7. 上传

  8. 运行

    hadoop jar /data/app/hadoop-2.7.2/my-job/hadoop-hdfs-test-1.0-SNAPSHOT-jar-with-dependencies.jar  /data/hadoop/input /data/hadoop/output
  9. 查看结果

    10.查看执行日志

本文标签: 自己撸一个Wordcount