资源预览内容
第1页 / 共4页
第2页 / 共4页
第3页 / 共4页
第4页 / 共4页
亲,该文档总共4页全部预览完了,如果喜欢就下载吧!
资源描述
package cn.yws;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Inverted-index MapReduce job.
 *
 * <p>Place the test files file1, file2 under the hadoop "index_in" directory
 * before running. The job emits, for each word, the list of source files that
 * contain it together with per-file occurrence counts, e.g.
 * {@code word -> file1:2;file3:1;}.
 *
 * <p>Pipeline: Map emits ("word:fileN", "1"); Combine sums counts per
 * word-in-file and re-keys to ("word", "fileN:count"); Reduce concatenates the
 * per-file entries into a single semicolon-separated list.
 */
public class MyInvertedIndex {

    /**
     * Mapper: tokenizes each input line and emits a composite key
     * "word:fileName" with the constant value "1".
     */
    public static class Map extends Mapper<Object, Text, Text, Text> {
        private Text keyinfo = new Text();
        private Text valueinfo = new Text();
        private FileSplit split;

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Identify which input file this record came from.
            split = (FileSplit) context.getInputSplit();
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                // Keep only the "fileN..." tail of the path so keys stay short.
                // NOTE(review): assumes every input path contains "file";
                // indexOf returning -1 would throw from substring — confirm
                // input naming convention.
                int splitindex = split.getPath().toString().indexOf("file");
                keyinfo.set(tokenizer.nextToken() + ":"
                        + split.getPath().toString().substring(splitindex));
                valueinfo.set("1"); // one occurrence, e.g. "word:file3" -> "1"
                context.write(keyinfo, valueinfo);
            }
        }
    }

    /**
     * Combiner: sums the occurrence counts for each "word:fileName" key, then
     * moves the file name into the value so the reducer groups by word alone.
     * Input  : ("word:fileN", ["1","1",...])
     * Output : ("word", "fileN:count")
     */
    public static class Combine extends Reducer<Text, Text, Text, Text> {
        private Text infoText = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (Text value : values) {
                sum += Integer.parseInt(value.toString());
            }
            // Split "word:fileN" back into its two parts.
            int splitindex = key.toString().indexOf(":");
            // Value becomes e.g. "file2:1" (later joined as "file2:1;file3:2;...").
            infoText.set(key.toString().substring(splitindex + 1) + ":" + sum);
            key.set(key.toString().substring(0, splitindex));
            context.write(key, infoText);
        }
    }

    /**
     * Reducer: concatenates all "fileN:count" entries for a word into one
     * semicolon-terminated list, producing the final inverted-index line.
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        private Text result = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Build the document list, e.g. "file1:2;file3:1;".
            StringBuilder filelist = new StringBuilder();
            for (Text value : values) {
                filelist.append(value.toString()).append(";");
            }
            result.set(filelist.toString());
            context.write(key, result);
        }
    }

    /**
     * Configures and launches the job. Accepts exactly two arguments
     * (input dir, output dir); defaults to index_in / index_out3 otherwise.
     */
    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            // This line is essential: point the client at the JobTracker.
            configuration.set("mapred.job.tracker", "192.168.1.15:9001");
            String[] ioargs = new String[] { "index_in", "index_out3" };
            if (args.length == 2) {
                ioargs = args;
            }
            String[] otherArgs =
                    new GenericOptionsParser(configuration, ioargs).getRemainingArgs();
            if (otherArgs.length != 2) {
                System.err.println("Usage: inverted "
                        + MyInvertedIndex.class.getSimpleName() + " <in> <out>");
                System.exit(2);
            }
            // Start the computation task.
            Job job = new Job(configuration, MyInvertedIndex.class.getSimpleName());
            job.setJarByClass(MyInvertedIndex.class);
            job.setMapperClass(Map.class);       // map phase
            job.setCombinerClass(Combine.class); // local aggregation
            job.setReducerClass(Reduce.class);   // final reduction
            // Map output types differ from job output only in name; both are Text.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            // Wire up input and output directories.
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
收藏 下载该资源
网站客服QQ:2055934822
金锄头文库版权所有
经营许可证:蜀ICP备13022795号 | 川公网安备 51140202000112号