-
Notifications
You must be signed in to change notification settings - Fork 0
/
UnitSum.java
84 lines (67 loc) · 3.31 KB
/
UnitSum.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Hadoop MapReduce job that adds up, per key, the numeric values found in two
 * tab-separated text inputs.
 *
 * <p>Records from the first input are emitted unchanged by {@link PassMapper};
 * records from the second input are multiplied by the configured {@code beta}
 * by {@link BetaMapper}. {@link SumReducer} then sums all values that share a
 * key and rounds the total to four decimal places.
 */
public class UnitSum {

    /** Configuration key under which the beta factor is passed to the mappers. */
    private static final String BETA_KEY = "beta";

    /** Separator between the key and the value on every input line. */
    private static final String FIELD_SEPARATOR = "\t";

    /**
     * Splits one non-blank input line into its tab-separated fields.
     *
     * @param line the raw input line
     * @return the fields; always at least two (extra fields are left for the caller to ignore)
     * @throws IOException if the line has fewer than two fields
     */
    private static String[] splitRecord(String line) throws IOException {
        String[] fields = line.split(FIELD_SEPARATOR);
        if (fields.length < 2) {
            throw new IOException("Malformed record, expected <key>\\t<value>: '" + line + "'");
        }
        return fields;
    }

    /**
     * Emits each {@code key \t value} line as a {@code (key, value)} pair with
     * the value parsed as a double.
     */
    public static class PassMapper extends Mapper<Object, Text, Text, DoubleWritable> {

        /**
         * @param key     input key supplied by the input format; not used
         * @param value   one input line of the form {@code key \t value}
         * @param context sink for the emitted pair
         * @throws IOException           if the line is malformed or the write fails
         * @throws NumberFormatException if the value field is not a number
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            if (line.trim().isEmpty()) {
                // A blank line carries no record; skip it instead of failing the task.
                return;
            }
            String[] pageSubrank = splitRecord(line);
            double subRank = Double.parseDouble(pageSubrank[1]);
            context.write(new Text(pageSubrank[0]), new DoubleWritable(subRank));
        }
    }

    /**
     * Emits each {@code key \t value} line as {@code (key, value * beta)}.
     *
     * <p>Per the original author's note, this mapper reads the page-rank file
     * so that the beta-scaled rank is added into the per-key sum.
     */
    public static class BetaMapper extends Mapper<Object, Text, Text, DoubleWritable> {

        /** Scaling factor read from the job configuration in {@link #setup}. */
        private float beta;

        /**
         * Reads {@code beta} from the job configuration, falling back to 0.2
         * when the key is absent.
         */
        @Override
        public void setup(Context context) {
            Configuration conf = context.getConfiguration();
            beta = conf.getFloat(BETA_KEY, 0.2f);
        }

        /**
         * @param key     input key supplied by the input format; not used
         * @param value   one input line of the form {@code key \t value}
         * @param context sink for the emitted pair
         * @throws IOException           if the line is malformed or the write fails
         * @throws NumberFormatException if the value field is not a number
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            if (line.trim().isEmpty()) {
                // A blank line carries no record; skip it instead of failing the task.
                return;
            }
            String[] pageRank = splitRecord(line);
            double betaRank = Double.parseDouble(pageRank[1]) * beta;
            context.write(new Text(pageRank[0]), new DoubleWritable(betaRank));
        }
    }

    /**
     * Sums all values of a key and writes the total rounded to four decimal places.
     */
    public static class SumReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

        /**
         * Formatter used only to round to four decimals. The symbols are pinned
         * to {@link Locale#ROOT} so the decimal separator is always '.', which
         * is what {@link Double#parseDouble} expects; with the JVM default
         * locale a ',' separator would make the re-parse throw. Kept as an
         * instance field (not static) because DecimalFormat is not thread-safe.
         */
        private final DecimalFormat fourDecimals =
                new DecimalFormat("#.0000", DecimalFormatSymbols.getInstance(Locale.ROOT));

        /**
         * @param key     the key shared by all {@code values}
         * @param values  the values emitted by the mappers for {@code key}
         * @param context sink for the {@code (key, roundedSum)} pair
         */
        @Override
        public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
                throws IOException, InterruptedException {
            double sum = 0;
            for (DoubleWritable value : values) {
                sum += value.get();
            }
            // NaN and infinities do not survive the format/parse round trip,
            // so they are written through unrounded.
            if (!Double.isNaN(sum) && !Double.isInfinite(sum)) {
                sum = Double.parseDouble(fourDecimals.format(sum));
            }
            context.write(key, new DoubleWritable(sum));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} input path read by {@link PassMapper};
     *             {@code args[1]} input path read by {@link BetaMapper};
     *             {@code args[2]} output path;
     *             {@code args[3]} beta, parsed as a float
     * @throws IllegalArgumentException if fewer than four arguments are given
     * @throws IOException              if the job completes unsuccessfully
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 4) {
            throw new IllegalArgumentException(
                    "Usage: UnitSum <passInput> <betaInput> <output> <beta>");
        }

        Configuration conf = new Configuration();
        conf.setFloat(BETA_KEY, Float.parseFloat(args[3]));

        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);

        // Each input path gets its own mapper through MultipleInputs below.
        // The two mappers must not also be registered as a ChainMapper chain:
        // they are alternatives, not stages, and MultipleInputs replaces the
        // job's mapper class anyway, so such a chain would never run.
        job.setReducerClass(SumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        // Surface a failed job to the caller instead of returning as if it succeeded.
        if (!job.waitForCompletion(true)) {
            throw new IOException("UnitSum job failed; output path: " + args[2]);
        }
    }
}