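The HadoopUtil class provides two overloaded prepareJob methods for setting up a Hadoop MapReduce job: a map-only variant and a map-plus-reduce variant.
Fully qualified class name: org.apache.mahout.common.HadoopUtil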
public static Job prepareJob(Path inputPath,
Path outputPath,
Class<? extends InputFormat> inputFormat,
Class<? extends Mapper> mapper,
Class<? extends Writable> mapperKey,
Class<? extends Writable> mapperValue,
Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {
Job job = new Job(new Configuration(conf));
Configuration jobConf = job.getConfiguration();
if (mapper.equals(Mapper.class)) {
throw new IllegalStateException(“Can’t figure out the user class jar file from mapper/reducer”);
}
job.setJarByClass(mapper);
job.setInputFormatClass(inputFormat);
jobConf.set(“mapred.input.dir”, inputPath.toString());
job.setMapperClass(mapper);
job.setMapOutputKeyClass(mapperKey);
job.setMapOutputValueClass(mapperValue);
job.setOutputKeyClass(mapperKey);
job.setOutputValueClass(mapperValue);
jobConf.setBoolean(“mapred.compress.map.output”, true);
job.setNumReduceTasks(0);
job.setOutputFormatClass(outputFormat);
jobConf.set(“mapred.output.dir”, outputPath.toString());
return job;
}
public static Job prepareJob(Path inputPath,
Path outputPath,
Class<? extends InputFormat> inputFormat,
Class<? extends Mapper> mapper,
Class<? extends Writable> mapperKey,
Class<? extends Writable> mapperValue,
Class<? extends Reducer> reducer,
Class<? extends Writable> reducerKey,
Class<? extends Writable> reducerValue,
Class<? extends OutputFormat> outputFormat,
Configuration conf) throws IOException {
Job job = new Job(new Configuration(conf));
Configuration jobConf = job.getConfiguration();
if (reducer.equals(Reducer.class)) {
if (mapper.equals(Mapper.class)) {
throw new IllegalStateException(“Can’t figure out the user class jar file from mapper/reducer”);
}
job.setJarByClass(mapper);
} else {
job.setJarByClass(reducer);
}
job.setInputFormatClass(inputFormat);
jobConf.set(“mapred.input.dir”, inputPath.toString());
job.setMapperClass(mapper);
if (mapperKey != null) {
job.setMapOutputKeyClass(mapperKey);
}
if (mapperValue != null) {
job.setMapOutputValueClass(mapperValue);
}
jobConf.setBoolean(“mapred.compress.map.output”, true);
job.setReducerClass(reducer);
job.setOutputKeyClass(reducerKey);
job.setOutputValueClass(reducerValue);
job.setOutputFormatClass(outputFormat);
jobConf.set(“mapred.output.dir”, outputPath.toString());
return job;
}