Rumah >pangkalan data >tutorial mysql >hama学习笔记(2)
写hama job 如果只是写hama job,根本不需要eclipse,所有的代码都可以在一个java文件中搞定。不过用惯了eclipse的人表示vim之类的实在用不惯。 在eclipse中可以建一个user library: 在eclipse菜单栏中:Window->Preferences->Java->Build Path->User Libraries
如果只是写hama job,根本不需要eclipse,所有的代码都可以在一个java文件中搞定。不过用惯了eclipse的人表示vim之类的实在用不惯。
在eclipse中可以建一个user library:
在eclipse菜单栏中:Window->Preferences->Java->Build Path->User Libraries->New新建一个user library,例如hama-0.6.0,勾选System Library。然后Add External JARs,将HAMA_HOME/lib中的jar包和HAMA_HOME下的jar包加进来。
新建Java Project时将这个user library加入工程,就OK了,可以试试hama example中计算PI的例子:
http://blog.csdn.net/bhq2010/article/details/8513052
import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hama.HamaConfiguration; import org.apache.hama.bsp.BSP; import org.apache.hama.bsp.BSPJob; import org.apache.hama.bsp.BSPJobClient; import org.apache.hama.bsp.BSPPeer; import org.apache.hama.bsp.ClusterStatus; import org.apache.hama.bsp.FileOutputFormat; import org.apache.hama.bsp.NullInputFormat; import org.apache.hama.bsp.TextOutputFormat; import org.apache.hama.bsp.sync.SyncException; public class PiEstimator { private static Path TMP_OUTPUT = new Path("/tmp/pi-" + System.currentTimeMillis()); public static class MyEstimator extends BSP<nullwritable nullwritable text doublewritable> { public static final Log LOG = LogFactory.getLog(MyEstimator.class); private String masterTask; private static final int iterations = 10000; @Override public void bsp( BSPPeer<nullwritable nullwritable text doublewritable> peer) throws IOException, SyncException, InterruptedException { int in = 0; for (int i = 0; i peer) throws IOException { // Choose one as a master this.masterTask = peer.getPeerName(peer.getNumPeers() / 2); } @Override public void cleanup( BSPPeer<nullwritable nullwritable text doublewritable> peer) throws IOException { if (peer.getPeerName().equals(masterTask)) { double pi = 0.0; int numPeers = peer.getNumCurrentMessages(); DoubleWritable received; while ((received = peer.getCurrentMessage()) != null) { pi += received.get(); } pi = pi / numPeers; peer.write(new Text("Estimated value of PI is"), new DoubleWritable(pi)); } } } static void printOutput(HamaConfiguration conf) throws IOException { FileSystem fs = 
FileSystem.get(conf); FileStatus[] files = fs.listStatus(TMP_OUTPUT); for (int i = 0; i 0) { FSDataInputStream in = fs.open(files[i].getPath()); IOUtils.copyBytes(in, System.out, conf, false); in.close(); break; } } fs.delete(TMP_OUTPUT, true); } public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException { // BSP job configuration HamaConfiguration conf = new HamaConfiguration(); BSPJob bsp = new BSPJob(conf, PiEstimator.class); // Set the job name bsp.setJobName("Pi Estimation Example"); bsp.setBspClass(MyEstimator.class); bsp.setInputFormat(NullInputFormat.class); bsp.setOutputKeyClass(Text.class); bsp.setOutputValueClass(DoubleWritable.class); bsp.setOutputFormat(TextOutputFormat.class); FileOutputFormat.setOutputPath(bsp, TMP_OUTPUT); BSPJobClient jobClient = new BSPJobClient(conf); ClusterStatus cluster = jobClient.getClusterStatus(true); if (args.length > 0) { bsp.setNumBspTask(Integer.parseInt(args[0])); } else { // Set to maximum bsp.setNumBspTask(cluster.getMaxTasks()); } long startTime = System.currentTimeMillis(); if (bsp.waitForCompletion(true)) { printOutput(conf); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); } } }</nullwritable></nullwritable></nullwritable>
Run as Java Application即可,这样运行是在单机模式下的,不需要安装和启动Hama集群。如果要在集群上运行可以将工程Export成Jar文件,发到集群上运行。
hama的源码工程是用maven构建的,下载hama的src包,解压;
在eclipse中安装m2e即可Import->Maven->Existing Maven Project->选择解压后的hama源码所在的目录,就可以导入,第一次导入时,maven会去下载依赖的包,所以时间比较长。
导入后有hama-core、hama-graph等6个工程,之后就可以用maven插件编译、调试,研究hama源码了。
如果不用eclipse,则需要下载安装maven2,过程google一下,随处可见。
为了省事,可以下载Juno版的Eclipse IDE for Java EE Developers。这个版本的eclipse中带有了完整的m2e插件。