浏览代码

HADOOP-2127. Added a pipes sort example to benchmark trivial pipes application versus trivial java application. Contributed by Owen O'Malley.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@597172 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 18 年之前
父节点
当前提交
9e52b396ed
共有 4 个文件被更改,包括 126 次插入9 次删除
  1. 3 0
      CHANGES.txt
  2. 4 1
      src/examples/pipes/Makefile.am
  3. 23 8
      src/examples/pipes/Makefile.in
  4. 96 0
      src/examples/pipes/impl/sort.cc

+ 3 - 0
CHANGES.txt

@@ -63,6 +63,9 @@ Trunk (unreleased changes)
     HADOOP-2104. Adds a description to the ant targets. This makes the 
     HADOOP-2104. Adds a description to the ant targets. This makes the 
     output of "ant -projecthelp" sensible. (Chris Douglas via ddas)
     output of "ant -projecthelp" sensible. (Chris Douglas via ddas)
 
 
+    HADOOP-2127. Added a pipes sort example to benchmark trivial pipes
+    application versus trivial java application. (omalley via acmurthy)
+
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
     HADOOP-1898.  Release the lock protecting the last time of the last stack
     HADOOP-1898.  Release the lock protecting the last time of the last stack

+ 4 - 1
src/examples/pipes/Makefile.am

@@ -19,7 +19,7 @@ AM_CXXFLAGS=-Wall -I$(HADOOP_UTILS_PREFIX)/include \
 LDADD=-L$(HADOOP_UTILS_PREFIX)/lib -L$(HADOOP_PIPES_PREFIX)/lib \
 LDADD=-L$(HADOOP_UTILS_PREFIX)/lib -L$(HADOOP_PIPES_PREFIX)/lib \
       -lhadooppipes -lhadooputils
       -lhadooppipes -lhadooputils
 
 
-bin_PROGRAMS= wordcount-simple wordcount-part wordcount-nopipe
+bin_PROGRAMS= wordcount-simple wordcount-part wordcount-nopipe pipes-sort
 
 
 # Define the sources for each program
 # Define the sources for each program
 wordcount_simple_SOURCES = \
 wordcount_simple_SOURCES = \
@@ -31,3 +31,6 @@ wordcount_part_SOURCES = \
 wordcount_nopipe_SOURCES = \
 wordcount_nopipe_SOURCES = \
 	impl/wordcount-nopipe.cc
 	impl/wordcount-nopipe.cc
 
 
+pipes_sort_SOURCES = \
+        impl/sort.cc
+

+ 23 - 8
src/examples/pipes/Makefile.in

@@ -14,7 +14,7 @@
 
 
 @SET_MAKE@
 @SET_MAKE@
 
 
-SOURCES = $(wordcount_nopipe_SOURCES) $(wordcount_part_SOURCES) $(wordcount_simple_SOURCES)
+SOURCES = $(pipes_sort_SOURCES) $(wordcount_nopipe_SOURCES) $(wordcount_part_SOURCES) $(wordcount_simple_SOURCES)
 
 
 srcdir = @srcdir@
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 top_srcdir = @top_srcdir@
@@ -38,11 +38,12 @@ PRE_UNINSTALL = :
 POST_UNINSTALL = :
 POST_UNINSTALL = :
 host_triplet = @host@
 host_triplet = @host@
 bin_PROGRAMS = wordcount-simple$(EXEEXT) wordcount-part$(EXEEXT) \
 bin_PROGRAMS = wordcount-simple$(EXEEXT) wordcount-part$(EXEEXT) \
-	wordcount-nopipe$(EXEEXT)
-DIST_COMMON = config.guess config.sub $(srcdir)/Makefile.in \
-	$(srcdir)/Makefile.am $(top_srcdir)/configure \
-	$(am__configure_deps) $(top_srcdir)/impl/config.h.in depcomp \
-	ltmain.sh config.guess config.sub
+	wordcount-nopipe$(EXEEXT) pipes-sort$(EXEEXT)
+DIST_COMMON = config.guess config.guess config.sub config.sub \
+	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+	$(top_srcdir)/configure $(am__configure_deps) \
+	$(top_srcdir)/impl/config.h.in depcomp depcomp ltmain.sh \
+	ltmain.sh config.guess config.guess config.sub config.sub
 subdir = .
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps =  \
 am__aclocal_m4_deps =  \
@@ -59,6 +60,10 @@ am__installdirs = "$(DESTDIR)$(bindir)"
 binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
 binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
 PROGRAMS = $(bin_PROGRAMS)
 PROGRAMS = $(bin_PROGRAMS)
 am__dirstamp = $(am__leading_dot)dirstamp
 am__dirstamp = $(am__leading_dot)dirstamp
+am_pipes_sort_OBJECTS = impl/sort.$(OBJEXT)
+pipes_sort_OBJECTS = $(am_pipes_sort_OBJECTS)
+pipes_sort_LDADD = $(LDADD)
+pipes_sort_DEPENDENCIES =
 am_wordcount_nopipe_OBJECTS = impl/wordcount-nopipe.$(OBJEXT)
 am_wordcount_nopipe_OBJECTS = impl/wordcount-nopipe.$(OBJEXT)
 wordcount_nopipe_OBJECTS = $(am_wordcount_nopipe_OBJECTS)
 wordcount_nopipe_OBJECTS = $(am_wordcount_nopipe_OBJECTS)
 wordcount_nopipe_LDADD = $(LDADD)
 wordcount_nopipe_LDADD = $(LDADD)
@@ -82,8 +87,8 @@ LTCXXCOMPILE = $(LIBTOOL) --mode=compile --tag=CXX $(CXX) $(DEFS) \
 CXXLD = $(CXX)
 CXXLD = $(CXX)
 CXXLINK = $(LIBTOOL) --mode=link --tag=CXX $(CXXLD) $(AM_CXXFLAGS) \
 CXXLINK = $(LIBTOOL) --mode=link --tag=CXX $(CXXLD) $(AM_CXXFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = $(wordcount_nopipe_SOURCES) $(wordcount_part_SOURCES) \
-	$(wordcount_simple_SOURCES)
+SOURCES = $(pipes_sort_SOURCES) $(wordcount_nopipe_SOURCES) \
+	$(wordcount_part_SOURCES) $(wordcount_simple_SOURCES)
 ETAGS = etags
 ETAGS = etags
 CTAGS = ctags
 CTAGS = ctags
 ACLOCAL = @ACLOCAL@
 ACLOCAL = @ACLOCAL@
@@ -218,6 +223,9 @@ wordcount_part_SOURCES = \
 wordcount_nopipe_SOURCES = \
 wordcount_nopipe_SOURCES = \
 	impl/wordcount-nopipe.cc
 	impl/wordcount-nopipe.cc
 
 
+pipes_sort_SOURCES = \
+        impl/sort.cc
+
 all: all-am
 all: all-am
 
 
 .SUFFIXES:
 .SUFFIXES:
@@ -306,6 +314,11 @@ impl/$(am__dirstamp):
 impl/$(DEPDIR)/$(am__dirstamp):
 impl/$(DEPDIR)/$(am__dirstamp):
 	@$(mkdir_p) impl/$(DEPDIR)
 	@$(mkdir_p) impl/$(DEPDIR)
 	@: > impl/$(DEPDIR)/$(am__dirstamp)
 	@: > impl/$(DEPDIR)/$(am__dirstamp)
+impl/sort.$(OBJEXT): impl/$(am__dirstamp) \
+	impl/$(DEPDIR)/$(am__dirstamp)
+pipes-sort$(EXEEXT): $(pipes_sort_OBJECTS) $(pipes_sort_DEPENDENCIES) 
+	@rm -f pipes-sort$(EXEEXT)
+	$(CXXLINK) $(pipes_sort_LDFLAGS) $(pipes_sort_OBJECTS) $(pipes_sort_LDADD) $(LIBS)
 impl/wordcount-nopipe.$(OBJEXT): impl/$(am__dirstamp) \
 impl/wordcount-nopipe.$(OBJEXT): impl/$(am__dirstamp) \
 	impl/$(DEPDIR)/$(am__dirstamp)
 	impl/$(DEPDIR)/$(am__dirstamp)
 wordcount-nopipe$(EXEEXT): $(wordcount_nopipe_OBJECTS) $(wordcount_nopipe_DEPENDENCIES) 
 wordcount-nopipe$(EXEEXT): $(wordcount_nopipe_OBJECTS) $(wordcount_nopipe_DEPENDENCIES) 
@@ -324,6 +337,7 @@ wordcount-simple$(EXEEXT): $(wordcount_simple_OBJECTS) $(wordcount_simple_DEPEND
 
 
 mostlyclean-compile:
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
 	-rm -f *.$(OBJEXT)
+	-rm -f impl/sort.$(OBJEXT)
 	-rm -f impl/wordcount-nopipe.$(OBJEXT)
 	-rm -f impl/wordcount-nopipe.$(OBJEXT)
 	-rm -f impl/wordcount-part.$(OBJEXT)
 	-rm -f impl/wordcount-part.$(OBJEXT)
 	-rm -f impl/wordcount-simple.$(OBJEXT)
 	-rm -f impl/wordcount-simple.$(OBJEXT)
@@ -331,6 +345,7 @@ mostlyclean-compile:
 distclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 	-rm -f *.tab.c
 
 
+@AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/sort.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-nopipe.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-nopipe.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-part.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-part.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-simple.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@impl/$(DEPDIR)/wordcount-simple.Po@am__quote@

+ 96 - 0
src/examples/pipes/impl/sort.cc

@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hadoop/Pipes.hh"
+#include "hadoop/TemplateFactory.hh"
+
+class SortMap: public HadoopPipes::Mapper {
+private:
+  /* the fraction 0.0 to 1.0 of records to keep */
+  float keepFraction;
+  /* the number of records kept so far */
+  long long keptRecords;
+  /* the total number of records */
+  long long totalRecords;
+  static const std::string MAP_KEEP_PERCENT;
+public:
+  /*
+   * Look in the config to find the fraction of records to keep.
+   */
+  SortMap(HadoopPipes::TaskContext& context){
+    const HadoopPipes::JobConf* conf = context.getJobConf();
+    if (conf->hasKey(MAP_KEEP_PERCENT)) {
+      keepFraction = conf->getFloat(MAP_KEEP_PERCENT) / 100.0;
+    } else {
+      keepFraction = 1.0;
+    }
+    keptRecords = 0;
+    totalRecords = 0;
+  }
+
+  void map(HadoopPipes::MapContext& context) {
+    totalRecords += 1;
+    while ((float) keptRecords / totalRecords < keepFraction) {
+      keptRecords += 1;
+      context.emit(context.getInputKey(), context.getInputValue());
+    }
+  }
+};
+
+const std::string SortMap::MAP_KEEP_PERCENT("hadoop.sort.map.keep.percent");
+
+class SortReduce: public HadoopPipes::Reducer {
+private:
+  /* the fraction 0.0 to 1.0 of records to keep */
+  float keepFraction;
+  /* the number of records kept so far */
+  long long keptRecords;
+  /* the total number of records */
+  long long totalRecords;
+  static const std::string REDUCE_KEEP_PERCENT;
+public:
+  SortReduce(HadoopPipes::TaskContext& context){
+    const HadoopPipes::JobConf* conf = context.getJobConf();
+    if (conf->hasKey(REDUCE_KEEP_PERCENT)) {
+      keepFraction = conf->getFloat(REDUCE_KEEP_PERCENT) / 100.0;
+    } else {
+      keepFraction = 1.0;
+    }
+    keptRecords = 0;
+    totalRecords = 0;
+  }
+
+  void reduce(HadoopPipes::ReduceContext& context) {
+    while (context.nextValue()) {
+      totalRecords += 1;
+      while ((float) keptRecords / totalRecords < keepFraction) {
+        keptRecords += 1;
+        context.emit(context.getInputKey(), context.getInputValue());
+      }
+    }
+  }
+};
+
+const std::string 
+  SortReduce::REDUCE_KEEP_PERCENT("hadoop.sort.reduce.keep.percent");
+
+int main(int argc, char *argv[]) {
+  return HadoopPipes::runTask(HadoopPipes::TemplateFactory<SortMap,
+                                                           SortReduce>());
+}
+