SampleDataJoinReducer.java 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.contrib.utils.join;
  19. import org.apache.hadoop.io.Text;
  20. import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;
  21. import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;
  22. /**
  23. * This is a subclass of DataJoinReducerBase that is used to
  24. * demonstrate the functionality of INNER JOIN between 2 data
  25. * sources (TAB separated text files) based on the first column.
  26. */
  27. public class SampleDataJoinReducer extends DataJoinReducerBase {
  28. /**
  29. *
  30. * @param tags
  31. * a list of source tags
  32. * @param values
  33. * a value per source
  34. * @return combined value derived from values of the sources
  35. */
  36. protected TaggedMapOutput combine(Object[] tags, Object[] values) {
  37. // eliminate rows which didnot match in one of the two tables (for INNER JOIN)
  38. if (tags.length < 2)
  39. return null;
  40. String joinedStr = "";
  41. for (int i=0; i<tags.length; i++) {
  42. if (i > 0)
  43. joinedStr += "\t";
  44. // strip first column as it is the key on which we joined
  45. String line = ((Text) (((TaggedMapOutput) values[i]).getData())).toString();
  46. String[] tokens = line.split("\\t", 2);
  47. joinedStr += tokens[1];
  48. }
  49. TaggedMapOutput retv = new SampleTaggedMapOutput(new Text(joinedStr));
  50. retv.setTag((Text) tags[0]);
  51. return retv;
  52. }
  53. }