SampleDataJoinMapper.java 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  /**
   * Licensed to the Apache Software Foundation (ASF) under one
   * or more contributor license agreements. See the NOTICE file
   * distributed with this work for additional information
   * regarding copyright ownership. The ASF licenses this file
   * to you under the Apache License, Version 2.0 (the
   * "License"); you may not use this file except in compliance
   * with the License. You may obtain a copy of the License at
   *
   * http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  18. package org.apache.hadoop.contrib.utils.join;
  19. import org.apache.hadoop.io.Text;
  20. import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;
  21. import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;
  22. import org.apache.hadoop.contrib.utils.join.SampleTaggedMapOutput;
  23. /**
  24. * This is a subclass of DataJoinMapperBase that is used to
  25. * demonstrate the functionality of INNER JOIN between 2 data
  26. * sources (TAB separated text files) based on the first column.
  27. */
  28. public class SampleDataJoinMapper extends DataJoinMapperBase {
  29. protected Text generateInputTag(String inputFile) {
  30. // tag the row with input file name (data source)
  31. return new Text(inputFile);
  32. }
  33. protected Text generateGroupKey(TaggedMapOutput aRecord) {
  34. // first column in the input tab separated files becomes the key (to perform the JOIN)
  35. String line = ((Text) aRecord.getData()).toString();
  36. String groupKey = "";
  37. String[] tokens = line.split("\\t", 2);
  38. groupKey = tokens[0];
  39. return new Text(groupKey);
  40. }
  41. protected TaggedMapOutput generateTaggedMapOutput(Object value) {
  42. TaggedMapOutput retv = new SampleTaggedMapOutput((Text) value);
  43. retv.setTag(new Text(this.inputTag));
  44. return retv;
  45. }
  46. }