
HADOOP-4566. Deploy new hive code to support more types.
(Zheng Shao via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@712905 13f79535-47bb-0310-9956-ffa450edef68

Dhruba Borthakur 16 years ago
parent
commit
fbf5c1b76c
100 changed files with 4862 additions and 963 deletions
  1. 3 0
      CHANGES.txt
  2. 80 12
      src/contrib/hive/README
  3. 0 4
      src/contrib/hive/build.xml
  4. 99 28
      src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
  5. 2 2
      src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/SetProcessor.java
  6. 7 1
      src/contrib/hive/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
  7. 18 0
      src/contrib/hive/conf/hive-default.xml
  8. 1 0
      src/contrib/hive/data/files/apache.access.log
  9. 9 3
      src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
  10. 15 4
      src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
  11. 77 0
      src/contrib/hive/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
  12. 2 2
      src/contrib/hive/metastore/src/test/org/apache/hadoop/hive/metastore/TestPartitions.java
  13. 1 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
  14. 7 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
  15. 22 2
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
  16. 44 4
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
  17. 16 28
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java
  18. 2 13
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFuncEvaluator.java
  19. 2 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java
  20. 138 33
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
  21. 16 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java
  22. 62 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
  23. 232 135
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
  24. 179 126
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
  25. 5 2
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
  26. 5 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
  27. 44 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
  28. 30 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
  29. 7 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
  30. 127 27
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
  31. 86 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
  32. 61 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
  33. 123 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Throttle.java
  34. 44 8
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
  35. 330 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/FlatFileInputFormat.java
  36. 0 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
  37. 17 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
  38. 4 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
  39. 68 4
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
  40. 168 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
  41. 74 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
  42. 15 11
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java
  43. 44 15
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
  44. 89 18
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
  45. 8 3
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
  46. 62 36
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
  47. 69 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java
  48. 0 68
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OperatorInfo.java
  49. 264 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
  50. 46 9
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java
  51. 1 10
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
  52. 34 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
  53. 0 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java
  54. 4 11
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
  55. 41 6
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
  56. 61 8
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePruner.java
  57. 607 145
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
  58. 3 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
  59. 100 7
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/TableSample.java
  60. 161 18
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
  61. 41 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/alterTableDesc.java
  62. 11 2
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java
  63. 7 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java
  64. 13 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java
  65. 17 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFuncDesc.java
  66. 17 2
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeIndexDesc.java
  67. 41 49
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/fetchWork.java
  68. 3 2
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/groupByDesc.java
  69. 14 14
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/mapredWork.java
  70. 28 14
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/reduceSinkDesc.java
  71. 21 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/selectDesc.java
  72. 115 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
  73. 1 1
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/typeinfo/PrimitiveTypeInfo.java
  74. 68 0
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/typeinfo/TypeInfoUtils.java
  75. 5 5
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFAvg.java
  76. 6 6
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java
  77. 9 9
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMax.java
  78. 9 9
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMin.java
  79. 10 10
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFSum.java
  80. 0 8
      src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java
  81. 4 0
      src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
  82. 14 5
      src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
  83. 13 9
      src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/io/JavaTestObjFlatFileInputFormat.java
  84. 212 0
      src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/io/RecordTestObj.java
  85. 281 0
      src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/io/TestFlatFileInputFormat.java
  86. 6 0
      src/contrib/hive/ql/src/test/queries/clientnegative/bad_sample_clause.q
  87. 1 0
      src/contrib/hive/ql/src/test/queries/clientnegative/input1.q
  88. 1 0
      src/contrib/hive/ql/src/test/queries/clientnegative/input2.q
  89. 6 0
      src/contrib/hive/ql/src/test/queries/clientnegative/input_testxpath4.q
  90. 7 0
      src/contrib/hive/ql/src/test/queries/clientnegative/invalid_create_tbl1.q
  91. 1 0
      src/contrib/hive/ql/src/test/queries/clientnegative/invalid_tbl_name.q
  92. 6 0
      src/contrib/hive/ql/src/test/queries/clientnegative/joinneg.q
  93. 6 0
      src/contrib/hive/ql/src/test/queries/clientnegative/load_wrong_fileformat.q
  94. 4 0
      src/contrib/hive/ql/src/test/queries/clientnegative/notable_alias3.q
  95. 4 0
      src/contrib/hive/ql/src/test/queries/clientnegative/notable_alias4.q
  96. 20 0
      src/contrib/hive/ql/src/test/queries/clientpositive/alter1.q
  97. 1 1
      src/contrib/hive/ql/src/test/queries/clientpositive/case_sensitivity.q
  98. 1 1
      src/contrib/hive/ql/src/test/queries/clientpositive/cast1.q
  99. 1 1
      src/contrib/hive/ql/src/test/queries/clientpositive/groupby1.q
  100. 1 1
      src/contrib/hive/ql/src/test/queries/clientpositive/groupby1_limit.q

+ 3 - 0
CHANGES.txt

@@ -110,6 +110,9 @@ Trunk (unreleased changes)
     HADOOP-4621. Fix javadoc warnings caused by duplicate jars. (Kan Zhang via
     cdouglas)
 
+    HADOOP-4566. Deploy new hive code to support more types.
+    (Zheng Shao via dhruba)
+
 Release 0.19.0 - Unreleased
 
   INCOMPATIBLE CHANGES

+ 80 - 12
src/contrib/hive/README

@@ -118,6 +118,10 @@ hive> DESCRIBE invites;
 
 shows the list of columns
 
+hive> DESCRIBE EXTENDED invites;
+
+shows the list of columns plus any other meta information about the table
+
 Altering tables. Table name can be changed and additional columns can be dropped
 
 hive> ALTER TABLE pokes ADD COLUMNS (new_col INT);
@@ -258,28 +262,92 @@ INSERT OVERWRITE LOCAL DIRECTORY '/tmp/dest4.out' SELECT src.value WHERE src.key
 
 STREAMING
 ---------
-hive> FROM invites a INSERT OVERWRITE TABLE events SELECT TRANSFORM(a.foo, a.bar) AS (oof, rab) USING '/bin/cat' WHERE a.ds > '2008-08-09';
+hive> FROM invites a INSERT OVERWRITE TABLE events
+    > SELECT TRANSFORM(a.foo, a.bar) AS (oof, rab)
+    > USING '/bin/cat' WHERE a.ds > '2008-08-09';
 
-This streams the data in the map phase through the script /bin/cat (like hadoop streaming). 
+This streams the data in the map phase through the script /bin/cat (like hadoop streaming).
 Similarly - streaming can be used on the reduce side (please see the Hive Tutorial or examples)
 
 KNOWN BUGS/ISSUES
 -----------------
 * hive cli may hang for a couple of minutes because of a bug in getting metadata
   from the derby database. let it run and you'll be fine!
-* hive cli does not support multi-line queries.
 * hive cli creates derby.log in the directory from which it has been invoked.
-* DESCRIBE table currently only shows columns in a table. Other metadata like
-  partitions, buckets etc are not shown.
-* LOAD FILE or INSERT INTO TABLE do not validate schemas of the destination tables.
 * COUNT(*) does not work for now. Use COUNT(1) instead.
-* String literals are indicated by single quotes(double quotes are not supported). 
-  So 'is a valid string' while "is not a valid string" in the query language. Hive
-  does support escaping quotes and semi-colon similar to MySQL.
-* Multiple GROUP BYs are not supported in the multi-table table INSERT queries.
-* ORDER BY not supported.
+* ORDER BY not supported yet.
 * Only string and thrift types (http://developers.facebook.com/thrift) have been tested.
-
+* When doing a JOIN, put the table with the largest number of rows for a given join key
+  rightmost in the JOIN clause; otherwise you may see OutOfMemory errors.
 
 FUTURE FEATURES
 ---------------
+* EXPLODE function to generate multiple rows from a column of list type.
+* Simpler syntax for running Map/Reduce scripts.
+* ORDER BY and SORT BY.
+* Table statistics for query optimization.
+
+Developing Hive using Eclipse
+------------------------
+1. Set up hadoop development environment with Eclipse:
+http://wiki.apache.org/hadoop/EclipseEnvironment
+
+2. Download Hive src code from:
+http://mirror.facebook.com/facebook/hive
+
+If hadoop version is 0.17.x or 0.18.x, use
+http://mirror.facebook.com/facebook/hive/hadoop-0.17/
+
+If hadoop version is 0.19.x or up or trunk, use
+http://mirror.facebook.com/facebook/hive/hadoop-0.19/
+
+3. Extract the Hive src code to src/contrib/hive, make sure this file (README)
+  is in src/contrib/hive.
+
+4. In src/contrib/hive, run "ant package"
+
+5. In src/contrib/hive, run "ant -logfile test.log test" to make sure
+   everything works.  This test may take 20 minutes.
+
+6. Add the following list to the Eclipse project's .classpath file:
+       <classpathentry kind="src" path="build/contrib/hive/ql/test/src"/>
+       <classpathentry kind="src" path="build/contrib/hive/ql/gen-java"/>
+       <classpathentry kind="src" path="src/contrib/hive/cli/src/java"/>
+       <classpathentry kind="src" path="src/contrib/hive/common/src/java"/>
+       <classpathentry kind="src" path="src/contrib/hive/metastore/src/model"/>
+       <classpathentry kind="src" path="src/contrib/hive/metastore/src/gen-javabean"/>
+       <classpathentry kind="src" path="src/contrib/hive/metastore/src/java"/>
+       <classpathentry kind="src" path="src/contrib/hive/metastore/src/test"/>
+       <classpathentry kind="src" path="src/contrib/hive/ql/src/java"/>
+       <classpathentry kind="src" path="src/contrib/hive/ql/src/test"/>
+       <classpathentry kind="src" path="src/contrib/hive/serde/src/gen-java"/>
+       <classpathentry kind="src" path="src/contrib/hive/serde/src/java"/>
+       <classpathentry kind="src" path="src/contrib/hive/serde/src/test"/>
+       <classpathentry kind="lib" path="src/contrib/hive/cli/lib/jline-0.9.94.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/asm-3.1.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/commons-lang-2.4.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/derby.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/jdo2-api-2.1.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/jpox-core-1.2.2.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/jpox-enhancer-1.2.2.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/jpox-rdbms-1.2.2.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/libfb303.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/lib/libthrift.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/ql/lib/antlr-3.0.1.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/ql/lib/antlr-runtime-3.0.1.jar"/>
+       <classpathentry kind="lib" path="src/contrib/hive/ql/lib/commons-jexl-1.1.jar"/>
+       <classpathentry kind="lib" path="build/contrib/hive/metastore/metastore_model.jar" sourcepath="src/contrib/hive/metastore/src/model"/>
+
+7. Develop using Eclipse.
+
+
+Development Tips
+------------------------
+* You may change the first line in conf/hive-log4j.properties to the following line to see error messages on the console.
+hive.root.logger=INFO,console
+Otherwise you will see error messages in /tmp/<username>
+* You may use the following line to test a specific testcase with a specific query file.
+ant -Dtestcase=TestParse -Dqfile=udf4.q test
+ant -Dtestcase=TestParseNegative -Dqfile=invalid_dot.q test
+ant -Dtestcase=TestCliDriver -Dqfile=udf1.q test
+ant -Dtestcase=TestNegativeCliDriver -Dqfile=invalid_tbl_name.q test

+ 0 - 4
src/contrib/hive/build.xml

@@ -161,10 +161,6 @@
     <copy todir="${target.example.dir}/queries" preservelastmodified="true" flatten="true">
       <fileset dir="${ql.test.query.dir}/positive" includes="*.q"/>
     </copy>
-    <mkdir dir="${dist.dir}/contrib/hive"/>
-    <copy todir="${dist.dir}/contrib/hive">
-      <fileset dir="${target.dir}"/>
-    </copy>
   </target>
 
 </project>

+ 99 - 28
src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 
 public class CliDriver {
@@ -41,7 +42,18 @@ public class CliDriver {
   public static SetProcessor sp;
   public static Driver qp;
   public static FsShell dfs;
+  public static Log LOG = LogFactory.getLog("CliDriver");
 
+  /**
+   * delay console initialization until session has been initialized
+   */
+  public static LogHelper console;
+  public static LogHelper getConsole() {
+    if(console == null)
+      console = new LogHelper(LOG);
+    return (console);
+  }
+  
   public CliDriver(CliSessionState ss) {
     SessionState.start(ss);
     sp = new SetProcessor();
@@ -49,23 +61,28 @@ public class CliDriver {
   }
   
   public static int processCmd(String cmd) {
+
+    SessionState ss = SessionState.get();
+    LogHelper console = getConsole();
+
     String[] tokens = cmd.split("\\s+");
     String cmd_1 = cmd.substring(tokens[0].length());
     int ret = 0;
     
-    if(tokens[0].equals("set")) {
+    if(tokens[0].toLowerCase().equals("set")) {
+
       ret = sp.run(cmd_1);
-    } else if (cmd.equals("quit") || cmd.equals("exit")) {
+
+    } else if (cmd.toLowerCase().equals("quit") || cmd.toLowerCase().equals("exit")) {
+
       // if we have come this far - either the previous commands
       // are all successful or this is command line. in either case
       // this counts as a successful run
       System.exit(0);
+
     } else if (cmd.startsWith("!")) {
-      SessionState ss = SessionState.get();
+
       String shell_cmd = cmd.substring(1);
-      if (shell_cmd.endsWith(";")) {
-        shell_cmd = shell_cmd.substring(0, shell_cmd.length()-1);
-      }
 
       //shell_cmd = "/bin/bash -c \'" + shell_cmd + "\'";
       try {
@@ -76,48 +93,102 @@ public class CliDriver {
         outPrinter.start();
         errPrinter.start();
       
-        int exitVal = executor.waitFor();
-        if (exitVal != 0) {
-          ss.err.write((new String("Command failed with exit code = " + exitVal)).getBytes());
+        ret = executor.waitFor();
+        if (ret != 0) {
+          console.printError("Command failed with exit code = " + ret);
         }
       }
       catch (Exception e) {
-        e.printStackTrace();
+        console.printError("Exception raised from Shell command " + e.getLocalizedMessage(),
+                           org.apache.hadoop.util.StringUtils.stringifyException(e));
+        ret = 1;
       }
-    } else if (cmd.startsWith("dfs")) {
+
+    } else if (tokens[0].toLowerCase().equals("dfs")) {
+
       // dfs shell commands
-      SessionState ss = SessionState.get();
       if(dfs == null)
         dfs = new FsShell(ss.getConf());
-      String hadoopCmd = cmd.replaceFirst("dfs\\s+", "");
-      hadoopCmd = hadoopCmd.trim();
-      if (hadoopCmd.endsWith(";")) {
-        hadoopCmd = hadoopCmd.substring(0, hadoopCmd.length()-1);
-      }
-      String[] args = hadoopCmd.split("\\s+");
+
+      String [] alt_tokens = new String [tokens.length-1];
+      System.arraycopy(tokens, 1, alt_tokens, 0, tokens.length-1);
+      tokens = alt_tokens;
+
       try {
         PrintStream oldOut = System.out;
         System.setOut(ss.out);
-        int val = dfs.run(args);
+        ret = dfs.run(tokens);
         System.setOut(oldOut);
-        if (val != 0) {
-          ss.err.write((new String("Command failed with exit code = " + val)).getBytes());
+        if (ret != 0) {
+          console.printError("Command failed with exit code = " + ret);
         }
       } catch (Exception e) {
-        ss.err.println("Exception raised from DFSShell.run " + e.getLocalizedMessage()); 
+        console.printError("Exception raised from DFSShell.run " + e.getLocalizedMessage(),
+                           org.apache.hadoop.util.StringUtils.stringifyException(e));
+        ret = 1;
+      }
+
+    } else if (tokens[0].toLowerCase().equals("list")) {
+
+      SessionState.ResourceType t;
+      if(tokens.length < 2 || (t = SessionState.find_resource_type(tokens[1])) == null) {
+        console.printError("Usage: list [" +
+                           StringUtils.join(SessionState.ResourceType.values(),"|") +
+                           "] [<value> [<value>]*]" );
+        ret = 1;
+      } else {
+        List<String> filter = null;
+        if(tokens.length >=3) {
+          System.arraycopy(tokens, 2, tokens, 0, tokens.length-2);
+          filter = Arrays.asList(tokens);
+        }
+        Set<String> s = ss.list_resource(t, filter);
+        if(s != null && !s.isEmpty())
+          ss.out.println(StringUtils.join(s, "\n"));
+      }
+
+    } else if (tokens[0].toLowerCase().equals("add")) {
+
+      SessionState.ResourceType t;
+      if(tokens.length < 3 || (t = SessionState.find_resource_type(tokens[1])) == null) {
+        console.printError("Usage: add [" +
+                           StringUtils.join(SessionState.ResourceType.values(),"|") +
+                           "] <value> [<value>]*");
+        ret = 1;
+      } else {
+        for(int i = 2; i<tokens.length; i++) {
+          ss.add_resource(t, tokens[i]);
+        }
+      }
+
+    } else if (tokens[0].toLowerCase().equals("delete")) {
+
+      SessionState.ResourceType t;
+      if(tokens.length < 2 || (t = SessionState.find_resource_type(tokens[1])) == null) {
+        console.printError("Usage: delete [" +
+                           StringUtils.join(SessionState.ResourceType.values(),"|") +
+                           "] [<value>]");
+        ret = 1;
+      } else if (tokens.length >= 3) {
+        for(int i = 2; i<tokens.length; i++) {
+          ss.delete_resource(t, tokens[i]);
+        }
+      } else {
+        ss.delete_resource(t);
       }
+
     } else {
+      PrintStream out = ss.out;
+
       ret = qp.run(cmd);
       Vector<String> res = new Vector<String>();
       while (qp.getResults(res)) {
       	for (String r:res) {
-          SessionState ss  = SessionState.get();
-          PrintStream out = ss.out;
           out.println(r);
       	}
         res.clear();
       }
-
+      
       int cret = qp.close();
       if (ret == 0) {
         ret = cret;
@@ -223,8 +294,7 @@ public class CliDriver {
     String historyFile = System.getProperty("user.home") + File.separator  + HISTORYFILE;
     reader.setHistory(new History(new File(historyFile)));
     int ret = 0;
-    Log LOG = LogFactory.getLog("CliDriver");
-    LogHelper console = new LogHelper(LOG);
+
     String prefix = "";
     String curPrompt = prompt;
     while ((line = reader.readLine(curPrompt+"> ")) != null) {
@@ -242,10 +312,11 @@ public class CliDriver {
       long end = System.currentTimeMillis();
       if (end > start) {
         double timeTaken = (double)(end-start)/1000.0;
-        console.printInfo("Time taken: " + timeTaken + " seconds", null);
+        getConsole().printInfo("Time taken: " + timeTaken + " seconds", null);
       }
     }
 
     System.exit(ret);
   }
+
 }

+ 2 - 2
src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/SetProcessor.java

@@ -81,8 +81,8 @@ public class SetProcessor implements CommandProcessor {
       part[0] = nwcmd.substring(0, nwcmd.length()-1);
       part[1] = "";
     } else {
-      part[0] = nwcmd.substring(0, eqIndex);
-      part[1] = nwcmd.substring(eqIndex+1);
+      part[0] = nwcmd.substring(0, eqIndex).trim();
+      part[1] = nwcmd.substring(eqIndex+1).trim();
     }
 
     try {

+ 7 - 1
src/contrib/hive/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java

@@ -81,7 +81,13 @@ public class HiveConf extends Configuration {
     HIVETABLENAME("hive.table.name", ""),
     HIVEPARTITIONNAME("hive.partition.name", ""),
     HIVEPARTITIONPRUNER("hive.partition.pruning", "nonstrict"),
-    HIVEALIAS("hive.alias", "");
+    HIVEALIAS("hive.alias", ""),
+    HIVEMAPSIDEAGGREGATE("hive.map.aggr", "false"),
+    HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
+    
+    // Default file format for CREATE TABLE statement
+    // Options: TextFile, SequenceFile
+    HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile");
     
     public final String varname;
     public final String defaultVal;

+ 18 - 0
src/contrib/hive/conf/hive-default.xml

@@ -84,4 +84,22 @@
   <description>Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieval of raw metadata objects such as table, database</description>
 </property>
 
+<property>
+  <name>hive.default.fileformat</name>
+  <value>TextFile</value>
+  <description>Default file format for CREATE TABLE statement. Options are TextFile and SequenceFile. Users can explicitly say CREATE TABLE ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
+</property>
+
+<property>
+  <name>hive.map.aggr</name>
+  <value>false</value>
+  <description>Whether to use map-side aggregation in Hive Group By queries</description>
+</property>
+
+<property>
+  <name>hive.join.emit.interval</name>
+  <value>1000</value>
+  <description>How many rows in the right-most join operand Hive should buffer before emitting the join result. </description>
+</property>
+
 </configuration>
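
The three settings added above (hive.default.fileformat, hive.map.aggr, hive.join.emit.interval) are also registered as HiveConf variables in the HiveConf.java hunk earlier in this commit. Since HiveConf extends Hadoop's Configuration, they can be read with the ordinary Configuration getters; the snippet below is only a minimal sketch under that assumption (the class name is made up) and is not part of the commit.

import org.apache.hadoop.conf.Configuration;

public class ReadHiveDefaults {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Fall back to the defaults shipped in hive-default.xml when unset.
    String fileFormat   = conf.get("hive.default.fileformat", "TextFile");
    boolean mapSideAggr = conf.getBoolean("hive.map.aggr", false);
    int joinEmitRows    = conf.getInt("hive.join.emit.interval", 1000);
    System.out.println(fileFormat + ", map-side aggregation=" + mapSideAggr
        + ", join buffer rows=" + joinEmitRows);
  }
}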

+ 1 - 0
src/contrib/hive/data/files/apache.access.log

@@ -0,0 +1 @@
+127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326

+ 9 - 3
src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java

@@ -271,8 +271,10 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         this.incrementCounter("create_table");
         logStartFunction("create_table: db=" + tbl.getDbName() + " tbl=" + tbl.getTableName());
         boolean success = false;
-        if(!MetaStoreUtils.validateName(tbl.getTableName())) {
-          throw new InvalidObjectException(tbl.getTableName() + " is not a valid object name");
+        if(!MetaStoreUtils.validateName(tbl.getTableName()) ||
+            !MetaStoreUtils.validateColNames(tbl.getSd().getCols()) ||
+             (tbl.getPartitionKeys() != null && !MetaStoreUtils.validateColNames(tbl.getPartitionKeys()))) {
+            throw new InvalidObjectException(tbl.getTableName() + " is not a valid object name");
         }
         try {
           getMS().openTransaction();
@@ -540,11 +542,15 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         logStartFunction("getVersion");
         return "3.0";
       }
-
+      
       public void alter_table(String dbname, String name, Table newTable) throws InvalidOperationException,
           MetaException {
         this.incrementCounter("alter_table");
         logStartFunction("truncate_table: db=" + dbname + " tbl=" + name + " newtbl=" + newTable.getTableName());
+        if(!MetaStoreUtils.validateName(newTable.getTableName()) ||
+            !MetaStoreUtils.validateColNames(newTable.getSd().getCols())) {
+          throw new InvalidOperationException(newTable.getTableName() + " is not a valid object name");
+        }
         try {
           getMS().alterTable(dbname, name, newTable);
         } catch (InvalidObjectException e) {

+ 15 - 4
src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java

@@ -231,8 +231,8 @@ public class MetaStoreUtils {
   /**
    * validateName
    *
-   * Checks the name conforms to our standars which are: "[a-zA-z-_0-9]+".
-   * checks this is just characters and numbers and _ and . and -
+   * Checks that the name conforms to our standards, which are: "[a-zA-z_0-9]+".
+   * That is, it must contain only letters, digits and _.
    *
    * @param tableName the name to validate
    * @return none
@@ -246,6 +246,14 @@ public class MetaStoreUtils {
     }
     return false;
   }
+  
+  static public boolean validateColNames(List<FieldSchema> cols) {
+    for (FieldSchema fieldSchema : cols) {
+      if(!validateName(fieldSchema.getName()))
+        return false;
+    }
+    return true;
+  }
 
   /**
    * Change from old to new format properties of a schema file
@@ -395,7 +403,9 @@ public class MetaStoreUtils {
   static HashMap<String, String> typeToThriftTypeMap; 
   static {
     typeToThriftTypeMap = new HashMap<String, String>();
+    typeToThriftTypeMap.put(org.apache.hadoop.hive.serde.Constants.BOOLEAN_TYPE_NAME, "bool");
     typeToThriftTypeMap.put(org.apache.hadoop.hive.serde.Constants.TINYINT_TYPE_NAME, "byte");
+    typeToThriftTypeMap.put(org.apache.hadoop.hive.serde.Constants.SMALLINT_TYPE_NAME, "i16");
     typeToThriftTypeMap.put(org.apache.hadoop.hive.serde.Constants.INT_TYPE_NAME, "i32");
     typeToThriftTypeMap.put(org.apache.hadoop.hive.serde.Constants.BIGINT_TYPE_NAME, "i64");
     typeToThriftTypeMap.put(org.apache.hadoop.hive.serde.Constants.DOUBLE_TYPE_NAME, "double");
@@ -446,7 +456,7 @@ public class MetaStoreUtils {
       ddl.append(col.getName());
     }
     ddl.append("}");
-    LOG.warn("DDL: " + ddl);
+    LOG.info("DDL: " + ddl);
     return ddl.toString();
   }
   public static Properties getSchema(org.apache.hadoop.hive.metastore.api.Table tbl) {
@@ -538,7 +548,7 @@ public class MetaStoreUtils {
    * @throws SerDeException
    * @throws MetaException
    */
-  static List<FieldSchema> getFieldsFromDeserializer(String tableName, Deserializer deserializer) throws SerDeException, MetaException {
+  public static List<FieldSchema> getFieldsFromDeserializer(String tableName, Deserializer deserializer) throws SerDeException, MetaException {
     ObjectInspector oi = deserializer.getObjectInspector();
     String [] names = tableName.split("\\.");
     String last_name = names[names.length-1];
@@ -572,4 +582,5 @@ public class MetaStoreUtils {
     }
     return str_fields;
   }
+
 }
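
For reference, the naming rule the updated javadoc describes (letters, digits and underscore only) is now enforced for column names as well, via the new validateColNames. The stand-alone sketch below only illustrates that rule; the real check lives in MetaStoreUtils and operates on FieldSchema objects, and the class and method names here are illustrative.

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

public class NameRuleSketch {
  // Letters, digits and underscore, per the javadoc in the hunk above.
  private static final Pattern VALID = Pattern.compile("[a-zA-Z0-9_]+");

  static boolean validateName(String name) {
    return name != null && VALID.matcher(name).matches();
  }

  static boolean validateColNames(List<String> colNames) {
    for (String name : colNames) {
      if (!validateName(name)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(validateName("altertbl"));                           // true
    System.out.println(validateName("alter-tbl"));                          // false: '-' rejected
    System.out.println(validateColNames(Arrays.asList("name", "in.come"))); // false: '.' rejected
  }
}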

+ 77 - 0
src/contrib/hive/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java

@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
@@ -350,6 +352,81 @@ public class TestHiveMetaStore extends TestCase {
     }
   }
 
+  public void testAlterTable() throws Exception {
+    try {
+      String dbName = "alterdb";
+      String invTblName = "alter-tbl";
+      String tblName = "altertbl";
+
+      client.dropTable(dbName, tblName);
+      client.dropDatabase(dbName);
+      boolean ret = client.createDatabase(dbName, "strange_loc");
+      assertTrue("Unable to create the databse " + dbName, ret);
+
+      ArrayList<FieldSchema> invCols = new ArrayList<FieldSchema>(2);
+      invCols.add(new FieldSchema("n-ame", Constants.STRING_TYPE_NAME, ""));
+      invCols.add(new FieldSchema("in.come", Constants.INT_TYPE_NAME, ""));
+
+      Table tbl = new Table();
+      tbl.setDbName(dbName);
+      tbl.setTableName(invTblName);
+      StorageDescriptor sd = new StorageDescriptor();
+      tbl.setSd(sd);
+      sd.setCols(invCols);
+      sd.setCompressed(false);
+      sd.setNumBuckets(1);
+      sd.setParameters(new HashMap<String, String>());
+      sd.getParameters().put("test_param_1", "Use this for comments etc");
+      sd.setBucketCols(new ArrayList<String>(2));
+      sd.getBucketCols().add("name");
+      sd.setSerdeInfo(new SerDeInfo());
+      sd.getSerdeInfo().setName(tbl.getTableName());
+      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
+      sd.getSerdeInfo().getParameters().put(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "1");
+      boolean failed = false;
+      try {
+        client.createTable(tbl);
+      } catch (InvalidObjectException ex) {
+        failed = true;
+      }
+      if(!failed) {
+        assertTrue("Able to create table with invalid name: " + invTblName, false);
+      }
+      ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(2);
+      cols.add(new FieldSchema("name", Constants.STRING_TYPE_NAME, ""));
+      cols.add(new FieldSchema("income", Constants.INT_TYPE_NAME, ""));
+
+      // create a valid table
+      tbl.setTableName(tblName);
+      tbl.getSd().setCols(cols);
+      client.createTable(tbl);
+      
+      // now try to invalid alter table
+      Table tbl2 = client.getTable(dbName, tblName);
+      failed = false;
+      try {
+        tbl2.setTableName(invTblName);
+        tbl2.getSd().setCols(invCols);
+        client.alter_table(dbName, tblName, tbl2);
+      } catch (InvalidOperationException ex) {
+        failed = true;
+      }
+      if(!failed) {
+        assertTrue("Able to rename table with invalid name: " + invTblName, false);
+      }
+      // try a valid alter table
+      tbl2.setTableName(tblName);
+      tbl2.getSd().setCols(cols);
+      tbl2.getSd().setNumBuckets(32);
+      client.alter_table(dbName, tblName, tbl2);
+      Table tbl3 = client.getTable(dbName, tblName);
+      assertEquals("Alter table didn't succeed. Num buckets ", tbl2.getSd().getNumBuckets(), tbl3.getSd().getNumBuckets());
+    } catch (Exception e) {
+      System.err.println(StringUtils.stringifyException(e));
+      System.err.println("testAlterTable() failed.");
+      throw e;
+    }
+  }
   public void testComplexTable() throws Exception {
   
     String dbName = "compdb";

+ 2 - 2
src/contrib/hive/metastore/src/test/org/apache/hadoop/hive/metastore/TestPartitions.java

@@ -60,8 +60,8 @@ public class TestPartitions extends MetaStoreTestBase {
       fileSys_.mkdirs(part2);
       List<String> partitions = bar1.getPartitions();
       assertTrue(partitions.size() == 2);
-      assertTrue(partitions.get(0).equals("ds=2008-01-01"));
-      assertTrue(partitions.get(1).equals("ds=2008-01-02"));
+      assertTrue(partitions.contains("ds=2008-01-01"));
+      assertTrue(partitions.contains("ds=2008-01-02"));
       cleanup();
     } catch(MetaException e) {
       e.printStackTrace();

+ 1 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/Driver.java

@@ -218,7 +218,7 @@ public class Driver implements CommandProcessor {
   
   public boolean getResults(Vector<String> res) 
   {
-  	if (sem.getFetchTask() != null) {
+  	if (sem != null && sem.getFetchTask() != null) {
       if (!sem.getFetchTaskInit()) {
         sem.setFetchTaskInit(true);
         sem.getFetchTask().initialize(conf);

+ 7 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java

@@ -73,4 +73,11 @@ public class ColumnInfo implements Serializable {
   public void setInternalName(String internalName) {
     this.internalName = internalName;
   }
+
+  /**
+   * Returns the string representation of the ColumnInfo.
+   */
+  public String toString() {
+    return internalName + ": " + type;
+  }
 }

+ 22 - 2
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java

@@ -74,6 +74,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
 
   transient HiveConf conf;
   static final private int separator  = Utilities.tabCode;
+  static final private int singleQuote  = '\'';
   static final private int terminator = Utilities.newLineCode;
   
   public void initialize(HiveConf conf) {
@@ -95,7 +96,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
 
         // create the table
         Table tbl = new Table(crtTbl.getTableName());
-        tbl.setFields(crtTbl.getCols());
         StorageDescriptor tblStorDesc = tbl.getTTable().getSd();
         if (crtTbl.getBucketCols() != null)
           tblStorDesc.setBucketCols(crtTbl.getBucketCols());
@@ -169,7 +169,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
           List<String> bucketCols = tbl.getBucketCols();
           List<Order> sortCols = tbl.getSortCols();
 
-          if (sortCols.size() >= bucketCols.size())
+          if ( (sortCols.size() > 0) && (sortCols.size() >= bucketCols.size()))
           {
             boolean found = true;
 
@@ -201,6 +201,10 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
         // set create time
         tbl.getTTable().setCreateTime((int) (System.currentTimeMillis()/1000));
 
+        if(crtTbl.getCols() != null) {
+          tbl.setFields(crtTbl.getCols());
+        }
+
         // create the table
         db.createTable(tbl);
         return 0;
@@ -280,6 +284,20 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
           }
           tbl.getTTable().getSd().setCols(alterTbl.getNewCols());
         }
+        else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDPROPS) {
+          tbl.getTTable().getParameters().putAll(alterTbl.getProps());
+        }
+        else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDSERDEPROPS) {
+          tbl.getTTable().getSd().getSerdeInfo().getParameters().putAll(alterTbl.getProps());
+        }
+        else if (alterTbl.getOp() == alterTableDesc.alterTableTypes.ADDSERDE) {
+          tbl.setSerializationLib(alterTbl.getSerdeName());
+          if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0))
+            tbl.getTTable().getSd().getSerdeInfo().getParameters().putAll(alterTbl.getProps());
+          // since serde is modified then do the appropriate things to reset columns etc
+          tbl.reinitSerDe();
+          tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getName(), tbl.getDeserializer()));
+        }
         else {
           console.printError("Unsupported Alter commnad");
           return 1;
@@ -357,7 +375,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
             if (col.getComment() != null)
             {
               os.write(separator);
+              os.write(singleQuote);
               os.write(col.getComment().getBytes("UTF-8"));
+              os.write(singleQuote);
             }
             firstCol = false;
           }

+ 44 - 4
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java

@@ -34,11 +34,13 @@ import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.plan.mapredWork;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.io.*;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.session.SessionState;
 
 public class ExecDriver extends Task<mapredWork> implements Serializable {
 
@@ -54,12 +56,37 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
     super();
   }
 
+  public static String getRealFiles(Configuration conf) {
+    // fill in local files to be added to the task environment
+    SessionState ss = SessionState.get();
+    Set<String> files = (ss == null) ? null : ss.list_resource(SessionState.ResourceType.FILE, null);
+    if(files != null) {
+      ArrayList<String> realFiles = new ArrayList<String> (files.size());
+      for(String one: files) {
+        try {
+          realFiles.add(Utilities.realFile(one, conf));
+        } catch (IOException e) {
+          throw new RuntimeException ("Cannot validate file " + one +
+                                      "due to exception: " + e.getMessage(), e);
+        }
+      }
+      return StringUtils.join(realFiles, ",");
+    } else {
+      return "";
+    }
+  }
+
+
   /**
    * Initialization when invoked from QL
    */
   public void initialize (HiveConf conf) {
     super.initialize(conf);
     job = new JobConf(conf, ExecDriver.class);
+    String realFiles = getRealFiles(job);
+    if (realFiles != null && realFiles.length() > 0) {
+      job.set("tmpfiles", realFiles);
+    }
   }
 
   /**
@@ -121,8 +148,7 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
               }
             }
           }
-        }
-                                           );
+        });
     }
   }
 
@@ -207,6 +233,7 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
 
     Utilities.setMapRedWork(job, work);
     
+    
     for(String onefile: work.getPathToAliases().keySet()) {
       LOG.info("Adding input file " + onefile);
       FileInputFormat.addInputPaths(job, onefile);
@@ -217,8 +244,8 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
     FileOutputFormat.setOutputPath(job, new Path(jobScratchDir));
     job.setMapperClass(ExecMapper.class);
     
-    job.setMapOutputValueClass(Text.class);
     job.setMapOutputKeyClass(HiveKey.class);    
+    job.setMapOutputValueClass(BytesWritable.class);
     
     job.setNumReduceTasks(work.getNumReduceTasks().intValue());
     job.setReducerClass(ExecReducer.class);
@@ -265,6 +292,10 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
       
       inferNumReducers();
       JobClient jc = new JobClient(job);
+      
+      // make this client wait if the job tracker is not behaving well.
+      Throttle.checkJobTracker(job, LOG);
+
       rj = jc.submitJob(job);
 
       // add to list of running jobs so in case of abnormal shutdown can kill it.
@@ -306,7 +337,8 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
   }
   
   private static void printUsage() {
-    System.out.println("ExecDriver -plan <plan-file> [-jobconf k1=v1 [-jobconf k2=v2] ...]");
+    System.out.println("ExecDriver -plan <plan-file> [-jobconf k1=v1 [-jobconf k2=v2] ...] "+
+                       "[-files <file1>[,<file2>] ...]");
     System.exit(1);
   }
 
@@ -314,15 +346,19 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
     String planFileName = null;
     ArrayList<String> jobConfArgs = new ArrayList<String> ();
     boolean isSilent = false;
+    String files = null;
 
     try{
       for(int i=0; i<args.length; i++) {
         if(args[i].equals("-plan")) {
           planFileName = args[++i];
+          System.out.println("plan = "+planFileName);
         } else if (args[i].equals("-jobconf")) {
           jobConfArgs.add(args[++i]);
         } else if (args[i].equals("-silent")) {
           isSilent = true;
+        } else if (args[i].equals("-files")) {
+          files = args[++i];
         }
       }
     } catch (IndexOutOfBoundsException e) {
@@ -350,6 +386,10 @@ public class ExecDriver extends Task<mapredWork> implements Serializable {
       }
     }
 
+    if(files != null) {
+      conf.set("tmpfiles", files);
+    }
+
     URI pathURI = (new Path(planFileName)).toUri();
     InputStream pathData;
     if(StringUtils.isEmpty(pathURI.getScheme())) {

+ 16 - 28
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecReducer.java

@@ -27,20 +27,17 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
 
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.mapredWork;
 import org.apache.hadoop.hive.ql.plan.tableDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.exec.ExecMapper.reportStats;
-import org.apache.hadoop.hive.serde2.ColumnSet;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.MetadataListStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 
 public class ExecReducer extends MapReduceBase implements Reducer {
 
@@ -74,15 +71,23 @@ public class ExecReducer extends MapReduceBase implements Reducer {
     reducer.setMapredWork(gWork);
     isTagged = gWork.getNeedsTagging();
     try {
-      // We should initialize the SerDe with the TypeInfo when available.
-      tableDesc keyTableDesc = PlanUtils.getReduceKeyDesc(gWork);
+      tableDesc keyTableDesc = gWork.getKeyDesc();
       inputKeyDeserializer = (SerDe)ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
       inputKeyDeserializer.initialize(null, keyTableDesc.getProperties());
-      for(int tag=0; tag<Byte.MAX_VALUE; tag++) {
+      keyObjectInspector = inputKeyDeserializer.getObjectInspector();
+      for(int tag=0; tag<gWork.getTagToValueDesc().size(); tag++) {
         // We should initialize the SerDe with the TypeInfo when available.
-        tableDesc valueTableDesc = PlanUtils.getReduceValueDesc(gWork, tag);
+        tableDesc valueTableDesc = gWork.getTagToValueDesc().get(tag);
         inputValueDeserializer[tag] = (SerDe)ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
         inputValueDeserializer[tag].initialize(null, valueTableDesc.getProperties());
+        valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
+        
+        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
+        ois.add(keyObjectInspector);
+        ois.add(valueObjectInspector[tag]);
+        ois.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(Byte.class));
+        rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(
+            Arrays.asList(fieldNames), ois);
       }
     } catch (SerDeException e) {
       throw new RuntimeException(e);
@@ -143,18 +148,12 @@ public class ExecReducer extends MapReduceBase implements Reducer {
       } catch (SerDeException e) {
         throw new HiveException(e);
       }
-      // This is a hack for generating the correct ObjectInspector.
-      // In the future, we should use DynamicSerde and initialize it using the type info. 
-      if (keyObjectInspector == null) {
-        // Directly create ObjectInspector here because we didn't know the number of cols till now.
-        keyObjectInspector = MetadataListStructObjectInspector.getInstance(((ColumnSet)keyObject).col.size()); 
-      }
       // System.err.print(keyObject.toString());
       while (values.hasNext()) {
-        Text valueText = (Text)values.next();
+        Writable valueWritable = (Writable) values.next();
         //System.err.print(who.getHo().toString());
         try {
-          valueObject[tag] = inputValueDeserializer[tag].deserialize(valueText);
+          valueObject[tag] = inputValueDeserializer[tag].deserialize(valueWritable);
         } catch (SerDeException e) {
           throw new HiveException(e);
         }
@@ -162,23 +161,12 @@ public class ExecReducer extends MapReduceBase implements Reducer {
         row.add(keyObject);
         row.add(valueObject[tag]);
         row.add(tag);
-        if (valueObjectInspector[tag] == null) {
-          // Directly create ObjectInspector here because we didn't know the number of cols till now.
-          valueObjectInspector[tag] = MetadataListStructObjectInspector.getInstance(((ColumnSet)valueObject[tag]).col.size());
-          ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-          ois.add(keyObjectInspector);
-          ois.add(valueObjectInspector[tag]);
-          ois.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(Byte.class));
-          rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(
-              Arrays.asList(fieldNames), ois);
-        }
         reducer.process(row, rowObjectInspector[tag]);
       }
 
-
     } catch (HiveException e) {
       abort = true;
-      throw new IOException (e.getMessage());
+      throw new IOException (e);
     }
   }
 

+ 2 - 13
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFuncEvaluator.java

@@ -72,19 +72,8 @@ public class ExprNodeFuncEvaluator extends ExprNodeEvaluator {
       paramEvaluators[i].evaluate(row, rowInspector, paramInspectableObjects[i]);
       paramValues[i] = paramInspectableObjects[i].o;
     }
-    try {
-      result.o = udfMethod.invoke(udf, paramValues);
-      result.oi = outputObjectInspector;
-    } catch (Exception e) {
-      if (e instanceof HiveException) {
-        throw (HiveException)e;
-      } else if (e instanceof RuntimeException) {
-        throw (RuntimeException)e;
-      } else {
-        throw new HiveException("Unable to execute UDF function " + udf.getClass() + " " 
-          + udfMethod + " on inputs " + "(" + paramValues.length + ") " + Arrays.asList(paramValues) + ": " + e.getMessage(), e);
-      }
-    }
+    result.o = FunctionRegistry.invoke(udfMethod, udf, paramValues);
+    result.oi = outputObjectInspector;
   }
 
   public ObjectInspector evaluateInspector(ObjectInspector rowInspector)

+ 2 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java

@@ -44,4 +44,6 @@ public class ExtractOperator extends Operator<extractDesc> implements Serializab
     eval.evaluate(row, rowInspector, result);
     forward(result.o, result.oi);
   }
+
+  
 }

+ 138 - 33
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java

@@ -20,7 +20,12 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.io.Serializable;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Vector;
 import java.util.Properties;
@@ -29,8 +34,11 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.plan.fetchWork;
+import org.apache.hadoop.hive.ql.plan.partitionDesc;
+import org.apache.hadoop.hive.ql.plan.tableDesc;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -45,6 +53,8 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
  * FetchTask implementation
@@ -56,42 +66,27 @@ public class FetchTask extends Task<fetchWork> implements Serializable {
   
   public void initialize (HiveConf conf) {
    	super.initialize(conf);
-    splitNum = 0;
     currRecReader = null;
     
    	try {
        // Create a file system handle
        fs = FileSystem.get(conf);   
-       serde = work.getDeserializerClass().newInstance();
-       serde.initialize(null, work.getSchema());
        job = new JobConf(conf, ExecDriver.class);
-       Path inputP = work.getSrcDir();
-       if(!fs.exists(inputP)) {
-         empty = true;
-         return;
-       }
-
-       empty = true;
-       FileStatus[] fStats = fs.listStatus(inputP);
-       for (FileStatus fStat:fStats) {
-         if (fStat.getLen() > 0) {
-           empty = false;
-           break;
-         }
-       }
-
-       if (empty)
-         return;
-
-       FileInputFormat.setInputPaths(job, inputP);
-       inputFormat = getInputFormatFromCache(work.getInputFormatClass(), job);
-	     inputSplits = inputFormat.getSplits(job, 1);
+       
 	 	   mSerde = new MetadataTypedColumnsetSerDe();
        Properties mSerdeProp = new Properties();
        mSerdeProp.put(Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode);
        mSerdeProp.put(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
        mSerde.initialize(null, mSerdeProp);
+       
+       currPath = null;
+       currTbl = null;
+       currPart = null;
+       iterPath = null;
+       iterPartDesc = null;
        totalRows = 0;
+       tblDataDone = false;
+       rowWithPart = new Object[2];
     } catch (Exception e) {
       // Bail out ungracefully - we should never hit
       // this here - but would have hit it in SemanticAnalyzer
@@ -136,11 +131,116 @@ public class FetchTask extends Task<fetchWork> implements Serializable {
 	private Deserializer  serde;
 	private MetadataTypedColumnsetSerDe mSerde;
 	private int totalRows;
-  private boolean empty;
+  private Iterator<Path> iterPath;
+  private Iterator<partitionDesc> iterPartDesc; 
+  private Path currPath;
+  private partitionDesc currPart;
+  private tableDesc     currTbl;
+  private boolean       tblDataDone;
+  private StructObjectInspector rowObjectInspector;
+  private Object[] rowWithPart;
+
+  private void setPrtnDesc() throws Exception {
+    List<String> partNames = new ArrayList<String>();
+    List<String> partValues = new ArrayList<String>();
+    
+    String pcols = currPart.getTableDesc().getProperties().getProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
+    LinkedHashMap<String, String> partSpec = currPart.getPartSpec();
+    
+    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
+    String[] partKeys = pcols.trim().split("/");
+    for(String key: partKeys) {
+      partNames.add(key);
+      partValues.add(partSpec.get(key));
+      partObjectInspectors.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
+    }
+    StructObjectInspector partObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
+    rowObjectInspector = (StructObjectInspector)serde.getObjectInspector();
+    
+    rowWithPart[1] = partValues;
+    rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays.asList(new StructObjectInspector[]{
+                                                                                              rowObjectInspector, partObjectInspector}));
+  }
+
+  private void getNextPath() throws Exception {
+    // first time
+    if (iterPath == null) {
+      if (work.getTblDir() != null) {
+        if (!tblDataDone) {
+          currPath = work.getTblDir();
+          currTbl = work.getTblDesc();
+          if (fs.exists(currPath)) 
+          {
+            FileStatus[] fStats = fs.listStatus(currPath);
+            for (FileStatus fStat:fStats) {
+              if (fStat.getLen() > 0) {
+                tblDataDone = true;
+                break;
+              }
+            }
+          }
+
+          if (!tblDataDone) currPath = null;
+          return;
+        } else {
+          currTbl = null;
+          currPath = null;
+        }
+        return;
+      }
+      else {
+        iterPath = work.getPartDir().iterator();
+        iterPartDesc = work.getPartDesc().iterator();
+      }
+    }
+
+		while (iterPath.hasNext()) {
+			Path nxt = iterPath.next();
+      partitionDesc prt = iterPartDesc.next();
+		  if (fs.exists(nxt)) 
+      {
+        FileStatus[] fStats = fs.listStatus(nxt);
+        for (FileStatus fStat:fStats) {
+          if (fStat.getLen() > 0) {
+            currPath = nxt;
+            currPart = prt;
+            return;
+          }
+        }
+      }
+		}
+	}
   
  	private RecordReader<WritableComparable, Writable> getRecordReader() throws Exception {
-		if (splitNum >= inputSplits.length) 
-  	  return null;
+    if (currPath == null) {
+      getNextPath();
+      if (currPath == null)
+        return null;
+
+      FileInputFormat.setInputPaths(job, currPath);
+      tableDesc tmp = currTbl;
+      if (tmp == null)
+        tmp = currPart.getTableDesc();
+      inputFormat = getInputFormatFromCache(tmp.getInputFileFormatClass(), job);
+      inputSplits = inputFormat.getSplits(job, 1);
+      splitNum = 0;
+      serde = tmp.getDeserializerClass().newInstance();
+      serde.initialize(null, tmp.getProperties());
+      LOG.debug("Creating fetchTask with deserializer typeinfo: " + serde.getObjectInspector().getTypeName());
+      LOG.debug("deserializer properties: " + tmp.getProperties());
+      if (!tblDataDone)
+        setPrtnDesc();
+    }
+
+    if (splitNum >= inputSplits.length) {
+      if (currRecReader != null) {
+        currRecReader.close();
+        currRecReader = null;
+      }
+      currPath = null;
+      return getRecordReader();
+    }
+
 		currRecReader = inputFormat.getRecordReader(inputSplits[splitNum++], job, Reporter.NULL);
 		key = currRecReader.createKey();
 		value = currRecReader.createValue();
@@ -149,16 +249,15 @@ public class FetchTask extends Task<fetchWork> implements Serializable {
  	
   public boolean fetch(Vector<String> res) {
   	try {
-      if (empty)
-        return false;
-
       int numRows = 0;
       int rowsRet = MAX_ROWS;
       if ((work.getLimit() >= 0) && ((work.getLimit() - totalRows) < rowsRet))
         rowsRet = work.getLimit() - totalRows;
       if (rowsRet <= 0) {
-        if (currRecReader != null)
+        if (currRecReader != null) {
           currRecReader.close();
+          currRecReader = null;
+        }
         return false;
       }
 
@@ -174,12 +273,18 @@ public class FetchTask extends Task<fetchWork> implements Serializable {
   	    }
       	boolean ret = currRecReader.next(key, value);
    	  	if (ret) {
-   	  		Object obj = serde.deserialize(value);
-   	  		res.add(((Text)mSerde.serialize(obj, serde.getObjectInspector())).toString());
+          if (tblDataDone) {
+            Object obj = serde.deserialize(value);
+            res.add(((Text)mSerde.serialize(obj, serde.getObjectInspector())).toString());
+          } else {
+            rowWithPart[0] = serde.deserialize(value);
+            res.add(((Text)mSerde.serialize(rowWithPart, rowObjectInspector)).toString());
+          }
    	  		numRows++;
    	  	}
    	  	else {
           currRecReader.close();
+          currRecReader = null;
    	  		currRecReader = getRecordReader();
    	  		if (currRecReader == null) {
             if (numRows == 0) 
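
The fetch path now pairs each deserialized row with the values of its partition columns: setPrtnDesc() builds a struct inspector over the partition keys and unions it with the table's row inspector, and fetch() serializes the two-element rowWithPart array through that combined inspector. A minimal, hypothetical sketch of the same composition (class and method names here are illustrative, not part of the patch):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class PartitionedRowSketch {
  // Combine a table row inspector with string-typed partition columns, the
  // same way setPrtnDesc() builds rowObjectInspector for rowWithPart.
  public static StructObjectInspector addPartitionColumns(
      StructObjectInspector rowInspector, List<String> partKeys) {
    List<ObjectInspector> partInspectors = new ArrayList<ObjectInspector>();
    for (int i = 0; i < partKeys.size(); i++) {
      // partition values travel as strings
      partInspectors.add(
          ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
    }
    StructObjectInspector partInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(partKeys, partInspectors);
    // the fetched object is then an Object[2]: {deserialized row, partition values}
    return ObjectInspectorFactory.getUnionStructObjectInspector(
        Arrays.asList(new StructObjectInspector[] { rowInspector, partInspector }));
  }
}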

+ 16 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java

@@ -19,13 +19,18 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.*;
+import java.util.HashMap;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.filterDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
 
 /**
  * Filter operator implementation
@@ -73,4 +78,15 @@ public class FilterOperator extends Operator <filterDesc> implements Serializabl
           conditionInspectableObject.o.getClass().getName());
     }
   }
+  
+  public List<String> mergeColListsFromChildren(List<String> colList,
+                                        HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) {
+    exprNodeDesc condn = conf.getPredicate();
+
+    // get list of columns used in the filter
+    List<String> cl = condn.getCols();
+
+    return Utilities.mergeUniqElems(colList, cl);
+  }
+
 }
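
mergeColListsFromChildren lets the filter contribute the columns referenced by its predicate to the column-pruning pass; the actual union is delegated to Utilities.mergeUniqElems (added in the Utilities.java change listed above). As a rough sketch, assuming mergeUniqElems simply appends elements that are not already present, its behaviour is:

import java.util.ArrayList;
import java.util.List;

public class MergeUniqElemsSketch {
  // Assumed behaviour of Utilities.mergeUniqElems: union into the first list,
  // preserving order and skipping duplicates. Illustrative only.
  public static List<String> mergeUniqElems(List<String> dest, List<String> src) {
    if (src == null) {
      return dest;
    }
    if (dest == null) {
      dest = new ArrayList<String>();
    }
    for (String col : src) {
      if (!dest.contains(col)) {
        dest.add(col);
      }
    }
    return dest;
  }
}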

+ 62 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java

@@ -22,6 +22,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
 import java.lang.reflect.Method;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -29,6 +30,8 @@ import java.util.Map;
 import java.lang.Void;
 
 import org.apache.hadoop.hive.ql.exec.FunctionInfo.OperatorType;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.groupByDesc;
 import org.apache.hadoop.hive.ql.udf.*;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 
@@ -247,7 +250,7 @@ public class FunctionRegistry {
 
   /**
    * This method is shared between UDFRegistry and UDAFRegistry.
-   * methodName will be "evaluate" for UDFRegistry, and "aggregate" for UDAFRegistry. 
+   * methodName will be "evaluate" for UDFRegistry, and "aggregate"/"evaluate"/"evaluatePartial" for UDAFRegistry. 
    */
   public static <T> Method getMethodInternal(Class<? extends T> udfClass, String methodName, boolean exact, List<Class<?>> argumentClasses) {
     int leastImplicitConversions = Integer.MAX_VALUE;
@@ -319,6 +322,9 @@ public class FunctionRegistry {
     return result;
   }
 
+  /**
+   * Returns the "aggregate" method of the UDAF.
+   */
   public static Method getUDAFMethod(String name, List<Class<?>> argumentClasses) {
     Class<? extends UDAF> udaf = getUDAF(name);
     if (udaf == null)
@@ -327,7 +333,62 @@ public class FunctionRegistry {
                                          argumentClasses);
   }
 
+  /**
+   * Returns the evaluate method for the UDAF based on the aggregation mode.
+   * See groupByDesc.Mode for details.
+   * 
+   * @param name  name of the UDAF
+   * @param mode  the mode of the aggregation
+   * @return      null if no such UDAF is found
+   */
+  public static Method getUDAFEvaluateMethod(String name, groupByDesc.Mode mode) {
+    Class<? extends UDAF> udaf = getUDAF(name);
+    if (udaf == null)
+      return null;
+    return FunctionRegistry.getMethodInternal(udaf, 
+        (mode == groupByDesc.Mode.COMPLETE || mode == groupByDesc.Mode.FINAL) 
+        ? "evaluate" : "evaluatePartial", true,
+        new ArrayList<Class<?>>() );
+  }
+
+  /**
+   * Returns the "aggregate" method of the UDAF.
+   */
   public static Method getUDAFMethod(String name, Class<?>... argumentClasses) {
     return getUDAFMethod(name, Arrays.asList(argumentClasses));
   }
+  
+  public static Object invoke(Method m, Object thisObject, Object[] arguments) throws HiveException {
+    Object o;
+    try {
+      o = m.invoke(thisObject, arguments);
+    } catch (Exception e) {
+      String thisObjectString = "" + thisObject + " of class " + 
+        (thisObject == null? "null" : thisObject.getClass().getName());
+
+      StringBuilder argumentString = new StringBuilder();
+      if (arguments == null) {
+        argumentString.append("null");
+      } else {
+        argumentString.append("{");
+        for (int i=0; i<arguments.length; i++) {
+          if (i>0) {
+            argumentString.append(", ");
+          }
+          if (arguments[i] == null) {
+            argumentString.append("null");
+          } else {
+            argumentString.append("" + arguments[i] + ":" + arguments[i].getClass().getName());
+          }
+        }
+        argumentString.append("} of size " + arguments.length);
+      }
+      
+      throw new HiveException("Unable to execute method " + m + " " 
+          + " on object " + thisObjectString
+          + " with arguments " + argumentString.toString() 
+          + ":" + e.getMessage());
+    }
+    return o;
+  }
 }
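
The mode-to-method mapping above means COMPLETE and FINAL group-bys finish a bucket with the UDAF's evaluate(), while the map-side modes (PARTIAL1, PARTIAL2, HASH) call evaluatePartial(). A hypothetical caller (the helper name is made up) would combine it with the new FunctionRegistry.invoke like this:

import java.lang.reflect.Method;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.groupByDesc;

public class UdafEvaluateSketch {
  // Finish one aggregation bucket: FINAL/COMPLETE resolve to evaluate(),
  // PARTIAL1/PARTIAL2/HASH resolve to evaluatePartial().
  public static Object finish(String udafName, groupByDesc.Mode mode, Object aggInstance)
      throws HiveException {
    Method evaluate = FunctionRegistry.getUDAFEvaluateMethod(udafName, mode);
    if (evaluate == null) {
      throw new HiveException("Unknown UDAF: " + udafName);
    }
    // invoke() wraps any reflection failure into a HiveException that spells
    // out the target object and arguments, as implemented above.
    return FunctionRegistry.invoke(evaluate, aggInstance, new Object[0]);
  }
}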

+ 232 - 135
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java

@@ -18,9 +18,12 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Iterator;
+import java.util.Map;
 import java.io.Serializable;
 import java.lang.reflect.Method;
 
@@ -32,6 +35,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
 
 /**
  * GroupBy operator implementation.
@@ -61,98 +66,112 @@ public class GroupByOperator extends Operator <groupByDesc> implements Serializa
   transient protected HashMap<ArrayList<Object>, UDAF[]> hashAggregations;
   
   transient boolean firstRow;
-  
+  transient long    totalMemory;
+  transient boolean hashAggr;
+
   public void initialize(Configuration hconf) throws HiveException {
     super.initialize(hconf);
-    try {
-      // init keyFields
-      keyFields = new ExprNodeEvaluator[conf.getKeys().size()];
-      for (int i = 0; i < keyFields.length; i++) {
-        keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i));
-      }
-    
-      // init aggregationParameterFields
-      aggregationParameterFields = new ExprNodeEvaluator[conf.getAggregators().size()][];
-      for (int i = 0; i < aggregationParameterFields.length; i++) {
-        ArrayList<exprNodeDesc> parameters = conf.getAggregators().get(i).getParameters();
-        aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
-        for (int j = 0; j < parameters.size(); j++) {
-          aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j));
-        }
-      }
-      // init aggregationIsDistinct
-      aggregationIsDistinct = new boolean[conf.getAggregators().size()];
-      for(int i=0; i<aggregationIsDistinct.length; i++) {
-        aggregationIsDistinct[i] = conf.getAggregators().get(i).getDistinct();
-      }
+    totalMemory = Runtime.getRuntime().totalMemory();
 
-      // init aggregationClasses  
-      aggregationClasses = (Class<? extends UDAF>[]) new Class[conf.getAggregators().size()];
-      for (int i = 0; i < conf.getAggregators().size(); i++) {
-        aggregationDesc agg = conf.getAggregators().get(i);
-        aggregationClasses[i] = agg.getAggregationClass();
+    // init keyFields
+    keyFields = new ExprNodeEvaluator[conf.getKeys().size()];
+    for (int i = 0; i < keyFields.length; i++) {
+      keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i));
+    }
+  
+    // init aggregationParameterFields
+    aggregationParameterFields = new ExprNodeEvaluator[conf.getAggregators().size()][];
+    for (int i = 0; i < aggregationParameterFields.length; i++) {
+      ArrayList<exprNodeDesc> parameters = conf.getAggregators().get(i).getParameters();
+      aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
+      for (int j = 0; j < parameters.size(); j++) {
+        aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j));
       }
+    }
+    // init aggregationIsDistinct
+    aggregationIsDistinct = new boolean[conf.getAggregators().size()];
+    for(int i=0; i<aggregationIsDistinct.length; i++) {
+      aggregationIsDistinct[i] = conf.getAggregators().get(i).getDistinct();
+    }
 
-      // init aggregations, aggregationsAggregateMethods,
-      // aggregationsEvaluateMethods
-      aggregationsAggregateMethods = new Method[aggregationClasses.length];
-      aggregationsEvaluateMethods = new Method[aggregationClasses.length];
-      String aggregateMethodName = (conf.getMode() == groupByDesc.Mode.PARTIAL2 
-         ? "aggregatePartial" : "aggregate");
-      String evaluateMethodName = ((conf.getMode() == groupByDesc.Mode.PARTIAL1 || conf.getMode() == groupByDesc.Mode.HASH)
-         ? "evaluatePartial" : "evaluate");
-      for(int i=0; i<aggregationClasses.length; i++) {
-        // aggregationsAggregateMethods
-        for( Method m : aggregationClasses[i].getMethods() ){
-          if( m.getName().equals( aggregateMethodName ) 
-              && m.getParameterTypes().length == aggregationParameterFields[i].length) {              
-            aggregationsAggregateMethods[i] = m;
-            break;
-          }
-        }
-        if (null == aggregationsAggregateMethods[i]) {
-          throw new RuntimeException("Cannot find " + aggregateMethodName + " method of UDAF class "
-                                   + aggregationClasses[i].getName() + " that accepts "
-                                   + aggregationParameterFields[i].length + " parameters!");
-        }
-        // aggregationsEvaluateMethods
-        aggregationsEvaluateMethods[i] = aggregationClasses[i].getMethod(evaluateMethodName);
+    // init aggregationClasses  
+    aggregationClasses = (Class<? extends UDAF>[]) new Class[conf.getAggregators().size()];
+    for (int i = 0; i < conf.getAggregators().size(); i++) {
+      aggregationDesc agg = conf.getAggregators().get(i);
+      aggregationClasses[i] = agg.getAggregationClass();
+    }
+
+    // init aggregations, aggregationsAggregateMethods,
+    // aggregationsEvaluateMethods
+    aggregationsAggregateMethods = new Method[aggregationClasses.length];
+    aggregationsEvaluateMethods = new Method[aggregationClasses.length];
+    String evaluateMethodName = ((conf.getMode() == groupByDesc.Mode.PARTIAL1 || conf.getMode() == groupByDesc.Mode.HASH ||
+                                  conf.getMode() == groupByDesc.Mode.PARTIAL2)
+                                 ? "evaluatePartial" : "evaluate");
+
+    for(int i=0; i<aggregationClasses.length; i++) {
+      String aggregateMethodName = (((conf.getMode() == groupByDesc.Mode.PARTIAL1) || (conf.getMode() == groupByDesc.Mode.HASH)) ? "aggregate" : "aggregatePartial");
 
-        if (null == aggregationsEvaluateMethods[i]) {
-          throw new RuntimeException("Cannot find " + evaluateMethodName + " method of UDAF class "
-                                   + aggregationClasses[i].getName() + "!");
+      if (aggregationIsDistinct[i] && (conf.getMode() != groupByDesc.Mode.FINAL))
+        aggregateMethodName = "aggregate";
+      // aggregationsAggregateMethods
+      for( Method m : aggregationClasses[i].getMethods() ){
+        if( m.getName().equals( aggregateMethodName ) 
+            && m.getParameterTypes().length == aggregationParameterFields[i].length) {              
+          aggregationsAggregateMethods[i] = m;
+          break;
         }
-        assert(aggregationsEvaluateMethods[i] != null);
       }
-
-      if (conf.getMode() != groupByDesc.Mode.HASH) {
-        aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
-        aggregations = newAggregations();
-      } else {
-        hashAggregations = new HashMap<ArrayList<Object>, UDAF[]>();
+      if (null == aggregationsAggregateMethods[i]) {
+        throw new HiveException("Cannot find " + aggregateMethodName + " method of UDAF class "
+                                 + aggregationClasses[i].getName() + " that accepts "
+                                 + aggregationParameterFields[i].length + " parameters!");
       }
-      // init objectInspectors
-      int totalFields = keyFields.length + aggregationClasses.length;
-      objectInspectors = new ArrayList<ObjectInspector>(totalFields);
-      for(int i=0; i<keyFields.length; i++) {
-        objectInspectors.add(null);
+      // aggregationsEvaluateMethods
+      try {
+        aggregationsEvaluateMethods[i] = aggregationClasses[i].getMethod(evaluateMethodName);
+      } catch (Exception e) {
+        throw new HiveException("Unable to get the method named " + evaluateMethodName + " from " 
+            + aggregationClasses[i] + ": " + e.getMessage());
       }
-      for(int i=0; i<aggregationClasses.length; i++) {
-        objectInspectors.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(
-            aggregationsEvaluateMethods[i].getReturnType()));
+
+      if (null == aggregationsEvaluateMethods[i]) {
+        throw new HiveException("Cannot find " + evaluateMethodName + " method of UDAF class "
+                                 + aggregationClasses[i].getName() + "!");
       }
-      
-      firstRow = true;
-    } catch (Exception e) {
-      e.printStackTrace();
-      throw new RuntimeException(e);
+      assert(aggregationsEvaluateMethods[i] != null);
+    }
+
+    aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
+    if (conf.getMode() != groupByDesc.Mode.HASH) {
+      aggregations = newAggregations();
+      hashAggr = false;
+    } else {
+      hashAggregations = new HashMap<ArrayList<Object>, UDAF[]>();
+      hashAggr = true;
     }
+    // init objectInspectors
+    int totalFields = keyFields.length + aggregationClasses.length;
+    objectInspectors = new ArrayList<ObjectInspector>(totalFields);
+    for(int i=0; i<keyFields.length; i++) {
+      objectInspectors.add(null);
+    }
+    for(int i=0; i<aggregationClasses.length; i++) {
+      objectInspectors.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(
+          aggregationsEvaluateMethods[i].getReturnType()));
+    }
+    
+    firstRow = true;
   }
 
-  protected UDAF[] newAggregations() throws Exception {      
+  protected UDAF[] newAggregations() throws HiveException {      
     UDAF[] aggs = new UDAF[aggregationClasses.length];
     for(int i=0; i<aggregationClasses.length; i++) {
-      aggs[i] = aggregationClasses[i].newInstance();
+      try {
+        aggs[i] = aggregationClasses[i].newInstance();
+      } catch (Exception e) {
+        throw new HiveException("Unable to create an instance of class " + aggregationClasses[i] + ": " + e.getMessage());
+      }
       aggs[i].init();
     }
     return aggs;
@@ -160,7 +179,8 @@ public class GroupByOperator extends Operator <groupByDesc> implements Serializa
 
   InspectableObject tempInspectableObject = new InspectableObject();
   
-  protected void updateAggregations(UDAF[] aggs, Object row, ObjectInspector rowInspector, Object[][] lastInvoke) throws Exception {
+  protected void updateAggregations(UDAF[] aggs, Object row, ObjectInspector rowInspector, boolean hashAggr, boolean newEntry,
+                                    Object[][] lastInvoke) throws HiveException {
     for(int ai=0; ai<aggs.length; ai++) {
       // Calculate the parameters 
       Object[] o = new Object[aggregationParameterFields[ai].length];
@@ -168,24 +188,35 @@ public class GroupByOperator extends Operator <groupByDesc> implements Serializa
         aggregationParameterFields[ai][pi].evaluate(row, rowInspector, tempInspectableObject);
         o[pi] = tempInspectableObject.o; 
       }
+
       // Update the aggregations.
-      if (aggregationIsDistinct[ai] && lastInvoke != null) {
-        // different differentParameters?
-        boolean differentParameters = (lastInvoke[ai] == null);
-        if (!differentParameters) {
-          for(int pi=0; pi<o.length; pi++) {
-            if (!o[pi].equals(lastInvoke[ai][pi])) {
-              differentParameters = true;
-              break;
-            }
+      if (aggregationIsDistinct[ai]) {
+        if (hashAggr) {
+          if (newEntry) {
+            FunctionRegistry.invoke(aggregationsAggregateMethods[ai], aggs[ai], o);
           }
-        }  
-        if (differentParameters) {
-          aggregationsAggregateMethods[ai].invoke(aggs[ai], o);
-          lastInvoke[ai] = o;
         }
-      } else {
-        aggregationsAggregateMethods[ai].invoke(aggs[ai], o);
+        else {
+          boolean differentParameters = false;
+          if ((lastInvoke == null) || (lastInvoke[ai] == null))
+            differentParameters = true;
+          else {
+            for(int pi=0; pi<o.length; pi++) {
+              if (!o[pi].equals(lastInvoke[ai][pi])) {
+                differentParameters = true;
+                break;
+              }
+            }  
+          }
+
+          if (differentParameters) {
+            FunctionRegistry.invoke(aggregationsAggregateMethods[ai], aggs[ai], o);
+            lastInvoke[ai] = o;
+          }
+        }
+      }
+      else {
+        FunctionRegistry.invoke(aggregationsAggregateMethods[ai], aggs[ai], o);
       }
     }
   }
@@ -208,53 +239,98 @@ public class GroupByOperator extends Operator <groupByDesc> implements Serializa
         for(int i=0; i<objectInspectors.size(); i++) {
           fieldNames.add(Integer.valueOf(i).toString());
         }
-        outputObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
-          fieldNames, objectInspectors);
-      }
-      // Prepare aggs for updating
-      UDAF[] aggs = null;
-      Object[][] lastInvoke = null;
-      if (aggregations != null) {
-        // sort-based aggregation
-        // Need to forward?
-        boolean keysAreEqual = newKeys.equals(currentKeys);
-        if (currentKeys != null && !keysAreEqual) {
-          forward(currentKeys, aggregations);
-        }
-        // Need to update the keys?
-        if (currentKeys == null || !keysAreEqual) {
-          currentKeys = newKeys;
-          // init aggregations
-          for(UDAF aggregation: aggregations) {
-            aggregation.init();
-          }
-          // clear parameters in last-invoke
-          for(int i=0; i<aggregationsParametersLastInvoke.length; i++) {
-            aggregationsParametersLastInvoke[i] = null;
-          }
-        }
-        aggs = aggregations;
-        lastInvoke = aggregationsParametersLastInvoke;
-      } else {
-        // hash-based aggregations
-        aggs = hashAggregations.get(newKeys);
-        if (aggs == null) {
-          aggs = newAggregations();
-          hashAggregations.put(newKeys, aggs);
-          // TODO: Hash aggregation does not support DISTINCT now
-          lastInvoke = null;
-        }
+        outputObjectInspector = 
+          ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, objectInspectors);
       }
 
-      // Update the aggs
-      updateAggregations(aggs, row, rowInspector, lastInvoke);
-
+      if (hashAggr)
+        processHashAggr(row, rowInspector, newKeys);
+      else
+        processAggr(row, rowInspector, newKeys);
+    } catch (HiveException e) {
+      throw e;
     } catch (Exception e) {
-      e.printStackTrace();
       throw new HiveException(e);
     }
   }
-  
+
+  private void processHashAggr(Object row, ObjectInspector rowInspector, ArrayList<Object> newKeys) throws HiveException {
+    // Prepare aggs for updating
+    UDAF[] aggs = null;
+    boolean newEntry = false;
+
+    // hash-based aggregations
+    aggs = hashAggregations.get(newKeys);
+    if (aggs == null) {
+      aggs = newAggregations();
+      hashAggregations.put(newKeys, aggs);
+      newEntry = true;
+    }
+    
+    // Update the aggs
+    updateAggregations(aggs, row, rowInspector, true, newEntry, null);
+    
+    // currently, we use a simple approximation - if 90% of memory is being
+    // used, flush 
+    long freeMemory = Runtime.getRuntime().freeMemory();
+    if (shouldBeFlushed(totalMemory, freeMemory)) {
+      flush();
+    }
+  }
+
+  private void processAggr(Object row, ObjectInspector rowInspector, ArrayList<Object> newKeys) throws HiveException {
+    // Prepare aggs for updating
+    UDAF[] aggs = null;
+    Object[][] lastInvoke = null;
+    boolean keysAreEqual = newKeys.equals(currentKeys);
+    
+    // forward the current keys if needed for sort-based aggregation
+    if (currentKeys != null && !keysAreEqual)
+      forward(currentKeys, aggregations);
+    
+    // Need to update the keys?
+    if (currentKeys == null || !keysAreEqual) {
+      currentKeys = newKeys;
+      
+      // init aggregations
+      for(UDAF aggregation: aggregations)
+        aggregation.init();
+      
+      // clear parameters in last-invoke
+      for(int i=0; i<aggregationsParametersLastInvoke.length; i++)
+        aggregationsParametersLastInvoke[i] = null;
+    }
+    
+    aggs = aggregations;
+    
+    lastInvoke = aggregationsParametersLastInvoke;
+    // Update the aggs
+    updateAggregations(aggs, row, rowInspector, false, false, lastInvoke);
+  }
+
+  private boolean shouldBeFlushed(long total, long free) {
+    // flush once free memory drops to 10% of the total or less (i.e. ~90% used)
+    return (10 * free <= total);
+  }
+
+  private void flush() throws HiveException {
+    // Currently, the algorithm flushes 10% of the entries - this can be
+    // changed in the future
+
+    int oldSize = hashAggregations.size();
+    Iterator iter = hashAggregations.entrySet().iterator();
+    int numDel = 0;
+    while (iter.hasNext()) {
+      Map.Entry<ArrayList<Object>, UDAF[]> m = (Map.Entry)iter.next();
+      forward(m.getKey(), m.getValue());
+      iter.remove();
+      numDel++;
+      if (numDel * 10 >= oldSize)
+        return;
+    }
+  }
+
   /**
    * Forward a record of keys and aggregation results.
    * 
@@ -262,14 +338,19 @@ public class GroupByOperator extends Operator <groupByDesc> implements Serializa
    *          The keys in the record
    * @throws HiveException
    */
-  protected void forward(ArrayList<Object> keys, UDAF[] aggs) throws Exception {
+  protected void forward(ArrayList<Object> keys, UDAF[] aggs) throws HiveException {
     int totalFields = keys.size() + aggs.length;
     List<Object> a = new ArrayList<Object>(totalFields);
     for(int i=0; i<keys.size(); i++) {
       a.add(keys.get(i));
     }
     for(int i=0; i<aggs.length; i++) {
-      a.add(aggregationsEvaluateMethods[i].invoke(aggs[i]));
+      try {
+        a.add(aggregationsEvaluateMethods[i].invoke(aggs[i]));
+      } catch (Exception e) {
+        throw new HiveException("Unable to execute UDAF function " + aggregationsEvaluateMethods[i] + " " 
+            + " on object " + "(" + aggs[i] + ") " + ": " + e.getMessage());
+      }
     }
     forward(a, outputObjectInspector);
   }
@@ -304,4 +385,20 @@ public class GroupByOperator extends Operator <groupByDesc> implements Serializa
     super.close(abort);
   }
 
+  // Group by contains the columns needed - no need to aggregate from children
+  public List<String> genColLists(HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) {
+    List<String> colLists = new ArrayList<String>();
+    ArrayList<exprNodeDesc> keys = conf.getKeys();
+    for (exprNodeDesc key : keys)
+      colLists = Utilities.mergeUniqElems(colLists, key.getCols());
+    
+    ArrayList<aggregationDesc> aggrs = conf.getAggregators();
+    for (aggregationDesc aggr : aggrs) { 
+      ArrayList<exprNodeDesc> params = aggr.getParameters();
+      for (exprNodeDesc param : params) 
+        colLists = Utilities.mergeUniqElems(colLists, param.getCols());
+    }
+
+    return colLists;
+  }
 }
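
The hash-aggregation path now trades memory for fewer emitted rows: each processed row checks the heap, and once roughly 90% of it is in use the operator forwards and evicts about 10% of the hash table. A self-contained sketch of that policy, detached from the operator (the class name is made up; the thresholds mirror shouldBeFlushed() and flush() above):

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

public class PartialFlushSketch<K, V> {
  private final Map<K, V> buffer = new LinkedHashMap<K, V>();

  // Counterpart of shouldBeFlushed(): flush once free heap is down to ~10%.
  static boolean shouldBeFlushed(long total, long free) {
    return 10 * free <= total;
  }

  // Counterpart of flush(): emit and evict roughly 10% of the buffered entries.
  void flushSome() {
    int oldSize = buffer.size();
    int removed = 0;
    Iterator<Map.Entry<K, V>> it = buffer.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry<K, V> e = it.next();
      emit(e.getKey(), e.getValue());   // stand-in for forward(keys, aggs)
      it.remove();
      removed++;
      if (removed * 10 >= oldSize) {
        return;
      }
    }
  }

  void emit(K key, V value) {
    System.out.println(key + " -> " + value);
  }
}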

+ 179 - 126
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java

@@ -26,6 +26,9 @@ import java.util.Map;
 import java.util.Stack;
 import java.util.Vector;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
@@ -42,7 +45,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
  */
 public class JoinOperator extends Operator<joinDesc> implements Serializable {
 
-  // a list of value expressions for each alias are maintained 
+  static final private Log LOG = LogFactory.getLog(JoinOperator.class.getName());
+  
+  // a list of value expressions for each alias are maintained
   public static class JoinExprMap {
     ExprNodeEvaluator[] valueFields;
 
@@ -56,62 +61,79 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
 
   }
 
-  public static class IntermediateObject{
+  public static class IntermediateObject {
     ArrayList<Object>[] objs;
     int curSize;
 
     public IntermediateObject(ArrayList<Object>[] objs, int curSize) {
-      this.objs  = objs;
+      this.objs = objs;
       this.curSize = curSize;
     }
 
-    public ArrayList<Object>[] getObjs() { return objs; }
-    public int getCurSize() { return curSize; }
-    public void pushObj(ArrayList<Object> obj) { objs[curSize++] = obj; }
-    public void popObj() { curSize--; }
+    public ArrayList<Object>[] getObjs() {
+      return objs;
+    }
+
+    public int getCurSize() {
+      return curSize;
+    }
+
+    public void pushObj(ArrayList<Object> obj) {
+      objs[curSize++] = obj;
+    }
+
+    public void popObj() {
+      curSize--;
+    }
   }
 
   transient protected int numValues; // number of aliases
   transient static protected ExprNodeEvaluator aliasField;
+  transient static protected ExprNodeEvaluator keyField;
   transient protected HashMap<Byte, JoinExprMap> joinExprs;
-  transient static protected Byte[] order; // order in which the results should be outputted
+  transient static protected Byte[] order; // order in which the results should
+                                           // be outputted
   transient protected joinCond[] condn;
   transient protected boolean noOuterJoin;
-  transient private Object[] dummyObj; // for outer joins, contains the potential nulls for the concerned aliases
+  transient private Object[] dummyObj; // for outer joins, contains the
+                                       // potential nulls for the concerned
+                                       // aliases
   transient private Vector<ArrayList<Object>>[] dummyObjVectors;
   transient private Stack<Iterator<ArrayList<Object>>> iterators;
   transient private int totalSz; // total size of the composite object
   transient ObjectInspector joinOutputObjectInspector;
-  
-  static
-  {
-    aliasField = ExprNodeEvaluatorFactory.get(new exprNodeColumnDesc(String.class, Utilities.ReduceField.ALIAS.toString()));
+
+  static {
+    aliasField = ExprNodeEvaluatorFactory.get(new exprNodeColumnDesc(
+        String.class, Utilities.ReduceField.ALIAS.toString()));
+    keyField = ExprNodeEvaluatorFactory.get(new exprNodeColumnDesc(
+        String.class, Utilities.ReduceField.KEY.toString()));
   }
-  
-  HashMap<Byte, Vector<ArrayList<Object>>> storage;
 
+  HashMap<Byte, Vector<ArrayList<Object>>> storage;
+  int joinEmitInterval = -1;
+  
   public void initialize(Configuration hconf) throws HiveException {
     super.initialize(hconf);
     totalSz = 0;
     // Map that contains the rows for each alias
     storage = new HashMap<Byte, Vector<ArrayList<Object>>>();
-    
+
     numValues = conf.getExprs().size();
     joinExprs = new HashMap<Byte, JoinExprMap>();
-    if (order == null)
-    {
+    if (order == null) {
       order = new Byte[numValues];
       for (int i = 0; i < numValues; i++)
-        order[i] = (byte)i;
+        order[i] = (byte) i;
     }
     condn = conf.getConds();
     noOuterJoin = conf.getNoOuterJoin();
     Map<Byte, ArrayList<exprNodeDesc>> map = conf.getExprs();
     Iterator entryIter = map.entrySet().iterator();
     while (entryIter.hasNext()) {
-      Map.Entry e = (Map.Entry)entryIter.next();
-      Byte key = (Byte)e.getKey();
-      ArrayList<exprNodeDesc> expr = (ArrayList<exprNodeDesc>)e.getValue();
+      Map.Entry e = (Map.Entry) entryIter.next();
+      Byte key = (Byte) e.getKey();
+      ArrayList<exprNodeDesc> expr = (ArrayList<exprNodeDesc>) e.getValue();
       int sz = expr.size();
       totalSz += sz;
 
@@ -123,12 +145,15 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
       joinExprs.put(key, new JoinExprMap(valueFields));
     }
 
-    ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(totalSz);
-    for(int i=0; i<totalSz; i++) {
-      structFieldObjectInspectors.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
+    ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(
+        totalSz);
+    for (int i = 0; i < totalSz; i++) {
+      structFieldObjectInspectors.add(ObjectInspectorFactory
+          .getStandardPrimitiveObjectInspector(String.class));
     }
-    joinOutputObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
-        ObjectInspectorUtils.getIntegerArray(totalSz), structFieldObjectInspectors);
+    joinOutputObjectInspector = ObjectInspectorFactory
+        .getStandardStructObjectInspector(ObjectInspectorUtils
+            .getIntegerArray(totalSz), structFieldObjectInspectors);
 
     dummyObj = new Object[numValues];
     dummyObjVectors = new Vector[numValues];
@@ -149,6 +174,8 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
     }
 
     iterators = new Stack<Iterator<ArrayList<Object>>>();
+    
+    joinEmitInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEJOINEMITINTERVAL);
   }
 
   public void startGroup() throws HiveException {
@@ -159,7 +186,9 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
   }
 
   InspectableObject tempAliasInspectableObject = new InspectableObject();
-  public void process(Object row, ObjectInspector rowInspector) throws HiveException {
+
+  public void process(Object row, ObjectInspector rowInspector)
+      throws HiveException {
     try {
       // get alias
       aliasField.evaluate(row, rowInspector, tempAliasInspectableObject);
@@ -176,15 +205,40 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
         nr.add(tempAliasInspectableObject.o);
       }
 
+      // Are we consuming too much memory
+      if (storage.get(alias).size() == joinEmitInterval) {
+        if (alias == numValues - 1) {
+          // The input is sorted by alias, so if we are already in the last
+          // join operand we can emit some results now. Note that this has to
+          // be done before adding the current row to the storage, to preserve
+          // the correctness for outer joins.
+          checkAndGenObject();
+          storage.get(alias).clear();
+        } else {
+          // Output a warning once a join operand has buffered
+          // joinEmitInterval rows (1000 by default). The last join operand
+          // never triggers this warning, since its buffer is emitted and
+          // cleared before it can grow past joinEmitInterval.
+          InspectableObject io = new InspectableObject();
+          keyField.evaluate(row, rowInspector, io);
+          LOG.warn("table " + alias
+              + " has more than joinEmitInterval rows for join key " + io.o);
+        }
+      }
+
       // Add the value to the vector
       storage.get(alias).add(nr);
+
     } catch (Exception e) {
       e.printStackTrace();
       throw new HiveException(e);
     }
   }
 
-  private void createForwardJoinObject(IntermediateObject intObj, boolean[] nullsArr) throws HiveException {
+  private void createForwardJoinObject(IntermediateObject intObj,
+      boolean[] nullsArr) throws HiveException {
     ArrayList<Object> nr = new ArrayList<Object>(totalSz);
     for (int i = 0; i < numValues; i++) {
       Byte alias = order[i];
@@ -204,15 +258,17 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
   }
 
   private void copyOldArray(boolean[] src, boolean[] dest) {
-    for (int i = 0; i < src.length; i++) dest[i] = src[i];
+    for (int i = 0; i < src.length; i++)
+      dest[i] = src[i];
   }
 
-  private Vector<boolean[]> joinObjectsInnerJoin(Vector<boolean[]> resNulls, Vector<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull)
-  {
-    if (newObjNull) return resNulls;
+  private Vector<boolean[]> joinObjectsInnerJoin(Vector<boolean[]> resNulls,
+      Vector<boolean[]> inputNulls, ArrayList<Object> newObj,
+      IntermediateObject intObj, int left, boolean newObjNull) {
+    if (newObjNull)
+      return resNulls;
     Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext())
-    {
+    while (nullsIter.hasNext()) {
       boolean[] oldNulls = nullsIter.next();
       boolean oldObjNull = oldNulls[left];
       if (!oldObjNull) {
@@ -224,12 +280,13 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
     }
     return resNulls;
   }
-  
-  private Vector<boolean[]> joinObjectsLeftOuterJoin(Vector<boolean[]> resNulls, Vector<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull)
-  {
+
+  private Vector<boolean[]> joinObjectsLeftOuterJoin(
+      Vector<boolean[]> resNulls, Vector<boolean[]> inputNulls,
+      ArrayList<Object> newObj, IntermediateObject intObj, int left,
+      boolean newObjNull) {
     Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext())
-    {
+    while (nullsIter.hasNext()) {
       boolean[] oldNulls = nullsIter.next();
       boolean oldObjNull = oldNulls[left];
       boolean[] newNulls = new boolean[intObj.getCurSize()];
@@ -243,25 +300,25 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
     return resNulls;
   }
 
-  private Vector<boolean[]> joinObjectsRightOuterJoin(Vector<boolean[]> resNulls, Vector<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull)
-  {
-    if (newObjNull) return resNulls;
+  private Vector<boolean[]> joinObjectsRightOuterJoin(
+      Vector<boolean[]> resNulls, Vector<boolean[]> inputNulls,
+      ArrayList<Object> newObj, IntermediateObject intObj, int left,
+      boolean newObjNull) {
+    if (newObjNull)
+      return resNulls;
     boolean allOldObjsNull = true;
 
     Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext())
-    {
+    while (nullsIter.hasNext()) {
       boolean[] oldNulls = nullsIter.next();
-      if (!oldNulls[left])
-      {
+      if (!oldNulls[left]) {
         allOldObjsNull = false;
         break;
       }
     }
 
     nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext())
-    {
+    while (nullsIter.hasNext()) {
       boolean[] oldNulls = nullsIter.next();
       boolean oldObjNull = oldNulls[left];
 
@@ -270,8 +327,7 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
         copyOldArray(oldNulls, newNulls);
         newNulls[oldNulls.length] = newObjNull;
         resNulls.add(newNulls);
-      }
-      else if (allOldObjsNull) {
+      } else if (allOldObjsNull) {
         boolean[] newNulls = new boolean[intObj.getCurSize()];
         for (int i = 0; i < intObj.getCurSize() - 1; i++)
           newNulls[i] = true;
@@ -282,12 +338,13 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
     return resNulls;
   }
 
-  private Vector<boolean[]> joinObjectsFullOuterJoin(Vector<boolean[]> resNulls, Vector<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int left, boolean newObjNull)
-  {
+  private Vector<boolean[]> joinObjectsFullOuterJoin(
+      Vector<boolean[]> resNulls, Vector<boolean[]> inputNulls,
+      ArrayList<Object> newObj, IntermediateObject intObj, int left,
+      boolean newObjNull) {
     if (newObjNull) {
       Iterator<boolean[]> nullsIter = inputNulls.iterator();
-      while (nullsIter.hasNext())
-      {
+      while (nullsIter.hasNext()) {
         boolean[] oldNulls = nullsIter.next();
         boolean[] newNulls = new boolean[intObj.getCurSize()];
         copyOldArray(oldNulls, newNulls);
@@ -296,15 +353,13 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
       }
       return resNulls;
     }
-    
+
     boolean allOldObjsNull = true;
 
     Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext())
-    {
+    while (nullsIter.hasNext()) {
       boolean[] oldNulls = nullsIter.next();
-      if (!oldNulls[left])
-      {
+      if (!oldNulls[left]) {
         allOldObjsNull = false;
         break;
       }
@@ -312,24 +367,21 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
     boolean rhsPreserved = false;
 
     nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext())
-    {
+    while (nullsIter.hasNext()) {
       boolean[] oldNulls = nullsIter.next();
       boolean oldObjNull = oldNulls[left];
 
-      if (!oldObjNull)   
-      {
+      if (!oldObjNull) {
         boolean[] newNulls = new boolean[intObj.getCurSize()];
         copyOldArray(oldNulls, newNulls);
         newNulls[oldNulls.length] = newObjNull;
         resNulls.add(newNulls);
-      }
-      else if (oldObjNull) {
+      } else if (oldObjNull) {
         boolean[] newNulls = new boolean[intObj.getCurSize()];
         copyOldArray(oldNulls, newNulls);
         newNulls[oldNulls.length] = true;
         resNulls.add(newNulls);
-         
+
         if (allOldObjsNull && !rhsPreserved) {
           newNulls = new boolean[intObj.getCurSize()];
           for (int i = 0; i < oldNulls.length; i++)
@@ -344,35 +396,35 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
   }
 
   /*
-   * The new input is added to the list of existing inputs. Each entry in the 
-   * array of inputNulls denotes the entries in the intermediate object to
-   * be used. The intermediate object is augmented with the new object, and 
-   * list of nulls is changed appropriately. The list will contain all non-nulls
-   * for a inner join. The outer joins are processed appropriately.
+   * The new input is added to the list of existing inputs. Each entry in the
+   * array of inputNulls denotes the entries in the intermediate object to be
+   * used. The intermediate object is augmented with the new object, and list of
+   * nulls is changed appropriately. The list will contain all non-nulls for
+   * an inner join. The outer joins are processed appropriately.
    */
-  private Vector<boolean[]> joinObjects(Vector<boolean[]> inputNulls, ArrayList<Object> newObj, IntermediateObject intObj, int joinPos)
-  {
+  private Vector<boolean[]> joinObjects(Vector<boolean[]> inputNulls,
+      ArrayList<Object> newObj, IntermediateObject intObj, int joinPos) {
     Vector<boolean[]> resNulls = new Vector<boolean[]>();
     boolean newObjNull = newObj == dummyObj[joinPos] ? true : false;
-    if (joinPos == 0)
-    {
-      if (newObjNull) return null;
+    if (joinPos == 0) {
+      if (newObjNull)
+        return null;
       boolean[] nulls = new boolean[1];
       nulls[0] = newObjNull;
       resNulls.add(nulls);
       return resNulls;
     }
-    
+
     int left = condn[joinPos - 1].getLeft();
     int type = condn[joinPos - 1].getType();
-    
+
     // process all nulls for RIGHT and FULL OUTER JOINS
-    if (((type == joinDesc.RIGHT_OUTER_JOIN) || (type == joinDesc.FULL_OUTER_JOIN)) 
-        && !newObjNull && (inputNulls == null)) { 
+    if (((type == joinDesc.RIGHT_OUTER_JOIN) || (type == joinDesc.FULL_OUTER_JOIN))
+        && !newObjNull && (inputNulls == null)) {
       boolean[] newNulls = new boolean[intObj.getCurSize()];
       for (int i = 0; i < newNulls.length - 1; i++)
         newNulls[i] = true;
-      newNulls[newNulls.length-1] = false;
+      newNulls[newNulls.length - 1] = false;
       resNulls.add(newNulls);
       return resNulls;
     }
@@ -380,41 +432,45 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
     if (inputNulls == null)
       return null;
 
-    if (type == joinDesc.INNER_JOIN) 
-      return joinObjectsInnerJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull);
-    else if (type == joinDesc.LEFT_OUTER_JOIN) 
-      return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull);
-    else if (type == joinDesc.RIGHT_OUTER_JOIN) 
-      return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull);
+    if (type == joinDesc.INNER_JOIN)
+      return joinObjectsInnerJoin(resNulls, inputNulls, newObj, intObj, left,
+          newObjNull);
+    else if (type == joinDesc.LEFT_OUTER_JOIN)
+      return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj,
+          left, newObjNull);
+    else if (type == joinDesc.RIGHT_OUTER_JOIN)
+      return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj,
+          left, newObjNull);
     assert (type == joinDesc.FULL_OUTER_JOIN);
-    return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull);
+    return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left,
+        newObjNull);
   }
-  
-  /* 
-   * genObject is a recursive function. For the inputs, a array of
-   * bitvectors is maintained (inputNulls) where each entry denotes whether
-   * the element is to be used or not (whether it is null or not). The size of
-   * the bitvector is same as the number of inputs under consideration 
-   * currently. When all inputs are accounted for, the output is forwared
-   * appropriately.
+
+  /*
+   * genObject is a recursive function. For the inputs, an array of bitvectors
+   * is maintained (inputNulls) where each entry denotes whether the element is
+   * to be used or not (whether it is null or not). The size of the bitvector
+   * is the same as the number of inputs under consideration currently. When
+   * all inputs are accounted for, the output is forwarded appropriately.
    */
-  private void genObject(Vector<boolean[]> inputNulls, int aliasNum, IntermediateObject intObj) 
-    throws HiveException {
+  private void genObject(Vector<boolean[]> inputNulls, int aliasNum,
+      IntermediateObject intObj) throws HiveException {
     if (aliasNum < numValues) {
       Iterator<ArrayList<Object>> aliasRes = storage.get(order[aliasNum])
-        .iterator();
+          .iterator();
       iterators.push(aliasRes);
       while (aliasRes.hasNext()) {
         ArrayList<Object> newObj = aliasRes.next();
         intObj.pushObj(newObj);
-        Vector<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj, aliasNum);
+        Vector<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj,
+            aliasNum);
         genObject(newNulls, aliasNum + 1, intObj);
         intObj.popObj();
       }
       iterators.pop();
-    }
-    else {
-      if (inputNulls == null) return;
+    } else {
+      if (inputNulls == null)
+        return;
       Iterator<boolean[]> nullsIter = inputNulls.iterator();
       while (nullsIter.hasNext()) {
         boolean[] nullsVec = nullsIter.next();
@@ -429,29 +485,27 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
    * @throws HiveException
    */
   public void endGroup() throws HiveException {
-    try {
-      LOG.trace("Join Op: endGroup called: numValues=" + numValues);
-
-      // does any result need to be emitted
-      for (int i = 0; i < numValues; i++) {
-        Byte alias = order[i];
-        if (storage.get(alias).iterator().hasNext() == false) {
-          if (noOuterJoin) {
-            LOG.trace("No data for alias=" + i);
-            return;
-          } else {
-            storage.put(alias, dummyObjVectors[i]);
-          }
+    LOG.trace("Join Op: endGroup called: numValues=" + numValues);
+    checkAndGenObject();
+  }
+
+  private void checkAndGenObject() throws HiveException {
+    // does any result need to be emitted
+    for (int i = 0; i < numValues; i++) {
+      Byte alias = order[i];
+      if (storage.get(alias).iterator().hasNext() == false) {
+        if (noOuterJoin) {
+          LOG.trace("No data for alias=" + i);
+          return;
+        } else {
+          storage.put(alias, dummyObjVectors[i]);
         }
       }
-
-      LOG.trace("calling genObject");
-      genObject(null, 0, new IntermediateObject(new ArrayList[numValues], 0));
-      LOG.trace("called genObject");
-    } catch (Exception e) {
-      e.printStackTrace();
-      throw new HiveException(e);
     }
+
+    LOG.trace("calling genObject");
+    genObject(null, 0, new IntermediateObject(new ArrayList[numValues], 0));
+    LOG.trace("called genObject");
   }
 
   /**
@@ -462,6 +516,5 @@ public class JoinOperator extends Operator<joinDesc> implements Serializable {
     LOG.trace("Join Op close");
     super.close(abort);
   }
-}
-
 
+}
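
The joinEmitInterval check bounds how many rows per alias are buffered for a single join key: because the reducer input is sorted by alias, once the last (highest-numbered) alias has accumulated joinEmitInterval rows the operator can emit the partial cross product and clear that alias's buffer; earlier aliases can only warn. A stripped-down sketch of the bookkeeping, with illustrative names and no Hive types:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class JoinEmitSketch {
  private final int joinEmitInterval;   // cf. hive.join.emit.interval
  private final int numAliases;         // number of join operands
  private final Map<Byte, List<Object>> storage = new HashMap<Byte, List<Object>>();

  JoinEmitSketch(int joinEmitInterval, int numAliases) {
    this.joinEmitInterval = joinEmitInterval;
    this.numAliases = numAliases;
    for (byte i = 0; i < numAliases; i++) {
      storage.put(i, new ArrayList<Object>());
    }
  }

  void process(byte alias, Object row) {
    List<Object> rows = storage.get(alias);
    if (rows.size() == joinEmitInterval) {
      if (alias == numAliases - 1) {
        // Last operand: emit what is buffered before adding the new row,
        // then start a fresh batch for this alias.
        emitJoinedRows();
        rows.clear();
      } else {
        // Earlier operands cannot be emitted early; just warn.
        System.err.println("alias " + alias + " buffered " + joinEmitInterval + " rows");
      }
    }
    rows.add(row);
  }

  void emitJoinedRows() {
    // stand-in for checkAndGenObject(): walk the per-alias buffers and
    // forward the (outer-)joined combinations
  }
}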

+ 5 - 2
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java

@@ -19,16 +19,18 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.*;
+import java.util.HashMap;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.plan.limitDesc;
-import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.conf.Configuration;
 
 /**
  * Limit operator implementation
- * Limits a subobject and passes that on.
+ * Limits the number of rows to be passed on.
  **/
 public class LimitOperator extends Operator<limitDesc> implements Serializable {
   private static final long serialVersionUID = 1L;
@@ -50,4 +52,5 @@ public class LimitOperator extends Operator<limitDesc> implements Serializable {
     else
       setDone(true);
   }
+
 }
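
Only the tail of LimitOperator.process appears in this hunk; the whole operator is just a row counter that forwards until the limit is reached and then calls setDone(true) so the pipeline can stop feeding it. A condensed, hypothetical version without the Hive operator plumbing:

public class LimitSketch {
  private final int limit;
  private int currCount = 0;
  private boolean done = false;

  LimitSketch(int limit) {
    this.limit = limit;
  }

  // Returns true while rows are still being forwarded.
  boolean process(Object row) {
    if (currCount < limit) {
      forward(row);
      currCount++;
      return true;
    }
    done = true;   // corresponds to setDone(true) above
    return false;
  }

  void forward(Object row) {
    // stand-in for Operator.forward(row, rowInspector)
  }

  boolean isDone() {
    return done;
  }
}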

+ 5 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java

@@ -62,6 +62,11 @@ public class MapRedTask extends Task<mapredWork> implements Serializable {
     
       String cmdLine = hadoopExec + " jar " + auxJars + " " + hiveJar + " org.apache.hadoop.hive.ql.exec.ExecDriver -plan " + planFile.toString() + " " + hiveConfArgs;
       
+      String files = ExecDriver.getRealFiles(conf);
+      if(!files.isEmpty()) {
+        cmdLine = cmdLine + " -files " + files;
+      }
+
       LOG.info("Executing: " + cmdLine);
       Process executor = Runtime.getRuntime().exec(cmdLine);
 

+ 44 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java

@@ -20,8 +20,11 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.io.Serializable;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
 
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.LocalFileSystem;
@@ -30,6 +33,9 @@ import org.apache.hadoop.hive.ql.plan.loadFileDesc;
 import org.apache.hadoop.hive.ql.plan.loadTableDesc;
 import org.apache.hadoop.hive.ql.plan.moveWork;
 import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -108,6 +114,44 @@ public class MoveTask extends Task<moveWork> implements Serializable {
         String mesg_detail = " from " + tbd.getSourceDir();
         console.printInfo(mesg, mesg_detail);
 
+        // Get the file format of the table
+        boolean tableIsSequenceFile = tbd.getTable().getInputFileFormatClass().equals(SequenceFileInputFormat.class);
+        // Get all files from the src directory
+        FileStatus [] dirs;
+        ArrayList<FileStatus> files;
+        try {
+          fs = FileSystem.get(db.getTable(tbd.getTable().getTableName()).getDataLocation(),
+              Hive.get().getConf());
+          dirs = fs.globStatus(new Path(tbd.getSourceDir()));
+          files = new ArrayList<FileStatus>();
+          for (int i=0; i<dirs.length; i++) {
+            files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath())));
+            // We only check one file, so exit the loop when we have at least one.
+            if (files.size()>0) break;
+          }
+        } catch (IOException e) {
+          throw new HiveException("addFiles: filesystem error in check phase", e);
+        }
+        // Check if the file format of the file matches that of the table.
+        if (files.size() > 0) {
+          int fileId = 0;
+          boolean fileIsSequenceFile = true;   
+          try {
+            SequenceFile.Reader reader = new SequenceFile.Reader(
+              fs, files.get(fileId).getPath(), conf);
+            reader.close();
+          } catch (IOException e) {
+            fileIsSequenceFile = false;
+          }
+          if (!fileIsSequenceFile && tableIsSequenceFile) {
+            throw new HiveException("Cannot load text files into a table stored as SequenceFile.");
+          }
+          if (fileIsSequenceFile && !tableIsSequenceFile) {
+            throw new HiveException("Cannot load SequenceFiles into a table stored as TextFile.");
+          }
+        }
+         
+
         if(tbd.getPartitionSpec().size() == 0) {
           db.loadTable(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(), tbd.getReplace());
         } else {
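
The format check above is a cheap probe: a file is treated as a SequenceFile exactly when SequenceFile.Reader can open it, since the reader validates the file's magic header on construction and throws IOException otherwise. The same probe as a standalone helper (assuming a FileSystem and Configuration are already available):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;

public class SequenceFileProbe {
  // Returns true if the file starts with a valid SequenceFile header.
  public static boolean isSequenceFile(FileSystem fs, Path file, Configuration conf) {
    try {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
      reader.close();
      return true;
    } catch (IOException e) {
      return false;
    }
  }
}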

+ 30 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java

@@ -22,6 +22,7 @@ import java.util.*;
 import java.io.*;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.plan.mapredWork;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -31,6 +32,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.plan.explain;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 /**
  * Base operator implementation
@@ -42,6 +45,7 @@ public abstract class Operator <T extends Serializable> implements Serializable
   private static final long serialVersionUID = 1L;
   
   protected List<Operator<? extends Serializable>> childOperators;
+  protected List<Operator<? extends Serializable>> parentOperators;
 
   public Operator() {}
 
@@ -53,6 +57,14 @@ public abstract class Operator <T extends Serializable> implements Serializable
     return childOperators;
   }
 
+  public void setParentOperators(List<Operator<? extends Serializable>> parentOperators) {
+    this.parentOperators = parentOperators;
+  }
+
+  public List<Operator<? extends Serializable>> getParentOperators() {
+    return parentOperators;
+  }
+
   protected String id;
   protected T conf;
   protected boolean done;
@@ -277,4 +289,22 @@ public abstract class Operator <T extends Serializable> implements Serializable
     }    
   }
 
+  public List<String> mergeColListsFromChildren(List<String> colList, 
+                                        HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) {
+    return colList;
+  }
+
+  public List<String> genColLists(HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) 
+    throws SemanticException {
+    List<String> colList = new ArrayList<String>();
+    if (childOperators != null)
+      for(Operator<? extends Serializable> o: childOperators)
+        colList = Utilities.mergeUniqElems(colList, o.genColLists(opParseCtx));
+
+    List<String> cols = mergeColListsFromChildren(colList, opParseCtx);
+    OpParseContext ctx = opParseCtx.get(this);
+    ctx.setColNames(cols);
+    return cols;
+  }
+
 }
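
genColLists walks the operator DAG from each operator toward its children: every operator unions the columns its children report as needed with the columns it references itself (via mergeColListsFromChildren) and records the result in its OpParseContext. GroupByOperator overrides genColLists outright because its keys and aggregation parameters already name every column it needs. A toy illustration of the same recursion over a made-up node type (no Hive classes involved):

import java.util.ArrayList;
import java.util.List;

public class ColumnPruningSketch {
  static class Node {
    List<Node> children = new ArrayList<Node>();
    List<String> ownCols = new ArrayList<String>();  // columns this node references
    List<String> neededCols;                         // filled in by genColLists
  }

  // Union of the children's requirements plus this node's own columns,
  // recorded on the node as it returns.
  static List<String> genColLists(Node n) {
    List<String> cols = new ArrayList<String>();
    for (Node child : n.children) {
      for (String c : genColLists(child)) {
        if (!cols.contains(c)) {
          cols.add(c);
        }
      }
    }
    for (String c : n.ownCols) {
      if (!cols.contains(c)) {
        cols.add(c);
      }
    }
    n.neededCols = cols;
    return cols;
  }
}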

+ 7 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java

@@ -109,7 +109,14 @@ public class OperatorFactory {
       children.add(ret);
       op.setChildOperators(children);
     }
+
+    // add parents for the newly created operator
+    List<Operator<? extends Serializable>> parent = new ArrayList<Operator<? extends Serializable>>();
+    for(Operator op: oplist)
+      parent.add(op);
     
+    ret.setParentOperators(parent);
+
     return (ret);
   }
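
With setParentOperators in place, the factory wires the DAG in both directions: each operator in oplist gets the new operator appended to its child list, and the new operator records oplist as its parents. The shape of that wiring, reduced to a toy type (illustrative only):

import java.util.ArrayList;
import java.util.List;

public class BidirectionalWiringSketch {
  static class Op {
    List<Op> children = new ArrayList<Op>();
    List<Op> parents = new ArrayList<Op>();
  }

  // Same idea as OperatorFactory: link the new child under every listed
  // parent, and record those parents on the child.
  static Op makeChild(Op child, Op... parents) {
    for (Op p : parents) {
      p.children.add(child);
      child.parents.add(p);
    }
    return child;
  }
}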
 

+ 127 - 27
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java

@@ -20,10 +20,11 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.io.*;
 import java.util.ArrayList;
+import java.util.List;
+import java.util.HashMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.reduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.tableDesc;
@@ -34,7 +35,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 /**
  * Reduce Sink Operator sends output to the reduce stage
@@ -42,15 +48,31 @@ import org.apache.hadoop.io.Text;
 public class ReduceSinkOperator extends TerminalOperator <reduceSinkDesc> implements Serializable {
 
   private static final long serialVersionUID = 1L;
+
+  /**
+   * The evaluators for the key columns.
+   * Key columns decide the sort order on the reducer side.
+   * Key columns are passed to the reducer in the "key".
+   */
   transient protected ExprNodeEvaluator[] keyEval;
+  /**
+   * The evaluators for the value columns.
+   * Value columns are passed to the reducer in the "value". 
+   */
   transient protected ExprNodeEvaluator[] valueEval;
+  /**
+   * The evaluators for the partition columns (CLUSTER BY or DISTRIBUTE BY in Hive language).
+   * Partition columns decide the reducer that the current row goes to.
+   * Partition columns are not passed to the reducer.
+   */
+  transient protected ExprNodeEvaluator[] partitionEval;
   
   // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is ready
   transient Serializer keySerializer;
+  transient boolean keyIsText;
   transient Serializer valueSerializer;
   transient int tag;
   transient byte[] tagByte = new byte[1];
-  transient int numPartitionFields; 
   
   public void initialize(Configuration hconf) throws HiveException {
     super.initialize(hconf);
@@ -67,6 +89,12 @@ public class ReduceSinkOperator extends TerminalOperator <reduceSinkDesc> implem
         valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
       }
 
+      partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
+      i=0;
+      for(exprNodeDesc e: conf.getPartitionCols()) {
+        partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
+      }
+
       tag = conf.getTag();
       tagByte[0] = (byte)tag;
       LOG.info("Using tag = " + tag);
@@ -74,13 +102,11 @@ public class ReduceSinkOperator extends TerminalOperator <reduceSinkDesc> implem
       tableDesc keyTableDesc = conf.getKeySerializeInfo();
       keySerializer = (Serializer)keyTableDesc.getDeserializerClass().newInstance();
       keySerializer.initialize(null, keyTableDesc.getProperties());
+      keyIsText = keySerializer.getSerializedClass().equals(Text.class);
       
       tableDesc valueTableDesc = conf.getValueSerializeInfo();
       valueSerializer = (Serializer)valueTableDesc.getDeserializerClass().newInstance();
       valueSerializer.initialize(null, valueTableDesc.getProperties());
-      
-      // Set the number of key fields to be used in the partitioner.
-      numPartitionFields = conf.getNumPartitionFields();
     } catch (Exception e) {
       e.printStackTrace();
       throw new RuntimeException(e);
@@ -89,7 +115,7 @@ public class ReduceSinkOperator extends TerminalOperator <reduceSinkDesc> implem
 
   transient InspectableObject tempInspectableObject = new InspectableObject();
   transient HiveKey keyWritable = new HiveKey();
-  transient Text valueText;
+  transient Writable value;
   
   transient ObjectInspector keyObjectInspector;
   transient ObjectInspector valueObjectInspector;
@@ -97,64 +123,138 @@ public class ReduceSinkOperator extends TerminalOperator <reduceSinkDesc> implem
   transient ArrayList<ObjectInspector> valueFieldsObjectInspectors = new ArrayList<ObjectInspector>();
   
   public void process(Object row, ObjectInspector rowInspector) throws HiveException {
-    // TODO: use DynamicSerDe when that is ready
     try {
-      // Generate hashCode for the tuple
-      int keyHashCode = 0;
-      if (numPartitionFields == -1) {
-        keyHashCode = (int)(Math.random() * Integer.MAX_VALUE);
-      }
+      // Evaluate the keys
       ArrayList<Object> keys = new ArrayList<Object>(keyEval.length);
       for(ExprNodeEvaluator e: keyEval) {
         e.evaluate(row, rowInspector, tempInspectableObject);
         keys.add(tempInspectableObject.o);
-        if (numPartitionFields == keys.size()) {
-          keyHashCode = keys.hashCode();
-        }
+        // Construct the keyObjectInspector from the first row
         if (keyObjectInspector == null) {
           keyFieldsObjectInspectors.add(tempInspectableObject.oi);
         }
       }
-      if (numPartitionFields > keys.size()) {
-        keyHashCode = keys.hashCode();
-      }
+      // Construct the keyObjectInspector from the first row
       if (keyObjectInspector == null) {
         keyObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
             ObjectInspectorUtils.getIntegerArray(keyFieldsObjectInspectors.size()),
             keyFieldsObjectInspectors);
       }
-      Text key = (Text)keySerializer.serialize(keys, keyObjectInspector);
-      if (tag == -1) {
-        keyWritable.set(key.getBytes(), 0, key.getLength());
+      // Serialize the keys and append the tag
+      if (keyIsText) {
+        Text key = (Text)keySerializer.serialize(keys, keyObjectInspector);
+        if (tag == -1) {
+          keyWritable.set(key.getBytes(), 0, key.getLength());
+        } else {
+          int keyLength = key.getLength();
+          keyWritable.setSize(keyLength+1);
+          System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keyLength);
+          keyWritable.get()[keyLength] = tagByte[0];
+        }
       } else {
-        int keyLength = key.getLength();
-        keyWritable.setSize(keyLength+1);
-        System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keyLength);
-        keyWritable.get()[keyLength] = tagByte[0];
+        // Must be BytesWritable
+        BytesWritable key = (BytesWritable)keySerializer.serialize(keys, keyObjectInspector);
+        if (tag == -1) {
+          keyWritable.set(key.get(), 0, key.getSize());
+        } else {
+          int keyLength = key.getSize();
+          keyWritable.setSize(keyLength+1);
+          System.arraycopy(key.get(), 0, keyWritable.get(), 0, keyLength);
+          keyWritable.get()[keyLength] = tagByte[0];
+        }
+      }
+      // Set the HashCode
+      int keyHashCode = 0;
+      for(ExprNodeEvaluator e: partitionEval) {
+        e.evaluate(row, rowInspector, tempInspectableObject);
+        keyHashCode = keyHashCode * 31 
+          + (tempInspectableObject.o == null ? 0 : tempInspectableObject.o.hashCode());
       }
       keyWritable.setHashCode(keyHashCode);
       
+      // Evaluate the value
       ArrayList<Object> values = new ArrayList<Object>(valueEval.length);
       for(ExprNodeEvaluator e: valueEval) {
         e.evaluate(row, rowInspector, tempInspectableObject);
         values.add(tempInspectableObject.o);
+        // Construct the valueObjectInspector from the first row
         if (valueObjectInspector == null) {
           valueFieldsObjectInspectors.add(tempInspectableObject.oi);
         }
       }
+      // Construct the valueObjectInspector from the first row
       if (valueObjectInspector == null) {
         valueObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
             ObjectInspectorUtils.getIntegerArray(valueFieldsObjectInspectors.size()),
             valueFieldsObjectInspectors);
       }
-      valueText = (Text)valueSerializer.serialize(values, valueObjectInspector);
+      // Serialize the value
+      value = valueSerializer.serialize(values, valueObjectInspector);
     } catch (SerDeException e) {
       throw new HiveException(e);
     }
+    
     try {
-      out.collect(keyWritable, valueText);
+      out.collect(keyWritable, value);
     } catch (IOException e) {
       throw new HiveException (e);
     }
   }
+  
+  public List<String> genColLists(HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) 
+    throws SemanticException {
+    RowResolver redSinkRR = opParseCtx.get(this).getRR();
+    List<String> childColLists = new ArrayList<String>();
+
+    for(Operator<? extends Serializable> o: childOperators)
+      childColLists = Utilities.mergeUniqElems(childColLists, o.genColLists(opParseCtx));
+
+    List<String> colLists = new ArrayList<String>();
+    ArrayList<exprNodeDesc> keys = conf.getKeyCols();
+    for (exprNodeDesc key : keys)
+      colLists = Utilities.mergeUniqElems(colLists, key.getCols());
+
+    // In case of extract child, see the columns used and propagate them
+    if ((childOperators.size() == 1) && (childOperators.get(0) instanceof ExtractOperator)) {
+      assert parentOperators.size() == 1;
+      Operator<? extends Serializable> par = parentOperators.get(0);
+      RowResolver parRR = opParseCtx.get(par).getRR();
+
+      for (String childCol : childColLists) {
+        String [] nm = redSinkRR.reverseLookup(childCol);
+        ColumnInfo cInfo = parRR.get(nm[0],nm[1]);
+        if (!colLists.contains(cInfo.getInternalName()))
+          colLists.add(cInfo.getInternalName());
+      }
+    }
+    else if ((childOperators.size() == 1) && (childOperators.get(0) instanceof JoinOperator)) {
+      assert parentOperators.size() == 1;
+      Operator<? extends Serializable> par = parentOperators.get(0);
+      RowResolver parRR = opParseCtx.get(par).getRR();
+      RowResolver childRR = opParseCtx.get(childOperators.get(0)).getRR();
+
+      for (String childCol : childColLists) {
+        String [] nm = childRR.reverseLookup(childCol);
+        ColumnInfo cInfo = redSinkRR.get(nm[0],nm[1]);
+        if (cInfo != null) {
+          cInfo = parRR.get(nm[0], nm[1]);
+          if (!colLists.contains(cInfo.getInternalName()))
+            colLists.add(cInfo.getInternalName());
+        }
+      }
+    }
+    else {
+      
+      // Reduce Sink contains the columns needed - no need to aggregate from children
+      ArrayList<exprNodeDesc> vals = conf.getValueCols();
+      for (exprNodeDesc val : vals)
+        colLists = Utilities.mergeUniqElems(colLists, val.getCols());
+    }
+
+    OpParseContext ctx = opParseCtx.get(this);
+    ctx.setColNames(colLists);
+    opParseCtx.put(this, ctx);
+    return colLists;
+  }
+
 }
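
The partition hash replaces the old numPartitionFields logic: each DISTRIBUTE BY / CLUSTER BY expression is evaluated per row and folded into the hash with the usual multiply-by-31 scheme, and the hash stored in the HiveKey is what the partitioner uses to pick a reducer. A standalone sketch follows, assuming a HashPartitioner-style modulo downstream (that downstream step is an assumption, not shown in this hunk); the column values are made up.

    import java.util.Arrays;
    import java.util.List;

    public class PartitionHashSketch {
        // Same folding scheme as the loop over partitionEval above.
        static int hashOf(List<Object> partitionColValues) {
            int h = 0;
            for (Object o : partitionColValues) {
                h = h * 31 + (o == null ? 0 : o.hashCode());
            }
            return h;
        }

        public static void main(String[] args) {
            int numReducers = 32;
            int hash = hashOf(Arrays.<Object>asList("userid-123", 42));
            // Mask off the sign bit before the modulo so the index is never negative.
            int reducer = (hash & Integer.MAX_VALUE) % numReducers;
            System.out.println("row goes to reducer " + reducer);
        }
    }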

+ 86 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.mapred.LineRecordReader.LineReader;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.fs.FileUtil;
 
 
 public class ScriptOperator extends Operator<scriptDesc> implements Serializable {
@@ -89,6 +90,77 @@ public class ScriptOperator extends Operator<scriptDesc> implements Serializable
     }
   }
 
+
+  /**
+   * Maps a relative pathname to an absolute pathname using the
+   * PATH environment variable.
+   */
+  public class PathFinder
+  {
+    String pathenv;        // a string of pathnames
+    String pathSep;        // the path separator
+    String fileSep;        // the file separator in a directory
+
+    /**
+     * Construct a PathFinder object using the path from
+     * the specified system environment variable.
+     */
+    public PathFinder(String envpath)
+    {
+      pathenv = System.getenv(envpath);
+      pathSep = System.getProperty("path.separator");
+      fileSep = System.getProperty("file.separator");
+    }
+
+    /**
+     * Prepends the specified component to the path list
+     */
+    public void prependPathComponent(String str)
+    {
+      pathenv = str + pathSep + pathenv;
+    }
+
+    /**
+     * Returns the full path name of this file if it is listed in the
+     * path
+     */
+    public File getAbsolutePath(String filename)
+    {
+      if (pathenv == null || pathSep == null  || fileSep == null) {
+        return null;
+      }
+      int     val = -1;
+      String    classvalue = pathenv + pathSep;
+
+      while (((val = classvalue.indexOf(pathSep)) >= 0) &&
+             classvalue.length() > 0) {
+        //
+        // Extract each entry from the pathenv
+        //
+        String entry = classvalue.substring(0, val).trim();
+        File f = new File(entry);
+
+        try {
+          if (f.isDirectory()) {
+            //
+            // this entry in the pathenv is a directory.
+            // see if the required file is in this directory
+            //
+            f = new File(entry + fileSep + filename);
+          }
+          //
+          // see if the filename matches and we can read it
+          //
+          if (f.isFile() && f.canRead()) {
+            return f;
+          }
+        } catch (Exception exp){ }
+        classvalue = classvalue.substring(val+1).trim();
+      }
+      return null;
+    }
+  }
+
   public void initialize(Configuration hconf) throws HiveException {
     super.initialize(hconf);
     statsMap.put(Counter.DESERIALIZE_ERRORS, deserialize_error_count);
@@ -104,6 +176,20 @@ public class ScriptOperator extends Operator<scriptDesc> implements Serializable
       scriptInputSerializer.initialize(hconf, conf.getScriptInputInfo().getProperties());
 
       String [] cmdArgs = splitArgs(conf.getScriptCmd());
+
+      String prog = cmdArgs[0];
+      File currentDir = new File(".").getAbsoluteFile();
+
+      if (!new File(prog).isAbsolute()) {
+        PathFinder finder = new PathFinder("PATH");
+        finder.prependPathComponent(currentDir.toString());
+        File f = finder.getAbsolutePath(prog);
+        if (f != null) {
+          cmdArgs[0] = f.getAbsolutePath();
+        }
+        f = null;
+      }
+
       String [] wrappedCmdArgs = addWrapper(cmdArgs);
       LOG.info("Executing " + Arrays.asList(wrappedCmdArgs));
       LOG.info("tablename=" + hconf.get(HiveConf.ConfVars.HIVETABLENAME.varname));

+ 61 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java

@@ -20,14 +20,18 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.io.*;
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.selectDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 /**
  * Select operator implementation
@@ -87,4 +91,61 @@ public class SelectOperator extends Operator <selectDesc> implements Serializabl
     }
     forward(output, outputObjectInspector);
   }
+
+  private List<String> getColsFromExpr(HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) {
+    List<String> cols = new ArrayList<String>();
+    ArrayList<exprNodeDesc> exprList = conf.getColList();
+    for (exprNodeDesc expr : exprList)
+      cols = Utilities.mergeUniqElems(cols, expr.getCols());
+    List<Integer> listExprs = new ArrayList<Integer>();
+    for (int pos = 0; pos < exprList.size(); pos++)
+      listExprs.add(new Integer(pos));
+    OpParseContext ctx = opParseCtx.get(this);
+    ctx.setColNames(cols);
+    opParseCtx.put(this, ctx);
+    return cols;
+  }
+
+  private List<String> getColsFromExpr(List<String> colList, 
+                                       HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) {
+  	if (colList.isEmpty())
+  		return getColsFromExpr(opParseCtx);
+  	
+    List<String> cols = new ArrayList<String>();
+    ArrayList<exprNodeDesc> selectExprs = conf.getColList();
+    List<Integer> listExprs = new ArrayList<Integer>();
+
+    for (String col : colList) {
+      // col is the internal name i.e. position within the expression list
+      Integer pos = new Integer(col);
+      exprNodeDesc expr = selectExprs.get(pos.intValue());
+      cols = Utilities.mergeUniqElems(cols, expr.getCols());
+      listExprs.add(pos);
+    }
+
+    OpParseContext ctx = opParseCtx.get(this);
+    ctx.setColNames(cols);
+    opParseCtx.put(this, ctx);
+    return cols;
+  }
+
+  public List<String> genColLists(HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) 
+    throws SemanticException {
+    List<String> cols = new ArrayList<String>();
+    
+    for(Operator<? extends Serializable> o: childOperators) {
+      // if one of my children is a FileSink or Script operator, return everything
+      if ((o instanceof FileSinkOperator) || (o instanceof ScriptOperator))
+        return getColsFromExpr(opParseCtx);
+
+      cols = Utilities.mergeUniqElems(cols, o.genColLists(opParseCtx));
+    }
+
+    if (conf.isSelectStar())
+      // The input to the select does not matter. Go over the expressions and return the ones which have a marked column
+      return getColsFromExpr(cols, opParseCtx);
+    
+    return getColsFromExpr(opParseCtx);
+  }
+
 }
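
In getColsFromExpr(colList, ...) the child reports internal names that are positions into this select's expression list, and each position is mapped back to the input columns its expression references. A standalone sketch of that mapping follows; the column names and expressions are made up for illustration.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class SelectPruneSketch {
        public static void main(String[] args) {
            // Each select expression, keyed by position, references some input columns.
            List<List<String>> exprInputCols = Arrays.asList(
                    Arrays.asList("a"),            // expr 0: a
                    Arrays.asList("b", "c"),       // expr 1: b + c
                    Arrays.asList("d"));           // expr 2: d
            // The child only needs the select's outputs "0" and "2" (internal names = positions).
            List<String> neededPositions = Arrays.asList("0", "2");

            List<String> neededInputs = new ArrayList<String>();
            for (String pos : neededPositions) {
                for (String col : exprInputCols.get(Integer.parseInt(pos))) {
                    if (!neededInputs.contains(col)) {
                        neededInputs.add(col);
                    }
                }
            }
            System.out.println(neededInputs);   // [a, d] -- b and c can be pruned
        }
    }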

+ 123 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Throttle.java

@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.*;
+import java.util.*;
+import java.util.regex.Pattern;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.net.URLDecoder;
+import java.net.MalformedURLException;
+import java.net.InetSocketAddress;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobTracker;
+
+/*
+ * Intelligence to make clients wait if the cluster is in a bad state.
+ */
+public class Throttle {
+
+  // The percentage of maximum allocated memory that triggers GC
+  // on job tracker. This could be overridden thru the jobconf.
+  // The default is such that there is no throttling.
+  static private int DEFAULT_MEMORY_GC_PERCENT = 100;
+
+  // sleep this many seconds between each retry.
+  // This could be overridden thru the jobconf.
+  static private int DEFAULT_RETRY_PERIOD = 60;
+
+  /**
+   * fetch http://<jobtracker-host>:<info-port>/gc.jsp?threshold=<threshold> and wait if asked to
+   */
+  static void checkJobTracker(JobConf conf, Log LOG)  {
+
+    try {
+      byte buffer[] = new byte[1024]; 
+      int threshold = conf.getInt("mapred.throttle.threshold.percent",
+                                  DEFAULT_MEMORY_GC_PERCENT);
+      int retry = conf.getInt("mapred.throttle.retry.period",
+                              DEFAULT_RETRY_PERIOD);
+
+      // If the threshold is 100 percent, then there is no throttling
+      if (threshold == 100) {
+        return;
+      }
+
+      // find the http port for the jobtracker
+      String infoAddr = conf.get("mapred.job.tracker.http.address");
+      if (infoAddr == null) {
+        throw new IOException("Throttle: Unable to find job tracker info port.");
+      }
+      InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr);
+      int infoPort = infoSocAddr.getPort();
+
+      // This is the Job Tracker URL
+      String tracker = "http://" +
+                       JobTracker.getAddress(conf).getHostName() + ":" +
+                       infoPort +
+                       "/gc.jsp?threshold=" + threshold;
+
+      while (true) {
+        // read in the first 1K characters from the URL
+        URL url = new URL(tracker);
+        LOG.debug("Throttle: URL " + tracker);
+        InputStream in = url.openStream();
+        int numRead = in.read(buffer);
+        in.close();
+        String fetchString = new String(buffer);
+
+        // fetch the xml tag <dogc>xxx</dogc>
+        Pattern dowait = Pattern.compile("<dogc>",
+                         Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE);
+        String[] results = dowait.split(fetchString);
+        if (results.length != 2) {
+          throw new IOException("Throttle: Unable to parse response of URL " + url + 
+                                ". Get retuned " + fetchString);
+        }
+        dowait = Pattern.compile("</dogc>",
+                         Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE);
+        results = dowait.split(results[1]);
+        if (results.length < 1) {
+          throw new IOException("Throttle: Unable to parse response of URL " + url + 
+                                ". Get retuned " + fetchString);
+        }
+
+        // if the jobtracker signalled that the threshold is not exceeded, 
+        // then we return immediately.
+        if (results[0].trim().compareToIgnoreCase("false") == 0) {
+          return;
+        }
+
+        // The JobTracker has exceeded its threshold and is doing a GC.
+        // The client has to wait and retry.
+        LOG.warn("Job is being throttled because of resource crunch on the " +
+                 "JobTracker. Will retry in " + retry + " seconds..");
+        Thread.sleep(retry * 1000L);
+      }
+    } catch (Exception e) {
+      LOG.warn("Job is not being throttled. " + e);
+    }
+  }
+}
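
The client keeps re-fetching gc.jsp and looks only at the text between <dogc> and </dogc>: "false" means the memory threshold is not exceeded and the job may proceed, anything else means sleep and retry. Below is a standalone sketch of the same tag parsing on a canned response (the page content is made up).

    import java.util.regex.Pattern;

    public class DogcParseSketch {
        // Same Pattern.split approach as Throttle.checkJobTracker above.
        static String extractDogc(String page) throws Exception {
            Pattern open = Pattern.compile("<dogc>", Pattern.CASE_INSENSITIVE);
            String[] parts = open.split(page);
            if (parts.length != 2) {
                throw new Exception("no <dogc> tag in " + page);
            }
            Pattern close = Pattern.compile("</dogc>", Pattern.CASE_INSENSITIVE);
            return close.split(parts[1])[0].trim();
        }

        public static void main(String[] args) throws Exception {
            System.out.println(extractDogc("<html><dogc>false</dogc></html>"));  // false -> no throttling
            System.out.println(extractDogc("<html><dogc>true</dogc></html>"));   // true  -> wait and retry
        }
    }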

+ 44 - 8
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java

@@ -140,8 +140,7 @@ public class Utilities {
       // Set up distributed cache
       DistributedCache.createSymlink(job);
       String uriWithLink = planPath.toUri().toString() + "#HIVE_PLAN";
-      URI[] fileURIs = new URI[] {new URI(uriWithLink)};
-      DistributedCache.setCacheFiles(fileURIs, job);
+      DistributedCache.addCacheFile(new URI(uriWithLink), job);
       // Cache the object in this process too so lookups don't hit the file system
       synchronized (Utilities.class) {
         gWork = w;
@@ -198,15 +197,13 @@ public class Utilities {
   public static tableDesc defaultTd;
   static {
     // by default we expect ^A separated strings
+    // This tableDesc does not provide column names.  We should always use
+    // PlanUtils.getDefaultTableDesc(String separatorCode, String columns)
+    // or getBinarySortableTableDesc(List<FieldSchema> fieldSchemas) when 
+    // we know the column names.
     defaultTd = PlanUtils.getDefaultTableDesc("" + Utilities.ctrlaCode);
   }
 
-  public static tableDesc defaultTabTd;
-  static {
-    // Default tab-separated tableDesc
-    defaultTabTd = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode);
-  }
-  
   public final static int newLineCode = 10;
   public final static int tabCode = 9;
   public final static int ctrlaCode = 1;
@@ -431,4 +428,43 @@ public class Utilities {
                                       keyClass, valClass, compressionType, codec));
 
   }
+
+  /**
+   * Shamelessly cloned from GenericOptionsParser
+   */
+  public static String realFile(String newFile, Configuration conf) throws IOException {
+    Path path = new Path(newFile);
+    URI pathURI =  path.toUri();
+    FileSystem fs;
+
+    if (pathURI.getScheme() == null) {
+      fs = FileSystem.getLocal(conf);
+    } else {
+      fs = path.getFileSystem(conf);
+    }
+
+    if (!fs.exists(path)) {
+      return null;
+    }
+
+    try {
+      fs.close();
+    } catch(IOException e){};
+
+    return (path.makeQualified(fs).toString());
+  }
+
+  public static List<String> mergeUniqElems(List<String> src, List<String> dest) {
+    if (dest == null) return src;
+    if (src == null) return dest;
+    int pos = 0;
+
+    while (pos < dest.size()) {
+      if (!src.contains(dest.get(pos)))
+        src.add(dest.get(pos));
+      pos++;
+    }
+
+    return src;
+  }
 }
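
mergeUniqElems mutates and returns its first argument, appending only the elements of the second list it does not already contain, which is why callers reassign the result. A small self-contained demo of that contract follows; the method body is copied locally so the snippet runs on its own.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class MergeUniqDemo {
        public static void main(String[] args) {
            List<String> src = new ArrayList<String>(Arrays.asList("0", "2"));
            List<String> dest = Arrays.asList("2", "3", "0");
            List<String> merged = mergeUniqElems(src, dest);
            System.out.println(merged);           // [0, 2, 3]
            System.out.println(merged == src);    // true -- the same list instance is returned
        }

        // Local copy with the same contract as Utilities.mergeUniqElems.
        static List<String> mergeUniqElems(List<String> src, List<String> dest) {
            if (dest == null) return src;
            if (src == null) return dest;
            for (String d : dest) {
                if (!src.contains(d)) {
                    src.add(d);
                }
            }
            return src;
        }
    }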

+ 330 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/FlatFileInputFormat.java

@@ -0,0 +1,330 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.IOException;
+import java.io.EOFException;
+import java.io.InputStream;
+import java.io.DataInputStream;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RecordReader;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configurable;
+
+import org.apache.hadoop.io.serializer.Serialization;
+import org.apache.hadoop.io.serializer.Serializer;
+import org.apache.hadoop.io.serializer.SerializationFactory;
+import org.apache.hadoop.io.serializer.Deserializer;
+
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/** An {@link InputFormat} for Plain files with {@link Deserializer} records */
+public class FlatFileInputFormat<T> extends FileInputFormat<Void, FlatFileInputFormat.RowContainer<T>> {
+
+  /**
+   * A work-around until HADOOP-1230 is fixed. 
+   *
+   * Allows boolean next(k,v) to be called by reference but still allows the deserializer to create a new
+   * object (i.e., row) on every call to next.
+   */
+  static public class RowContainer<T> {
+    T row;
+  }
+
+  /**
+   * An implementation of SerializationContext is responsible for looking up the Serialization implementation
+   * for the given RecordReader, potentially based on the Configuration or some other mechanism.
+   *
+   * The SerializationFactory does not give this functionality since:
+   *  1. It requires Serialization implementations to be specified in the Configuration a priori (although this is the same as setting
+   *     a SerializationContext)
+   *  2. It does not look up the actual subclass being deserialized, e.g., for Serializable it does not have a way of configuring
+   *      the actual Java class being serialized/deserialized.
+   */
+  static public interface SerializationContext<S> extends Configurable {
+
+    /**
+     *  An {@link Serialization} object for objects of type S
+     * @return a serialization object for this context
+     */
+    public Serialization<S> getSerialization() throws IOException;
+
+    /**
+     *  Produces the specific class to deserialize
+     */
+    public Class<? extends S> getRealClass() throws IOException;
+  }
+  
+  /**
+   * The JobConf keys for the Serialization implementation
+   */
+  static public final String SerializationImplKey = "mapred.input.serialization.implKey";
+
+  /**
+   *  An implementation of {@link SerializationContext} that reads the Serialization class and 
+   *  specific subclass to be deserialized from the JobConf.
+   *
+   */
+  static public class SerializationContextFromConf<S> implements FlatFileInputFormat.SerializationContext<S> {
+
+    /**
+     * The JobConf keys for the Class that is being deserialized.
+     */
+    static public final String SerializationSubclassKey = "mapred.input.serialization.subclassKey";
+
+    /**
+     * Implements configurable so it can use the configuration to find the right classes
+     * Note: ReflectionUtils will automatically call setConf with the right configuration.
+     */
+    private Configuration conf;
+
+    public void setConf(Configuration conf) { 
+      this.conf = conf; 
+    }
+
+    public Configuration getConf() { 
+      return conf; 
+    }
+
+    /**
+     * @return the actual class being deserialized
+     * @exception does not currently throw IOException
+     */
+    public Class<S> getRealClass() throws IOException {
+      return (Class<S>)conf.getClass(SerializationSubclassKey, null, Object.class);
+    }
+
+    /**
+     * Looks up and instantiates the Serialization Object
+     *
+     * Important to note here that we are not relying on the Hadoop SerializationFactory part of the 
+     * Serialization framework. This is because in the case of Non-Writable Objects, we cannot make any
+     * assumptions about the uniformity of the serialization class APIs - i.e., there may not be a "write"
+     * method call and a subclass may need to implement its own Serialization classes. 
+     * The SerializationFactory currently returns the first (de)serializer that is compatible
+     * with the class to be deserialized;  in this context, that assumption isn't necessarily true.
+     *
+     * @return the serialization object for this context
+     * @exception does not currently throw any IOException
+     */
+    public Serialization<S> getSerialization() throws IOException {
+      Class<Serialization<S>> tClass = (Class<Serialization<S>>)conf.getClass(SerializationImplKey, null, Serialization.class);
+      return tClass == null ? null : (Serialization<S>)ReflectionUtils.newInstance(tClass, conf);
+    }
+  }
+
+  /** 
+   * A {@link RecordReader} for plain files with {@link Deserializer} records 
+   *
+   * Reads one row at a time of type R.
+   * R is intended to be a base class of something such as: Record, Writable, Text, ...
+   *
+   */
+  public class FlatFileRecordReader<R> implements RecordReader<Void, FlatFileInputFormat.RowContainer<R>> {
+
+    /**
+     *  An interface for a helper class for instantiating {@link Serialization} classes.
+     */
+    /**
+     * The stream in use - is fsin if not compressed, otherwise, it is dcin.
+     */
+    private final DataInputStream in;
+
+    /**
+     * The decompressed stream or null if the input is not decompressed.
+     */
+    private final InputStream dcin;
+
+    /**
+     * The underlying stream.
+     */
+    private final FSDataInputStream fsin;
+
+    /**
+     * For calculating progress
+     */
+    private final long end;
+
+    /**
+     * The constructed deserializer
+     */
+    private final Deserializer<R> deserializer;
+
+    /**
+     * Once EOF is reached, stop calling the deserializer 
+     */
+    private boolean isEOF;
+
+    /**
+     * The JobConf which contains information needed to instantiate the correct Deserializer
+     */
+    private Configuration conf;
+
+    /**
+     * The actual class of the row's we are deserializing, not just the base class
+     */
+    private Class<R> realRowClass;
+
+
+    /**
+     * FlatFileRecordReader constructor constructs the underlying stream (potentially decompressed) and 
+     * creates the deserializer.
+     *
+     * @param conf the jobconf
+     * @param split the split for this file
+     */
+    public FlatFileRecordReader(Configuration conf,
+                                FileSplit split) throws IOException {
+      final Path path = split.getPath();
+      FileSystem fileSys = path.getFileSystem(conf);
+      CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
+      final CompressionCodec codec = compressionCodecs.getCodec(path);
+      this.conf = conf;
+
+      fsin = fileSys.open(path);
+      if (codec != null) {
+        dcin = codec.createInputStream(fsin);
+        in = new DataInputStream(dcin);
+      } else {
+        dcin = null;
+        in = fsin;
+      }
+
+      isEOF = false;
+      end = split.getLength();
+
+      // Instantiate a SerializationContext which this will use to lookup the Serialization class and the 
+      // actual class being deserialized
+      SerializationContext<R> sinfo;
+      Class<SerializationContext<R>> sinfoClass = 
+        (Class<SerializationContext<R>>)conf.getClass(SerializationContextImplKey, SerializationContextFromConf.class);
+
+      sinfo =  (SerializationContext<R>)ReflectionUtils.newInstance(sinfoClass, conf);
+
+      // Get the Serialization object and the class being deserialized
+      Serialization<R> serialization = sinfo.getSerialization();
+      realRowClass  = (Class<R>)sinfo.getRealClass();
+
+      deserializer = (Deserializer<R>)serialization.getDeserializer((Class<R>)realRowClass);
+      deserializer.open(in);
+    }
+
+    /**
+     * The actual class of the data being deserialized
+     */
+    private Class<R> realRowclass;
+
+    /**
+     * The JobConf key of the SerializationContext to use
+     */
+    static public final String SerializationContextImplKey = "mapred.input.serialization.context_impl";
+
+    /**
+     * @return null
+     */
+    public Void createKey() { 
+      return null;
+    }
+
+    /**
+     * @return a new R instance.
+     */
+    public RowContainer<R> createValue() { 
+      RowContainer<R> r = new RowContainer<R>();
+      r.row = (R)ReflectionUtils.newInstance(realRowClass, conf);
+      return r;
+    }
+
+    /**
+     * Returns the next row # and value
+     *
+     * @param key - void as these files have a value only
+     * @param value - the row container which is always re-used, but the internal value may be set to a new Object
+     * @return whether the key and value were read. True if they were and false if EOF
+     * @exception IOException from the deserializer
+     */
+    public synchronized boolean next(Void key, RowContainer<R> value) throws IOException {
+      if(isEOF  || in.available() == 0) {
+        isEOF = true;
+        return false;
+      }
+
+      // the deserializer is responsible for actually reading each record from the stream
+      try {
+        value.row = deserializer.deserialize(value.row);
+        if (value.row == null) {
+          isEOF = true;
+          return false;
+        }
+        return true;
+      } catch(EOFException e) {
+        isEOF = true;
+        return false;
+      }
+    }
+
+    public synchronized float getProgress() throws IOException {
+      // this assumes no splitting                                                                                               
+      if (end == 0) {
+        return 0.0f;
+      } else {
+        // gives progress over uncompressed stream                                                                               
+        // assumes deserializer is not buffering itself
+        return Math.min(1.0f, fsin.getPos()/(float)(end));
+      }
+    }
+
+    public synchronized long getPos() throws IOException {
+      // assumes deserializer is not buffering itself
+      // position over uncompressed stream. not sure what                                                                        
+      // effect this has on stats about job                                                                                      
+      return fsin.getPos();
+    }
+
+    public synchronized void close() throws IOException {
+      // assuming that this closes the underlying streams
+      deserializer.close();
+    }
+  }
+
+  protected boolean isSplittable(FileSystem fs, Path filename) {
+    return false;
+  }
+
+  public RecordReader<Void, RowContainer<T>> getRecordReader(InputSplit split,
+                                                             JobConf job, Reporter reporter)
+    throws IOException {
+
+    reporter.setStatus(split.toString());
+
+    return new FlatFileRecordReader<T>(job, (FileSplit) split);
+  }
+}
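
With the default SerializationContextFromConf, the job configuration has to name both the Serialization implementation and the concrete row class. Below is a minimal configuration sketch, assuming Hadoop's built-in org.apache.hadoop.io.serializer.JavaSerialization and a hypothetical com.example.MyRecord row class (both are assumptions, not part of this change).

    import org.apache.hadoop.hive.ql.io.FlatFileInputFormat;
    import org.apache.hadoop.io.serializer.JavaSerialization;
    import org.apache.hadoop.mapred.JobConf;

    public class FlatFileJobSetup {
        public static void configure(JobConf job) {
            // Read rows through the new input format.
            job.setInputFormat(FlatFileInputFormat.class);
            // Which Serialization implementation the record reader should instantiate.
            job.set(FlatFileInputFormat.SerializationImplKey,
                    JavaSerialization.class.getName());
            // Which concrete class each record deserializes into (hypothetical class name).
            job.set(FlatFileInputFormat.SerializationContextFromConf.SerializationSubclassKey,
                    "com.example.MyRecord");
        }
    }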

+ 0 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java

@@ -219,7 +219,6 @@ public class HiveInputFormat<K extends WritableComparable,
     return result.toArray(new HiveInputSplit[result.size()]);
   }
 
-
   private tableDesc getTableDescFromPath(Path dir) throws IOException {
 
     partitionDesc partDesc = pathToPartitionInfo.get(dir.toString());

+ 17 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java

@@ -38,13 +38,14 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
-import org.apache.hadoop.hive.metastore.api.Constants;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
@@ -170,6 +171,7 @@ public class Hive {
     }
     tbl.setSerializationLib(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class.getName());
     tbl.setNumBuckets(bucketCount);
+    tbl.setBucketCols(bucketCols);
     createTable(tbl);
   }
 
@@ -196,6 +198,9 @@ public class Hive {
   public void createTable(Table tbl) throws HiveException {
     try {
       tbl.initSerDe();
+      if(tbl.getCols().size() == 0) {
+        tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getName(), tbl.getDeserializer()));
+      }
       tbl.checkValidity();
       msc.createTable(tbl.getTTable());
     } catch (Exception e) {
@@ -652,5 +657,16 @@ public class Hive {
     }
     return new MetaStoreClient(this.conf);
   }
+
+  public static List<FieldSchema> getFieldsFromDeserializer(String name, Deserializer serde) throws HiveException {
+    try {
+      return MetaStoreUtils.getFieldsFromDeserializer(name, serde);
+    } catch (SerDeException e) {
+      throw new HiveException("Error in getting fields from serde.", e);
+    } catch (MetaException e) {
+      throw new HiveException("Error in getting fields from serde.", e);
+    }
+  }
+
   
 };

+ 4 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java

@@ -36,7 +36,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 
 /**
@@ -205,6 +204,10 @@ public class Partition {
         return(ret);
     }
 
+    public Path getPartitionPath() {
+      return this.partPath;
+    }
+
     final public URI getDataLocation() {
       return this.partURI;
     }

+ 68 - 4
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

@@ -23,6 +23,7 @@ import java.net.URI;
 import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -40,13 +41,13 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
-import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
@@ -129,6 +130,14 @@ public class Table {
     sd.getSerdeInfo().setParameters(new HashMap<String, String>());
   }
   
+  public void reinitSerDe() throws HiveException {
+    try {
+      deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), this.getTTable());
+    } catch (MetaException e) {
+      throw new HiveException(e);
+    }
+  }
+  
   protected void initSerDe() throws HiveException {
     if (deserializer == null) {
       try {
@@ -138,13 +147,16 @@ public class Table {
       }
     }
   }
-
+  
   public void checkValidity() throws HiveException {
     // check for validity
     String name = getTTable().getTableName();
     if (null == name || name.length() == 0 || !MetaStoreUtils.validateName(name)) {
       throw new HiveException("[" + name + "]: is not a valid table name");
     }
+    if (0 == getCols().size()) {
+      throw new HiveException("atleast one column must be specified for the table");
+    }
     if (null == getDeserializer()) {
       throw new HiveException("must specify a non-null serDe");
     }
@@ -154,6 +166,30 @@ public class Table {
     if (null == getOutputFormatClass()) {
       throw new HiveException("must specify an OutputFormat class");
     }
+    
+    Iterator<FieldSchema> iterCols = getCols().iterator();
+    List<String> colNames = new ArrayList<String>();
+    while (iterCols.hasNext()) {
+      String colName = iterCols.next().getName();
+      Iterator<String> iter = colNames.iterator();
+      while (iter.hasNext()) {
+        String oldColName = iter.next();
+        if (colName.equalsIgnoreCase(oldColName)) 
+          throw new HiveException("Duplicate column name " + colName + " in the table definition.");
+      }
+      colNames.add(colName.toLowerCase());
+    }
+
+    if (getPartCols() != null)
+    {
+      // make sure there is no overlap between columns and partitioning columns
+      Iterator<FieldSchema> partColsIter = getPartCols().iterator();
+      while (partColsIter.hasNext()) {
+        String partCol = partColsIter.next().getName();
+        if(colNames.contains(partCol.toLowerCase()))
+            throw new HiveException("Partition collumn name " + partCol + " conflicts with table columns.");
+      }
+    }
     return;
   }
 
@@ -190,6 +226,13 @@ public class Table {
   }
 
   final public Deserializer getDeserializer() {
+    if(deserializer == null) {
+      try {
+        initSerDe();
+      } catch (HiveException e) {
+        LOG.error("Error in initializing serde.", e);
+      }
+    }
     return deserializer;
   }
 
@@ -360,9 +403,30 @@ public class Table {
   }
 
   public List<FieldSchema> getCols() {
-    return getTTable().getSd().getCols();
+    boolean isNative = SerDeUtils.isNativeSerDe(getSerializationLib());
+    if (isNative)
+      return getTTable().getSd().getCols();
+    else {
+      try {
+        return Hive.getFieldsFromDeserializer(getName(), getDeserializer());
+      } catch (HiveException e) {
+        LOG.error("Unable to get field from serde: " + getSerializationLib(), e);
+      }
+      return new ArrayList<FieldSchema>();
+    }
   }
 
+  /**
+   * Returns a list of all the columns of the table (partition columns followed by the data columns).
+   * 
+   * @return List<FieldSchema>
+   */
+  public List<FieldSchema> getAllCols() {
+	  ArrayList<FieldSchema> f_list = new ArrayList<FieldSchema>();
+	  f_list.addAll(getPartCols());
+	  f_list.addAll(getCols());
+	  return f_list;
+  }
   public void setPartCols(List<FieldSchema> partCols) {
     getTTable().setPartitionKeys(partCols);
   }

+ 168 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java

@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.QB;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.selectDesc;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import java.io.Serializable;
+import java.util.List;
+import java.util.Iterator;
+import java.util.ArrayList;
+
+/**
+ * Implementation of one of the rule-based optimization steps. ColumnPruner gets the current operator tree. The tree is traversed to find out the columns used 
+ * for all the base tables. If not all the columns of a table are used, a select is pushed on top of that table (to select only those columns). Since this 
+ * changes the row resolver, the tree is built again. This can be optimized later to patch the tree. 
+ */
+public class ColumnPruner implements Transform {
+  private ParseContext pctx;
+  
+  /**
+   * empty constructor
+   */
+	public ColumnPruner() {
+    pctx = null;
+	}
+
+	/**
+	 * Whether some column pruning needs to be done
+	 * @param op Operator for the base table
+	 * @param colNames columns needed by the query
+	 * @return boolean
+	 */
+  private boolean pushSelect(Operator<? extends Serializable> op, List<String> colNames) {
+    if (pctx.getOpParseCtx().get(op).getRR().getColumnInfos().size() == colNames.size()) return false;
+    return true;
+  }
+
+  /**
+   * update the map between operator and row resolver
+   * @param op operator being inserted
+   * @param rr row resolver of the operator
+   * @return
+   */
+  @SuppressWarnings("nls")
+  private Operator<? extends Serializable> putOpInsertMap(Operator<? extends Serializable> op, RowResolver rr) {
+    OpParseContext ctx = new OpParseContext(rr);
+    pctx.getOpParseCtx().put(op, ctx);
+    return op;
+  }
+
+  /**
+   * insert a select to include only columns needed by the query
+   * @param input operator for the base table
+   * @param colNames columns needed
+   * @return
+   * @throws SemanticException
+   */
+  @SuppressWarnings("nls")
+  private Operator genSelectPlan(Operator input, List<String> colNames) 
+    throws SemanticException {
+
+    RowResolver inputRR  = pctx.getOpParseCtx().get(input).getRR();
+    RowResolver outputRR = new RowResolver();
+    ArrayList<exprNodeDesc> col_list = new ArrayList<exprNodeDesc>();
+    
+    // Iterate over the selects
+    for (int pos = 0; pos < colNames.size(); pos++) {
+      String   internalName = colNames.get(pos);
+      String[] colName      = inputRR.reverseLookup(internalName);
+      ColumnInfo in = inputRR.get(colName[0], colName[1]);
+      outputRR.put(colName[0], colName[1], 
+                   new ColumnInfo((Integer.valueOf(pos)).toString(), in.getType()));
+      col_list.add(new exprNodeColumnDesc(in.getType(), internalName));
+    }
+
+    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
+      new selectDesc(col_list), new RowSchema(outputRR.getColumnInfos()), input), outputRR);
+
+    return output;
+  }
+
+  /**
+   * reset parse context
+   * @param pctx parse context
+   */
+  private void resetParseContext(ParseContext pctx) {
+    pctx.getAliasToPruner().clear();
+    pctx.getAliasToSamplePruner().clear();
+    pctx.getLoadTableWork().clear();
+    pctx.getLoadFileWork().clear();
+    Iterator<Operator<? extends Serializable>> iter = pctx.getOpParseCtx().keySet().iterator();
+    while (iter.hasNext()) {
+      Operator<? extends Serializable> op = iter.next();
+      if ((!pctx.getTopOps().containsValue(op)) && (!pctx.getTopSelOps().containsValue(op)))
+        iter.remove();
+    }
+  }
+	
+  /**
+   * Transform the query tree. For each table under consideration, check if all of its columns are needed. If not, push a select
+   * that projects only the needed columns right after the table scan and proceed. 
+   */
+	public ParseContext transform(ParseContext pactx) throws SemanticException {
+    this.pctx = pactx;
+    boolean done = true;
+    // generate useful columns for all the sources so that they can be pushed immediately after the table scan
+    for (String alias_id : pctx.getTopOps().keySet()) {
+      Operator<? extends Serializable> topOp = pctx.getTopOps().get(alias_id);
+      
+      // Scan the tree bottom-up and generate columns needed for the top operator
+      List<String> colNames = topOp.genColLists(pctx.getOpParseCtx());
+
+      // do we need to push a SELECT? only if not all the columns of the table are used
+      if (pushSelect(topOp, colNames)) {
+        topOp.setChildOperators(null);
+
+        // Generate a select and make it a child of the table scan
+        Operator select = genSelectPlan(topOp, colNames);
+        pctx.getTopSelOps().put(alias_id, select);
+        done = false;
+      }
+    }
+
+    // a select was pushed on top of the table. The old plan is no longer valid. Generate the plan again.
+    // The current tables and the select pushed above (after column pruning) are maintained in the parse context.
+    if (!done) {
+      SemanticAnalyzer sem = (SemanticAnalyzer)SemanticAnalyzerFactory.get(pctx.getConf(), pctx.getParseTree());
+      
+      resetParseContext(pctx);
+      sem.init(pctx);
+    	QB qb = new QB(null, null, false);
+    	
+    	sem.doPhase1(pctx.getParseTree(), qb, sem.initPhase1Ctx());
+    	sem.getMetaData(qb);
+    	sem.genPlan(qb);
+      pctx = sem.getParseContext();
+   	}	
+    return pctx;
+  }
+}
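
pushSelect decides whether pruning is worthwhile simply by comparing how many columns the table's row resolver exposes against how many the query needs. A standalone sketch of that decision follows; the counts and column names are made up.

    import java.util.Arrays;
    import java.util.List;

    public class PruneDecisionSketch {
        // Same idea as ColumnPruner.pushSelect: prune only if something is unused.
        static boolean pushSelect(int tableColumnCount, List<String> neededCols) {
            return tableColumnCount != neededCols.size();
        }

        public static void main(String[] args) {
            List<String> needed = Arrays.asList("0", "2");      // e.g. the query reads two columns
            System.out.println(pushSelect(3, needed));          // true  -> insert a select over the scan
            System.out.println(pushSelect(2, needed));          // false -> every column is already used
        }
    }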

+ 74 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java

@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Implementation of the optimizer
+ */
+public class Optimizer {
+	private ParseContext pctx;
+	private List<Transform> transformations;
+	
+	/**
+	 * empty constructor
+	 */
+	public Optimizer() {
+	}
+
+	/**
+	 * create the list of transformations
+	 */
+	public void initialize() {
+		transformations = new ArrayList<Transform>();
+		transformations.add(new ColumnPruner());
+	}
+	
+	/**
+	 * invoke all the transformations one-by-one, and alter the query plan
+	 * @return ParseContext
+	 * @throws SemanticException
+	 */
+	public ParseContext optimize() throws SemanticException {
+		for (Transform t : transformations)
+			pctx = t.transform(pctx);
+    return pctx;
+	}
+	
+	/**
+	 * @return the pctx
+	 */
+	public ParseContext getPctx() {
+		return pctx;
+	}
+
+	/**
+	 * @param pctx the pctx to set
+	 */
+	public void setPctx(ParseContext pctx) {
+		this.pctx = pctx;
+	}
+	
+	
+}
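
For reference, a sketch of how this class is meant to be driven, consistent with the setters shown here; the actual call site in the semantic analyzer is not part of this hunk, so treat the wrapper below as an assumption.

    import org.apache.hadoop.hive.ql.optimizer.Optimizer;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    public class OptimizerDriverSketch {
        // Assumes the caller already has a populated ParseContext.
        public static ParseContext runOptimizations(ParseContext pctx) throws SemanticException {
            Optimizer optimizer = new Optimizer();
            optimizer.setPctx(pctx);
            optimizer.initialize();          // currently registers just the ColumnPruner
            return optimizer.optimize();     // each Transform rewrites the ParseContext in turn
        }
    }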

+ 15 - 11
src/contrib/hive/serde/src/gen-java/org/apache/hadoop/hive/serde/dynamic_type/DynamicSerDeAsync.java → src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java

@@ -16,17 +16,21 @@
  * limitations under the License.
  */
 
-/* Generated By:JJTree: Do not edit this line. DynamicSerDeAsync.java */
+package org.apache.hadoop.hive.ql.optimizer;
 
-package org.apache.hadoop.hive.serde.dynamic_type;
-
-public class DynamicSerDeAsync extends SimpleNode {
-  public DynamicSerDeAsync(int id) {
-    super(id);
-  }
-
-  public DynamicSerDeAsync(thrift_grammar p, int id) {
-    super(p, id);
-  }
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 
+/**
+ * Optimizer interface. All the rule-based optimizations implement this interface. All the transformations are invoked sequentially. They take the current
+ * parse context (which contains the operator tree among other things), perform all the optimizations, and then return the updated parse context.
+ */
+public interface Transform {
+	/**
+	 * All transformation steps implement this interface
+	 * @param pctx input parse context
+	 * @return ParseContext
+	 * @throws SemanticException
+	 */
+	public ParseContext transform(ParseContext pctx) throws SemanticException;
 }
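
A skeletal, hypothetical rule that satisfies this interface and could be registered in Optimizer.initialize() alongside ColumnPruner; the class name and the no-op body are illustrative only.

    import org.apache.hadoop.hive.ql.optimizer.Transform;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    public class NoopTransform implements Transform {
        public ParseContext transform(ParseContext pctx) throws SemanticException {
            // Inspect pctx.getTopOps(), rewrite operators as needed, then hand the context back.
            return pctx;
        }
    }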

+ 44 - 15
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java

@@ -104,9 +104,10 @@ public abstract class BaseSemanticAnalyzer {
   }
 
   public static String stripQuotes(String val) throws SemanticException {
-    if (val.charAt(0) == '\'' && val.charAt(val.length() - 1) == '\'') {
+    if ((val.charAt(0) == '\'' && val.charAt(val.length() - 1) == '\'')
+      || (val.charAt(0) == '\"' && val.charAt(val.length() - 1) == '\"')) {
       val = val.substring(1, val.length() - 1);
-    }
+    } 
     return val;
   }
 
@@ -142,19 +143,48 @@ public abstract class BaseSemanticAnalyzer {
     }
   }
 
+  /**
+   * Remove the encapsulating "`" pair from the identifier.
+   * We allow users to use "`" to escape identifiers such as table names,
+   * column names and aliases, in case they coincide with Hive language
+   * keywords.
+   */
+  public static String unescapeIdentifier(String val) {
+    if (val == null) {
+      return null;
+    }
+    if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') {
+      val = val.substring(1, val.length() - 1);
+    } 
+    return val;
+  }
+
   @SuppressWarnings("nls")
   public static String unescapeSQLString(String b) {
-    assert(b.charAt(0) == '\'');
-    assert(b.charAt(b.length()-1) == '\'');
+
+    Character enclosure = null;
 
     // Some of the strings can be passed in as unicode. For example, the
     // delimiter can be passed in as \002 - So, we first check if the 
     // string is a unicode number, else go back to the old behavior
     StringBuilder sb = new StringBuilder(b.length());
-    int i = 1;
-    while (i < (b.length()-1)) {
-
-      if (b.charAt(i) == '\\' && (i+4 < b.length())) {
+    for (int i=0; i < b.length(); i++) {
+      
+      char currentChar = b.charAt(i);
+      if (enclosure == null) {
+        if (currentChar == '\'' || b.charAt(i) == '\"') {
+          enclosure = currentChar;
+        }
+        // ignore all other chars outside the enclosure
+        continue;
+      }
+      
+      if (enclosure.equals(currentChar)) {
+        enclosure = null;
+        continue;
+      }
+      
+      if (currentChar == '\\' && (i+4 < b.length())) {
         char i1 = b.charAt(i+1);
         char i2 = b.charAt(i+2);
         char i3 = b.charAt(i+3);
@@ -167,12 +197,12 @@ public abstract class BaseSemanticAnalyzer {
           bValArr[0] = bVal;
           String tmp = new String(bValArr);
           sb.append(tmp);
-          i += 4;
+          i += 3;
           continue;
         }
       }
-        
-      if (b.charAt(i) == '\\' && (i+2 < b.length())) {
+
+      if (currentChar == '\\' && (i+2 < b.length())) {
         char n=b.charAt(i+1);
         switch(n) {
         case '0': sb.append("\0"); break;
@@ -191,9 +221,8 @@ public abstract class BaseSemanticAnalyzer {
         }
         i++;
       } else {
-        sb.append(b.charAt(i));
+        sb.append(currentChar);
       }
-      i++;
     }
     return sb.toString();
   }
@@ -219,7 +248,7 @@ public abstract class BaseSemanticAnalyzer {
 
       try {
         // get table metadata
-        tableName = ast.getChild(0).getText();
+        tableName = unescapeIdentifier(ast.getChild(0).getText());
         tableHandle = db.getTable(tableName);
 
         // get partition metadata if partition specified
@@ -230,7 +259,7 @@ public abstract class BaseSemanticAnalyzer {
           for (int i = 0; i < partspec.getChildCount(); ++i) {
             CommonTree partspec_val = (CommonTree) partspec.getChild(i);
             String val = stripQuotes(partspec_val.getChild(1).getText());
-            partSpec.put(partspec_val.getChild(0).getText(), val);
+            partSpec.put(unescapeIdentifier(partspec_val.getChild(0).getText()), val);
           }
           partHandle = Hive.get().getPartition(tableHandle, partSpec, forceCreatePartition);
           if(partHandle == null) {
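
As a rough illustration of the quote and identifier handling added above, the standalone sketch below mirrors the behaviour of stripQuotes and unescapeIdentifier outside of Hive; the class name EscapeDemo and the sample values are made up for the example.

// Standalone sketch (not Hive code) of stripping '...', "..." and `...` wrappers.
public class EscapeDemo {

  static String stripQuotes(String val) {
    if ((val.charAt(0) == '\'' && val.charAt(val.length() - 1) == '\'')
        || (val.charAt(0) == '"' && val.charAt(val.length() - 1) == '"')) {
      return val.substring(1, val.length() - 1);
    }
    return val;
  }

  static String unescapeIdentifier(String val) {
    if (val != null && val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') {
      return val.substring(1, val.length() - 1);
    }
    return val;
  }

  public static void main(String[] args) {
    System.out.println(stripQuotes("'2008-08-08'"));     // 2008-08-08
    System.out.println(stripQuotes("\"2008-08-08\""));   // 2008-08-08
    System.out.println(unescapeIdentifier("`table`"));   // table (useful when the name is a keyword)
  }
}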

+ 89 - 18
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java

@@ -27,6 +27,7 @@ import java.util.Map;
 
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
@@ -45,12 +46,15 @@ import org.apache.hadoop.hive.ql.plan.showPartitionsDesc;
 import org.apache.hadoop.hive.ql.plan.showTablesDesc;
 import org.apache.hadoop.hive.ql.plan.alterTableDesc.alterTableTypes;
 import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
 
 public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
   private static final Log LOG = LogFactory.getLog("hive.ql.parse.DDLSemanticAnalyzer");
   public static final Map<Integer, String> TokenToTypeName = new HashMap<Integer, String>();
   static {
+    TokenToTypeName.put(HiveParser.TOK_BOOLEAN, Constants.BOOLEAN_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_TINYINT, Constants.TINYINT_TYPE_NAME);
+    TokenToTypeName.put(HiveParser.TOK_SMALLINT, Constants.SMALLINT_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_INT, Constants.INT_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_BIGINT, Constants.BIGINT_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_FLOAT, Constants.FLOAT_TYPE_NAME);
@@ -96,18 +100,26 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
       analyzeAlterTableModifyCols(ast, alterTableTypes.REPLACECOLS);
     else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_DROPPARTS)
       analyzeAlterTableDropParts(ast);
+    else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_PROPERTIES)
+      analyzeAlterTableProps(ast);
+    else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES)
+      analyzeAlterTableSerdeProps(ast);
+    else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_SERIALIZER)
+      analyzeAlterTableSerde(ast);
     else if (ast.getToken().getType() == HiveParser.TOK_SHOWPARTITIONS)
     {
       ctx.setResFile(new Path(getTmpFileName()));
       analyzeShowPartitions(ast);
     }
+    else {
+      throw new SemanticException("Unsupported command.");
+    }
   }
 
   private void analyzeCreateTable(CommonTree ast, boolean isExt) 
     throws SemanticException {
-    String            tableName     = ast.getChild(0).getText();
-    CommonTree        colList       = (CommonTree)ast.getChild(1);
-    List<FieldSchema> cols          = getColumns(colList);
+    String            tableName     = unescapeIdentifier(ast.getChild(0).getText());
+    List<FieldSchema> cols          = null;
     List<FieldSchema> partCols      = null;
     List<String>      bucketCols    = null;
     List<Order>       sortCols      = null;
@@ -117,19 +129,23 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     String            mapKeyDelim   = null;
     String            lineDelim     = null;
     String            comment       = null;
-    boolean           isSequenceFile  = false;
+    boolean           isSequenceFile  = 
+      "SequenceFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT));
     String            location      = null;
     String            serde         = null;
     Map<String, String> mapProp     = null;
 
     LOG.info("Creating table" + tableName);    
     int numCh = ast.getChildCount();
-    for (int num = 2; num < numCh; num++)
+    for (int num = 1; num < numCh; num++)
     {
       CommonTree child = (CommonTree)ast.getChild(num);
       switch (child.getToken().getType()) {
+        case HiveParser.TOK_TABCOLLIST:
+          cols = getColumns(child);
+          break;
         case HiveParser.TOK_TABLECOMMENT:
-          comment = child.getChild(0).getText();
+          comment = unescapeSQLString(child.getChild(0).getText());
           break;
         case HiveParser.TOK_TABLEPARTCOLS:
           partCols = getColumns((CommonTree)child.getChild(0));
@@ -181,6 +197,9 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
         case HiveParser.TOK_TBLSEQUENCEFILE:
           isSequenceFile = true;
           break;
+        case HiveParser.TOK_TBLTEXTFILE:
+          isSequenceFile = false;
+          break;
         case HiveParser.TOK_TABLELOCATION:
           location = unescapeSQLString(child.getChild(0).getText());
           break;
@@ -203,6 +222,15 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     // no duplicate column names
     // currently, it is a simple n*n algorithm - this can be optimized later if need be
     // but it should not be a major bottleneck as the number of columns are anyway not so big
+    
+    if((crtTblDesc.getCols() == null) || (crtTblDesc.getCols().size() == 0)) {
+      // for now make sure that serde exists
+      if(StringUtils.isEmpty(crtTblDesc.getSerName()) || SerDeUtils.isNativeSerDe(crtTblDesc.getSerName())) {
+        throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
+      }
+      return;
+    }
+    
     Iterator<FieldSchema> iterCols = crtTblDesc.getCols().iterator();
     List<String> colNames = new ArrayList<String>();
     while (iterCols.hasNext()) {
@@ -264,9 +292,9 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
         String partCol = partColsIter.next().getName();
         Iterator<String> colNamesIter = colNames.iterator();
         while (colNamesIter.hasNext()) {
-          String colName = colNamesIter.next();
+          String colName = unescapeIdentifier(colNamesIter.next());
           if (partCol.equalsIgnoreCase(colName)) 
-            throw new SemanticException(ErrorMsg.COLUMN_REPAEATED_IN_PARTITIONING_COLS.getMsg());
+            throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
         }
       }
     }
@@ -274,11 +302,52 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
   
   private void analyzeDropTable(CommonTree ast) 
     throws SemanticException {
-    String tableName = ast.getChild(0).getText();    
+    String tableName = unescapeIdentifier(ast.getChild(0).getText());    
     dropTableDesc dropTblDesc = new dropTableDesc(tableName);
     rootTasks.add(TaskFactory.get(new DDLWork(dropTblDesc), conf));
   }
 
+  private void analyzeAlterTableProps(CommonTree ast) throws SemanticException { 
+    String tableName = unescapeIdentifier(ast.getChild(0).getText());    
+    HashMap<String, String> mapProp = getProps((CommonTree)(ast.getChild(1)).getChild(0));
+    alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDPROPS);
+    alterTblDesc.setProps(mapProp);
+    alterTblDesc.setOldName(tableName);
+    rootTasks.add(TaskFactory.get(new DDLWork(alterTblDesc), conf));
+  }
+
+  private void analyzeAlterTableSerdeProps(CommonTree ast) throws SemanticException { 
+    String tableName = unescapeIdentifier(ast.getChild(0).getText());    
+    HashMap<String, String> mapProp = getProps((CommonTree)(ast.getChild(1)).getChild(0));
+    alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDSERDEPROPS);
+    alterTblDesc.setProps(mapProp);
+    alterTblDesc.setOldName(tableName);
+    rootTasks.add(TaskFactory.get(new DDLWork(alterTblDesc), conf));
+  }
+
+  private void analyzeAlterTableSerde(CommonTree ast) throws SemanticException { 
+    String tableName = unescapeIdentifier(ast.getChild(0).getText());    
+    String serdeName = unescapeSQLString(ast.getChild(1).getText());
+    alterTableDesc alterTblDesc = new alterTableDesc(alterTableTypes.ADDSERDE);
+    if(ast.getChildCount() > 2) {
+      HashMap<String, String> mapProp = getProps((CommonTree)(ast.getChild(2)).getChild(0));
+      alterTblDesc.setProps(mapProp);
+    }
+    alterTblDesc.setOldName(tableName);
+    alterTblDesc.setSerdeName(serdeName);
+    rootTasks.add(TaskFactory.get(new DDLWork(alterTblDesc), conf));
+  }
+
+  private HashMap<String, String> getProps(CommonTree prop) {
+    HashMap<String, String> mapProp = new HashMap<String, String>();
+    for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
+      String key = unescapeSQLString(prop.getChild(propChild).getChild(0).getText());
+      String value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText());
+      mapProp.put(key,value);
+    }
+    return mapProp;
+  }
+
   private List<FieldSchema> getColumns(CommonTree ast)
   {
     List<FieldSchema> colList = new ArrayList<FieldSchema>();
@@ -286,7 +355,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     for (int i = 0; i < numCh; i++) {
       FieldSchema col = new FieldSchema();
       CommonTree child = (CommonTree)ast.getChild(i);
-      col.setName(child.getChild(0).getText());
+      col.setName(unescapeIdentifier(child.getChild(0).getText()));
       CommonTree typeChild = (CommonTree)(child.getChild(1));
       if (typeChild.getToken().getType() == HiveParser.TOK_LIST)
       {
@@ -303,7 +372,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
         col.setType(getTypeName(typeChild.getToken().getType()));
         
       if (child.getChildCount() == 3)
-        col.setComment(child.getChild(2).getText());
+        col.setComment(unescapeSQLString(child.getChild(2).getText()));
       colList.add(col);
     }
     return colList;
@@ -315,7 +384,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     int numCh = ast.getChildCount();
     for (int i = 0; i < numCh; i++) {
       CommonTree child = (CommonTree)ast.getChild(i);
-      colList.add(child.getText());
+      colList.add(unescapeIdentifier(child.getText()));
     }
     return colList;
   }
@@ -327,9 +396,9 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     for (int i = 0; i < numCh; i++) {
       CommonTree child = (CommonTree)ast.getChild(i);
       if (child.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEASC)
-        colList.add(new Order(child.getChild(0).getText(), 1));
+        colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), 1));
       else
-        colList.add(new Order(child.getChild(0).getText(), 0));
+        colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()), 0));
     }
     return colList;
   }
@@ -359,7 +428,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
   private void analyzeShowPartitions(CommonTree ast) 
   throws SemanticException {
     showPartitionsDesc showPartsDesc;
-    String tableName = ast.getChild(0).getText();
+    String tableName = unescapeIdentifier(ast.getChild(0).getText());
     showPartsDesc = new showPartitionsDesc(tableName, ctx.getResFile());
     rootTasks.add(TaskFactory.get(new DDLWork(showPartsDesc), conf));
   }
@@ -379,13 +448,15 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
 
   private void analyzeAlterTableRename(CommonTree ast) 
   throws SemanticException {
-    alterTableDesc alterTblDesc = new alterTableDesc(ast.getChild(0).getText(), ast.getChild(1).getText());
+    alterTableDesc alterTblDesc = new alterTableDesc(
+        unescapeIdentifier(ast.getChild(0).getText()),
+        unescapeIdentifier(ast.getChild(1).getText()));
     rootTasks.add(TaskFactory.get(new DDLWork(alterTblDesc), conf));
   }
 
   private void analyzeAlterTableModifyCols(CommonTree ast, alterTableTypes alterType) 
   throws SemanticException {
-    String tblName = ast.getChild(0).getText();
+    String tblName = unescapeIdentifier(ast.getChild(0).getText());
     List<FieldSchema> newCols = getColumns((CommonTree)ast.getChild(1));
     alterTableDesc alterTblDesc = new alterTableDesc(tblName, newCols, alterType);
     rootTasks.add(TaskFactory.get(new DDLWork(alterTblDesc), conf));
@@ -396,7 +467,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     List<HashMap<String, String>> partSpecs = new ArrayList<HashMap<String, String>>();
     int childIndex = 0;
     // get table metadata
-    tblName = ast.getChild(0).getText();
+    tblName = unescapeIdentifier(ast.getChild(0).getText());
     // get partition metadata if partition specified
     for( childIndex = 1; childIndex < ast.getChildCount(); childIndex++) {
       CommonTree partspec = (CommonTree) ast.getChild(childIndex);
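
One behavioural change worth calling out in this file is the relaxed CREATE TABLE check: a table may now be declared without a column list, but only when a custom (non-native) SerDe is supplied. A standalone sketch of that rule is shown below; the class name CreateTableCheck and the SerDe name com.example.MySerDe are made up for the example.

import java.util.List;

// Standalone sketch (not Hive code) of the new CREATE TABLE validation rule:
// an empty column list is only accepted together with a custom, non-native SerDe.
public class CreateTableCheck {

  static void validate(List<String> cols, String serdeName, boolean nativeSerde) {
    if (cols == null || cols.isEmpty()) {
      if (serdeName == null || serdeName.length() == 0 || nativeSerde) {
        throw new IllegalArgumentException(
            "Either list of columns or a custom serializer should be specified");
      }
    }
  }

  public static void main(String[] args) {
    validate(null, "com.example.MySerDe", false);   // accepted: the SerDe supplies the schema
    try {
      validate(null, null, true);                   // rejected: no columns and no custom SerDe
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage());
    }
  }
}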

+ 8 - 3
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java

@@ -38,6 +38,7 @@ public enum ErrorMsg {
   INVALID_OPERATOR_SIGNATURE("Operator Argument Type Mismatch"),
   INVALID_JOIN_CONDITION_1("Both Left and Right Aliases Encountered in Join"),
   INVALID_JOIN_CONDITION_2("Neither Left nor Right Aliases Encountered in Join"),
+  INVALID_JOIN_CONDITION_3("OR not supported in Join currently"),
   INVALID_TRANSFORM("TRANSFORM with Other Select Columns not Supported"),
   DUPLICATE_GROUPBY_KEY("Repeated Key in Group By"),
   UNSUPPORTED_MULTIPLE_DISTINCTS("DISTINCT on Different Columns not Supported"),
@@ -52,14 +53,18 @@ public enum ErrorMsg {
   INVALID_MAPINDEX_TYPE("Map Key Type does not Match Index Expression Type"),
   NON_COLLECTION_TYPE("[] not Valid on Non Collection Types"),
   SELECT_DISTINCT_WITH_GROUPBY("SELECT DISTINCT and GROUP BY can not be in the same query"),
-  COLUMN_REPAEATED_IN_PARTITIONING_COLS("Column repeated in partitioning columns"),
+  COLUMN_REPEATED_IN_PARTITIONING_COLS("Column repeated in partitioning columns"),
   DUPLICATE_COLUMN_NAMES("Duplicate column names"),
   COLUMN_REPEATED_IN_CLUSTER_SORT("Same column cannot appear in cluster and sort by"),
   SAMPLE_RESTRICTION("Cannot Sample on More Than Two Columns"),
   SAMPLE_COLUMN_NOT_FOUND("Sample Column Not Found"),
   NO_PARTITION_PREDICATE("No Partition Predicate Found"),
-  INVALID_DOT(". operator is only supported on struct or list of struct types");
-  
+  INVALID_DOT(". operator is only supported on struct or list of struct types"),
+  INVALID_TBL_DDL_SERDE("Either list of columns or a custom serializer should be specified"),
+  TARGET_TABLE_COLUMN_MISMATCH("Cannot insert into target table because column number/types are different"),
+  TABLE_ALIAS_NOT_ALLOWED("Table Alias not Allowed in Sampling Clause"),
+  NON_BUCKETED_TABLE("Sampling Expression Needed for Non-Bucketed Table");
+
   private String mesg;
   ErrorMsg(String mesg) {
     this.mesg = mesg;

+ 62 - 36
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g

@@ -49,7 +49,7 @@ TOK_OP_LIKE;
 TOK_TRUE;
 TOK_FALSE;
 TOK_TRANSFORM;
-TOK_COLLIST;
+TOK_EXPLIST;
 TOK_ALIASLIST;
 TOK_GROUPBY;
 TOK_ORDERBY;
@@ -64,6 +64,7 @@ TOK_NULL;
 TOK_ISNULL;
 TOK_ISNOTNULL;
 TOK_TINYINT;
+TOK_SMALLINT;
 TOK_INT;
 TOK_BIGINT;
 TOK_BOOLEAN;
@@ -81,6 +82,9 @@ TOK_ALTERTABLE_RENAME;
 TOK_ALTERTABLE_ADDCOLS;
 TOK_ALTERTABLE_REPLACECOLS;
 TOK_ALTERTABLE_DROPPARTS;
+TOK_ALTERTABLE_SERDEPROPERTIES;
+TOK_ALTERTABLE_SERIALIZER;
+TOK_ALTERTABLE_PROPERTIES;
 TOK_SHOWTABLES;
 TOK_SHOWPARTITIONS;
 TOK_CREATEEXTTABLE;
@@ -96,6 +100,7 @@ TOK_TABLEROWFORMATCOLLITEMS;
 TOK_TABLEROWFORMATMAPKEYS;
 TOK_TABLEROWFORMATLINES;
 TOK_TBLSEQUENCEFILE;
+TOK_TBLTEXTFILE;
 TOK_TABCOLNAME;
 TOK_TABLELOCATION;
 TOK_TABLESAMPLE;
@@ -106,10 +111,10 @@ TOK_CHARSETLITERAL;
 TOK_CREATEFUNCTION;
 TOK_EXPLAIN;
 TOK_TABLESERIALIZER;
-TOK_TABLSERDEPROPERTIES;
-TOK_TABLESERDEPROPLIST;
+TOK_TABLEPROPERTIES;
+TOK_TABLEPROPLIST;
 TOK_LIMIT;
-TOKTABLESERDEPROPERTY;
+TOK_TABLEPROPERTY;
 }
 
 
@@ -157,9 +162,9 @@ ddlStatement
     ;
 
 createStatement
-    : KW_CREATE (ext=KW_EXTERNAL)? KW_TABLE name=Identifier LPAREN columnNameTypeList RPAREN tableComment? tablePartition? tableBuckets? tableRowFormat? tableFileFormat? tableLocation?
-    -> {$ext == null}? ^(TOK_CREATETABLE $name columnNameTypeList tableComment? tablePartition? tableBuckets? tableRowFormat? tableFileFormat? tableLocation?)
-    ->                 ^(TOK_CREATEEXTTABLE $name columnNameTypeList tableComment? tablePartition? tableBuckets? tableRowFormat? tableFileFormat? tableLocation?)
+    : KW_CREATE (ext=KW_EXTERNAL)? KW_TABLE name=Identifier (LPAREN columnNameTypeList RPAREN)? tableComment? tablePartition? tableBuckets? tableRowFormat? tableFileFormat? tableLocation?
+    -> {$ext == null}? ^(TOK_CREATETABLE $name columnNameTypeList? tableComment? tablePartition? tableBuckets? tableRowFormat? tableFileFormat? tableLocation?)
+    ->                 ^(TOK_CREATEEXTTABLE $name columnNameTypeList? tableComment? tablePartition? tableBuckets? tableRowFormat? tableFileFormat? tableLocation?)
     ;
 
 dropStatement
@@ -170,6 +175,8 @@ alterStatement
     : alterStatementRename
     | alterStatementAddCol
     | alterStatementDropPartitions
+    | alterStatementProperties
+    | alterStatementSerdeProperties
     ;
 
 alterStatementRename
@@ -188,6 +195,18 @@ alterStatementDropPartitions
     -> ^(TOK_ALTERTABLE_DROPPARTS Identifier partitionSpec+)
     ;
 
+alterStatementProperties
+    : KW_ALTER KW_TABLE name=Identifier KW_SET KW_PROPERTIES tableProperties
+    -> ^(TOK_ALTERTABLE_PROPERTIES $name tableProperties)
+    ;
+
+alterStatementSerdeProperties
+    : KW_ALTER KW_TABLE name=Identifier KW_SET KW_SERDE serde=StringLiteral (KW_WITH KW_SERDEPROPERTIES tableProperties)?
+    -> ^(TOK_ALTERTABLE_SERIALIZER $name $serde tableProperties?)
+    | KW_ALTER KW_TABLE name=Identifier KW_SET KW_SERDEPROPERTIES tableProperties
+    -> ^(TOK_ALTERTABLE_SERDEPROPERTIES $name tableProperties)
+    ;
+
 descStatement
     : KW_DESCRIBE (isExtended=KW_EXTENDED)? (tab=tabName)  -> ^(TOK_DESCTABLE $tab $isExtended?)
     ;
@@ -227,23 +246,23 @@ tableRowFormat
     :
       KW_ROW KW_FORMAT KW_DELIMITED tableRowFormatFieldIdentifier? tableRowFormatCollItemsIdentifier? tableRowFormatMapKeysIdentifier? tableRowFormatLinesIdentifier? 
     -> ^(TOK_TABLEROWFORMAT tableRowFormatFieldIdentifier? tableRowFormatCollItemsIdentifier? tableRowFormatMapKeysIdentifier? tableRowFormatLinesIdentifier?)
-    | KW_ROW KW_FORMAT KW_SERIALIZER name=StringLiteral tableSerializerProperties?
-    -> ^(TOK_TABLESERIALIZER $name tableSerializerProperties?)
+    | KW_ROW KW_FORMAT KW_SERDE name=StringLiteral (KW_WITH KW_SERDEPROPERTIES serdeprops=tableProperties)?
+    -> ^(TOK_TABLESERIALIZER $name $serdeprops?)
     ;
 
-tableSerializerProperties
+tableProperties
     :
-      KW_WITH KW_PROPERTIES LPAREN propertiesList RPAREN -> ^(TOK_TABLSERDEPROPERTIES propertiesList)
+      LPAREN propertiesList RPAREN -> ^(TOK_TABLEPROPERTIES propertiesList)
     ;
 
 propertiesList
     :
-      keyValueProperty (COMMA keyValueProperty)* -> ^(TOK_TABLESERDEPROPLIST keyValueProperty+)
+      keyValueProperty (COMMA keyValueProperty)* -> ^(TOK_TABLEPROPLIST keyValueProperty+)
     ;
 
 keyValueProperty
     :
-      key=StringLiteral EQUAL value=StringLiteral -> ^(TOKTABLESERDEPROPERTY $key $value)
+      key=StringLiteral EQUAL value=StringLiteral -> ^(TOK_TABLEPROPERTY $key $value)
     ;
 
 tableRowFormatFieldIdentifier
@@ -273,6 +292,7 @@ tableRowFormatLinesIdentifier
 tableFileFormat
     :
       KW_STORED KW_AS KW_SEQUENCEFILE  -> TOK_TBLSEQUENCEFILE
+      | KW_STORED KW_AS KW_TEXTFILE  -> TOK_TBLTEXTFILE
     ;
 
 tableLocation
@@ -317,6 +337,7 @@ colType
 
 primitiveType
     : KW_TINYINT       ->    TOK_TINYINT
+    | KW_SMALLINT      ->    TOK_SMALLINT
     | KW_INT           ->    TOK_INT
     | KW_BIGINT        ->    TOK_BIGINT
     | KW_BOOLEAN       ->    TOK_BOOLEAN
@@ -420,23 +441,22 @@ selectClause
 
 selectList
     :
-    selectItem
-    ( COMMA  selectItem )* -> selectItem+
+    selectItem ( COMMA  selectItem )* -> selectItem+
+    | trfmClause -> ^(TOK_SELEXPR trfmClause)
     ;
 
 selectItem
     :
-      trfmClause -> ^(TOK_SELEXPR trfmClause)
-    | (selectExpression  (KW_AS Identifier)?) -> ^(TOK_SELEXPR selectExpression Identifier?)
+    ( selectExpression  (KW_AS Identifier)?) -> ^(TOK_SELEXPR selectExpression Identifier?)
     ;
     
 trfmClause
     :
     KW_TRANSFORM
-    LPAREN columnList RPAREN
-    KW_AS 
-    LPAREN aliasList RPAREN
-    KW_USING StringLiteral -> ^(TOK_TRANSFORM columnList aliasList StringLiteral)
+    LPAREN expressionList RPAREN
+    KW_USING StringLiteral
+    (KW_AS LPAREN aliasList RPAREN)?
+    -> ^(TOK_TRANSFORM expressionList StringLiteral aliasList?)
     ;
     
 selectExpression
@@ -448,18 +468,19 @@ selectExpression
 
 tableAllColumns
     :
-    Identifier DOT STAR -> ^(TOK_ALLCOLREF Identifier)
+    STAR -> ^(TOK_ALLCOLREF)
+    | Identifier DOT STAR -> ^(TOK_ALLCOLREF Identifier)
     ;
     
 // table.column
 tableColumn
     :
-    (tab=Identifier)? DOT col=Identifier -> ^(TOK_COLREF $tab? $col)
+    (tab=Identifier  DOT)? col=Identifier -> ^(TOK_COLREF $tab? $col)
     ;
 
-columnList
+expressionList
     :
-    tableColumn (COMMA tableColumn)* -> ^(TOK_COLLIST tableColumn+)
+    expression (COMMA expression)* -> ^(TOK_EXPLIST expression+)
     ;
 
 aliasList
@@ -478,7 +499,7 @@ fromClause
 joinSource    
     :
     fromSource 
-    ( joinToken^ fromSource (KW_ON! precedenceEqualExpression)? )+
+    ( joinToken^ fromSource (KW_ON! expression)? )+
     ;
 
 joinToken
@@ -496,7 +517,7 @@ fromSource
     
 tableSample
     :
-    KW_TABLESAMPLE LPAREN KW_BUCKET (numerator=Number) KW_OUT KW_OF (denominator=Number) (KW_ON col+=Identifier (COMMA col+=Identifier)*)? RPAREN -> ^(TOK_TABLESAMPLE $numerator $denominator $col*)
+    KW_TABLESAMPLE LPAREN KW_BUCKET (numerator=Number) KW_OUT KW_OF (denominator=Number) (KW_ON expr+=expression (COMMA expr+=expression)*)? RPAREN -> ^(TOK_TABLESAMPLE $numerator $denominator $expr*)
     ;
 
 tableSource
@@ -570,12 +591,12 @@ function
     : // LEFT and RIGHT keywords are also function names
     Identifier
     LPAREN (
-          (dist=KW_DISTINCT)?
-          expression
-          (COMMA expression)*
+          ((dist=KW_DISTINCT)?
+           expression
+           (COMMA expression)*)?
         )?
-    RPAREN -> {$dist == null}? ^(TOK_FUNCTION Identifier expression+)
-                          -> ^(TOK_FUNCTIONDI Identifier expression+)
+    RPAREN -> {$dist == null}? ^(TOK_FUNCTION Identifier (expression+)?)
+                          -> ^(TOK_FUNCTIONDI Identifier (expression+)?)
 
     ;
 
@@ -644,7 +665,7 @@ precedenceBitwiseXorExpression
     precedenceUnaryExpression (precedenceBitwiseXorOperator^ precedenceUnaryExpression)*
     ;
 
-
+	
 precedenceStarOperator
     :
     STAR | DIVIDE | MOD
@@ -808,6 +829,7 @@ KW_TO: 'TO';
 KW_COMMENT: 'COMMENT';
 KW_BOOLEAN: 'BOOLEAN';
 KW_TINYINT: 'TINYINT';
+KW_SMALLINT: 'SMALLINT';
 KW_INT: 'INT';
 KW_BIGINT: 'BIGINT';
 KW_FLOAT: 'FLOAT';
@@ -834,6 +856,7 @@ KW_KEYS: 'KEYS';
 KW_LINES: 'LINES';
 KW_STORED: 'STORED';
 KW_SEQUENCEFILE: 'SEQUENCEFILE';
+KW_TEXTFILE: 'TEXTFILE';
 KW_LOCATION: 'LOCATION';
 KW_TABLESAMPLE: 'TABLESAMPLE';
 KW_BUCKET: 'BUCKET';
@@ -849,10 +872,12 @@ KW_TEMPORARY: 'TEMPORARY';
 KW_FUNCTION: 'FUNCTION';
 KW_EXPLAIN: 'EXPLAIN';
 KW_EXTENDED: 'EXTENDED';
-KW_SERIALIZER: 'SERIALIZER';
+KW_SERDE: 'SERDE';
 KW_WITH: 'WITH';
-KW_PROPERTIES: 'SERDEPROPERTIES';
+KW_SERDEPROPERTIES: 'SERDEPROPERTIES';
 KW_LIMIT: 'LIMIT';
+KW_SET: 'SET';
+KW_PROPERTIES: 'TBLPROPERTIES';
 
 // Operators
 
@@ -909,7 +934,7 @@ Exponent
 
 StringLiteral
     :
-    '\'' (~'\'')* '\'' ( '\'' (~'\'')* '\'' )*
+    ( '\'' (~'\'')* '\'' | '\"' (~'\"')* '\"' )+
     ;
 
 CharSetLiteral
@@ -926,6 +951,7 @@ Number
 Identifier
     :
     (Letter | Digit) (Letter | Digit | '_')*
+    | '`' (Letter | Digit) (Letter | Digit | '_')* '`'
     ;
 
 CharSetName

+ 69 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java

@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+
+/**
+ * Implementation of the Operator Parse Context. It maintains the parse context
+ * that may be needed by an operator. Currently, it only maintains the row
+ * resolver and the list of columns used by the operator
+ **/
+
+public class OpParseContext {
+  private RowResolver rr;  // row resolver for the operator
+
+  // list of internal column names used
+  private List<String> colNames;
+
+  /**
+   * @param rr row resolver
+   */
+  public OpParseContext(RowResolver rr) {
+    this.rr = rr;
+  }
+
+  /**
+   * @return the row resolver
+   */
+  public RowResolver getRR() {
+    return rr;
+  }
+
+  /**
+   * @param rr the row resolver to set
+   */
+  public void setRR(RowResolver rr) {
+    this.rr = rr;
+  }
+
+  /**
+   * @return the column names desired
+   */
+  public List<String> getColNames() {
+    return colNames;
+  }
+
+  /**
+   * @param colNames the column names to set
+   */
+  public void setColNames(List<String> colNames) {
+    this.colNames = colNames;
+  }
+}

+ 0 - 68
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OperatorInfo.java

@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.parse;
-
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.parse.RowResolver;
-
-/**
- * Implementation of OperatorInfo which bundles the operator and its output row resolver
- *
- **/
-
-public class OperatorInfo implements Cloneable {
-    private Operator<?> op;
-    private RowResolver rr;
-    
-    public OperatorInfo(Operator<?> op, RowResolver rr) {
-      this.op = op;
-      this.rr = rr;
-    }
-    
-    public Object clone() {
-      return new OperatorInfo(op, rr);
-    }
-    
-    public Operator<?> getOp() {
-      return op;
-    }
-
-    public void setOp(Operator<?> op) {
-      this.op = op;
-    }
-
-    public RowResolver getRowResolver() {
-      return rr;
-    }
-
-    public void setRowResolver(RowResolver rr) {
-      this.rr = rr;
-    }
-
-    public String toString() {
-      StringBuffer sb = new StringBuffer();
-      String terminal_str = op.toString();
-      sb.append(terminal_str.substring(terminal_str.lastIndexOf('.')+1));
-      sb.append("[");
-      sb.append(rr.toString());
-      sb.append("]");
-      return sb.toString();
-    }
-}
-

+ 264 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java

@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.loadFileDesc;
+import org.apache.hadoop.hive.ql.plan.loadTableDesc;
+import org.antlr.runtime.tree.CommonTree;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.conf.HiveConf;
+
+/**
+ * Parse Context: The current parse context. This is passed to the optimizer
+ * which then transforms the operator tree using the parse context. All the
+ * optimizations are performed sequentially, and then the new parse context is
+ * populated. Note that since the parse context contains the operator tree, it
+ * can be easily retrieved by the next optimization step or finally for task
+ * generation after the plan has been completely optimized.
+ * 
+ **/
+
+public class ParseContext {
+  private QB qb;
+  private CommonTree ast;
+  private HashMap<String, PartitionPruner> aliasToPruner;
+  private HashMap<String, SamplePruner> aliasToSamplePruner;
+  private HashMap<String, Operator<? extends Serializable>> topOps;
+  private HashMap<String, Operator<? extends Serializable>> topSelOps;
+  private HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx;
+  private List<loadTableDesc> loadTableWork;
+  private List<loadFileDesc> loadFileWork;
+  private Context ctx;
+  private HiveConf conf;
+
+  /**
+   * @param qb
+   *          current QB
+   * @param ast
+   *          current parse tree
+   * @param aliasToPruner
+   *          partition pruner list
+   * @param aliasToSamplePruner
+   *          sample pruner list
+   * @param loadFileWork
+   *          list of destination files being loaded
+   * @param loadTableWork
+   *          list of destination tables being loaded
+   * @param opParseCtx
+   *          operator parse context - contains a mapping from operator to
+   *          operator parse state (row resolver etc.)
+   * @param topOps
+   *          list of operators for the top query
+   * @param topSelOps
+   *          list of operators for the selects introduced for column pruning
+   */
+  public ParseContext(HiveConf conf, QB qb, CommonTree ast,
+      HashMap<String, PartitionPruner> aliasToPruner,
+      HashMap<String, SamplePruner> aliasToSamplePruner,
+      HashMap<String, Operator<? extends Serializable>> topOps,
+      HashMap<String, Operator<? extends Serializable>> topSelOps,
+      HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx,
+      List<loadTableDesc> loadTableWork, List<loadFileDesc> loadFileWork,
+      Context ctx) {
+    this.conf = conf;
+    this.qb = qb;
+    this.ast = ast;
+    this.aliasToPruner = aliasToPruner;
+    this.aliasToSamplePruner = aliasToSamplePruner;
+    this.loadFileWork = loadFileWork;
+    this.loadTableWork = loadTableWork;
+    this.opParseCtx = opParseCtx;
+    this.topOps = topOps;
+    this.topSelOps = topSelOps;
+    this.ctx = ctx;
+  }
+
+  /**
+   * @return the qb
+   */
+  public QB getQB() {
+    return qb;
+  }
+
+  /**
+   * @param qb
+   *          the qb to set
+   */
+  public void setQB(QB qb) {
+    this.qb = qb;
+  }
+
+  /**
+   * @return the context
+   */
+  public Context getContext() {
+    return ctx;
+  }
+
+  /**
+   * @param ctx
+   *          the context to set
+   */
+  public void setContext(Context ctx) {
+    this.ctx = ctx;
+  }
+
+  /**
+   * @return the hive conf
+   */
+  public HiveConf getConf() {
+    return conf;
+  }
+
+  /**
+   * @param conf
+   *          the conf to set
+   */
+  public void setConf(HiveConf conf) {
+    this.conf = conf;
+  }
+
+  /**
+   * @return the ast
+   */
+  public CommonTree getParseTree() {
+    return ast;
+  }
+
+  /**
+   * @param ast
+   *          the parsetree to set
+   */
+  public void setParseTree(CommonTree ast) {
+    this.ast = ast;
+  }
+
+  /**
+   * @return the aliasToPruner
+   */
+  public HashMap<String, PartitionPruner> getAliasToPruner() {
+    return aliasToPruner;
+  }
+
+  /**
+   * @param aliasToPruner
+   *          the aliasToPruner to set
+   */
+  public void setAliasToPruner(HashMap<String, PartitionPruner> aliasToPruner) {
+    this.aliasToPruner = aliasToPruner;
+  }
+
+  /**
+   * @return the aliasToSamplePruner
+   */
+  public HashMap<String, SamplePruner> getAliasToSamplePruner() {
+    return aliasToSamplePruner;
+  }
+
+  /**
+   * @param aliasToSamplePruner
+   *          the aliasToSamplePruner to set
+   */
+  public void setAliasToSamplePruner(
+      HashMap<String, SamplePruner> aliasToSamplePruner) {
+    this.aliasToSamplePruner = aliasToSamplePruner;
+  }
+
+  /**
+   * @return the topOps
+   */
+  public HashMap<String, Operator<? extends Serializable>> getTopOps() {
+    return topOps;
+  }
+
+  /**
+   * @param topOps
+   *          the topOps to set
+   */
+  public void setTopOps(HashMap<String, Operator<? extends Serializable>> topOps) {
+    this.topOps = topOps;
+  }
+
+  /**
+   * @return the topSelOps
+   */
+  public HashMap<String, Operator<? extends Serializable>> getTopSelOps() {
+    return topSelOps;
+  }
+
+  /**
+   * @param topSelOps
+   *          the topSelOps to set
+   */
+  public void setTopSelOps(
+      HashMap<String, Operator<? extends Serializable>> topSelOps) {
+    this.topSelOps = topSelOps;
+  }
+
+  /**
+   * @return the opParseCtx
+   */
+  public HashMap<Operator<? extends Serializable>, OpParseContext> getOpParseCtx() {
+    return opParseCtx;
+  }
+
+  /**
+   * @param opParseCtx
+   *          the opParseCtx to set
+   */
+  public void setOpParseCtx(
+      HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) {
+    this.opParseCtx = opParseCtx;
+  }
+
+  /**
+   * @return the loadTableWork
+   */
+  public List<loadTableDesc> getLoadTableWork() {
+    return loadTableWork;
+  }
+
+  /**
+   * @param loadTableWork
+   *          the loadTableWork to set
+   */
+  public void setLoadTableWork(List<loadTableDesc> loadTableWork) {
+    this.loadTableWork = loadTableWork;
+  }
+
+  /**
+   * @return the loadFileWork
+   */
+  public List<loadFileDesc> getLoadFileWork() {
+    return loadFileWork;
+  }
+
+  /**
+   * @param loadFileWork
+   *          the loadFileWork to set
+   */
+  public void setLoadFileWork(List<loadFileDesc> loadFileWork) {
+    this.loadFileWork = loadFileWork;
+  }
+}
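
To make the "performed sequentially" wording above concrete, a hypothetical driver loop (not part of this patch) could chain Transform implementations as in the sketch below; the OptimizerSketch name and the surrounding wiring are assumptions, only the Transform and ParseContext signatures come from this commit.

// Hypothetical optimizer driver (assumed to sit in the same package as Transform):
// each transformation consumes the current ParseContext and returns the updated one.
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class OptimizerSketch {
  private List<Transform> transformations = new ArrayList<Transform>();

  public void add(Transform t) {
    transformations.add(t);
  }

  public ParseContext optimize(ParseContext pctx) throws SemanticException {
    for (Transform t : transformations) {
      // each step sees the operator tree left behind by the previous step
      pctx = t.transform(pctx);
    }
    return pctx;
  }
}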

+ 46 - 9
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java

@@ -59,6 +59,12 @@ public class PartitionPruner {
   private Table tab;
 
   private exprNodeDesc prunerExpr;
+  
+  // is set to true if the expression only contains partitioning columns and not any other column reference.
+  // This is used to optimize the "select * from table where ..." scenario: when the where condition only references
+  // partitioning columns, the matching partitions are identified and streamed directly to the client without requiring
+  // a map-reduce job.
+  private boolean containsPartCols;
 
   /** Creates a new instance of PartitionPruner */
   public PartitionPruner(String tableAlias, QBMetaData metaData) {
@@ -66,8 +72,13 @@ public class PartitionPruner {
     this.metaData = metaData;
     this.tab = metaData.getTableForAlias(tableAlias);
     this.prunerExpr = null;
+    containsPartCols = true;
   }
 
+  public boolean containsPartitionCols() {
+    return containsPartCols;
+  }
+  
   /**
    * We use exprNodeConstantDesc(class,null) to represent unknown values.
    * Except UDFOPAnd, UDFOPOr, and UDFOPNot, all UDFs are assumed to return unknown values 
@@ -97,12 +108,18 @@ public class PartitionPruner {
     switch (tokType) {
       case HiveParser.TOK_COLREF: {
 
-        assert(expr.getChildCount() == 2);
-        String tabAlias = expr.getChild(0).getText();
-        String colName = expr.getChild(1).getText();
-        if (tabAlias == null || colName == null) {
-          throw new SemanticException(ErrorMsg.INVALID_XPATH.getMsg(expr));
+        String tabAlias = null;
+        String colName = null;
+        if (expr.getChildCount() != 1) {
+          assert(expr.getChildCount() == 2);
+          tabAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText());
+          colName = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText());
+        }
+        else {
+          colName = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText());
+          tabAlias = SemanticAnalyzer.getTabAliasForCol(this.metaData, colName, (CommonTree)expr.getChild(0));
         }
+
         // Set value to null if it's not partition column
         if (tabAlias.equals(tableAlias) && tab.isPartitionKey(colName)) {
           desc = new exprNodeColumnDesc(String.class, colName); 
@@ -117,6 +134,7 @@ public class PartitionPruner {
               TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(
                                                                                this.metaData.getTableForAlias(tabAlias).getDeserializer().getObjectInspector());
               desc = new exprNodeConstantDesc(typeInfo.getStructFieldTypeInfo(colName), null);
+              containsPartCols = false;
             }
           } catch (SerDeException e){
             throw new RuntimeException(e);
@@ -195,8 +213,8 @@ public class PartitionPruner {
       case HiveParser.TOK_COLREF: {
 
         assert(expr.getChildCount() == 2);
-        String tabAlias = expr.getChild(0).getText();
-        String colName = expr.getChild(1).getText();
+        String tabAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText());
+        String colName = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText());
         if (tabAlias.equals(tableAlias) && tab.isPartitionKey(colName)) {
           hasPPred = true;
         }
@@ -227,11 +245,30 @@ public class PartitionPruner {
     if (!(desc instanceof exprNodeConstantDesc) || ((exprNodeConstantDesc)desc).getValue() != null ) {
       LOG.trace("adding pruning expr = " + desc);
       if (this.prunerExpr == null)
-      	this.prunerExpr = desc;
+        this.prunerExpr = desc;
       else
         this.prunerExpr = SemanticAnalyzer.getFuncExprNodeDesc("OR", this.prunerExpr, desc);
     }
   }
+
+  /** 
+   * Add an expression from the JOIN condition. Since these expressions will be used for all the where clauses, they 
+   * are always ANDed. Then we walk through the remaining filters (in the where clause) and OR them with the existing
+   * condition.
+   */
+  @SuppressWarnings("nls")
+  public void addJoinOnExpression(CommonTree expr) throws SemanticException {
+    LOG.trace("adding pruning Tree = " + expr.toStringTree());
+    exprNodeDesc desc = genExprNodeDesc(expr);
+    // Ignore null constant expressions
+    if (!(desc instanceof exprNodeConstantDesc) || ((exprNodeConstantDesc)desc).getValue() != null ) {
+      LOG.trace("adding pruning expr = " + desc);
+      if (this.prunerExpr == null)
+        this.prunerExpr = desc;
+      else
+        this.prunerExpr = SemanticAnalyzer.getFuncExprNodeDesc("AND", this.prunerExpr, desc);
+    }
+  }
   
   /** From the table metadata prune the partitions to return the partitions **/
   @SuppressWarnings("nls")
@@ -282,7 +319,7 @@ public class PartitionPruner {
           }
         }
         else
-        	ret_parts.add(part);
+          ret_parts.add(part);
       }
     }
     catch (Exception e) {
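
The combination rules described in the comments above, where WHERE-clause predicates are OR-ed into prunerExpr while JOIN ... ON predicates are AND-ed, can be illustrated with the standalone sketch below; plain booleans stand in for the exprNodeDesc trees, and the class and method names are made up for the example.

// Standalone illustration (booleans instead of expression trees) of how pruning
// conditions accumulate in the partition pruner.
public class PrunerExprDemo {
  private Boolean prunerExpr;   // null means "no condition added yet"

  // conditions from WHERE clauses are OR-ed: a partition survives if any clause may select it
  void addWhereExpression(boolean cond) {
    prunerExpr = (prunerExpr == null) ? cond : (prunerExpr || cond);
  }

  // conditions from JOIN ... ON are AND-ed: they apply to every branch of the query
  void addJoinOnExpression(boolean cond) {
    prunerExpr = (prunerExpr == null) ? cond : (prunerExpr && cond);
  }

  public static void main(String[] args) {
    PrunerExprDemo p = new PrunerExprDemo();
    p.addJoinOnExpression(true);      // e.g. ds = '2008-08-08' from the ON clause
    p.addWhereExpression(false);      // e.g. hr = 12 from one WHERE clause
    System.out.println(p.prunerExpr); // true OR false == true: the partition is kept
  }
}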

+ 1 - 10
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java

@@ -22,7 +22,6 @@ import java.util.*;
 
 import org.apache.hadoop.hive.ql.parse.QBParseInfo;
 import org.apache.hadoop.hive.ql.parse.QBMetaData;
-import org.apache.hadoop.hive.ql.metadata.Table;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -158,14 +157,6 @@ public class QB {
   }
 
   public boolean isSelectStarQuery() {
-    if (!qbp.isSelectStarQuery() || !aliasToSubq.isEmpty())
-      return false;
-
-    Iterator<Map.Entry<String, Table>> iter = qbm.getAliasToTable().entrySet().iterator();
-    Table tab = ((Map.Entry<String, Table>)iter.next()).getValue();
-    if (tab.isPartitioned())
-      return false;
-    
-    return true;
+    return qbp.isSelectStarQuery() && aliasToSubq.isEmpty();
   }
 }

+ 34 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java

@@ -22,6 +22,10 @@ import java.util.Vector;
 
 import org.antlr.runtime.tree.CommonTree;
 
+/**
+ * Internal representation of the join tree
+ *
+ */
 public class QBJoinTree 
 {
   private String        leftAlias;
@@ -33,15 +37,29 @@ public class QBJoinTree
   private joinCond[]    joinCond;
   private boolean       noOuterJoin;
   
-  // conditions
+  // join conditions
   private Vector<Vector<CommonTree>> expressions;
 
+  // filters
+  private Vector<Vector<CommonTree>> filters;
+  
+  /**
+   * constructor 
+   */
   public QBJoinTree() { nextTag = 0;}
 
+  /**
+   * returns left alias if any - this is used for merging later on
+   * @return left alias if any
+   */
   public String getLeftAlias() {
     return leftAlias;
   }
 
+  /**
+   * set left alias for the join expression
+   * @param leftAlias String
+   */
   public void setLeftAlias(String leftAlias) {
     this.leftAlias = leftAlias;
   }
@@ -109,6 +127,21 @@ public class QBJoinTree
   public void setNoOuterJoin(boolean noOuterJoin) {
     this.noOuterJoin = noOuterJoin;
   }
+
+  /**
+   * @return the filters
+   */
+  public Vector<Vector<CommonTree>> getFilters() {
+    return filters;
+  }
+
+  /**
+   * @param filters the filters to set
+   */
+  public void setFilters(Vector<Vector<CommonTree>> filters) {
+    this.filters = filters;
+  }
+
 }
 
 

+ 0 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java

@@ -115,5 +115,4 @@ public class QBMetaData {
   public Table getSrcForAlias(String alias) {
     return this.aliasToTable.get(alias.toLowerCase());
   }
-  
 }

+ 4 - 11
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java

@@ -32,7 +32,6 @@ import org.apache.commons.logging.LogFactory;
 public class QBParseInfo {
 
   private boolean isSubQ;
-  private boolean canOptTopQ;
   private String alias;
   private CommonTree joinExpr;
   private HashMap<String, CommonTree> aliasToSrc;
@@ -67,7 +66,6 @@ public class QBParseInfo {
     
     this.alias = alias;
     this.isSubQ = isSubQ;
-    this.canOptTopQ = false;
     this.outerQueryLimit = -1;
   }
 
@@ -127,6 +125,10 @@ public class QBParseInfo {
     return this.destToWhereExpr.get(clause);
   }
 
+  public HashMap<String, CommonTree> getDestToWhereExpr() {
+    return destToWhereExpr;
+  }
+
   public CommonTree getGroupByForClause(String clause) {
     return this.destToGroupby.get(clause);
   }
@@ -151,14 +153,6 @@ public class QBParseInfo {
     return this.isSubQ;
   }
 
-  public boolean getCanOptTopQ() {
-    return this.canOptTopQ;
-  }
-
-  public void setCanOptTopQ(boolean canOptTopQ) {
-    this.canOptTopQ = canOptTopQ;
-  }
-  
   public CommonTree getJoinExpr() {
     return this.joinExpr;
   }
@@ -201,7 +195,6 @@ public class QBParseInfo {
     if (isSubQ || 
        (joinExpr != null) ||
        (!nameToSample.isEmpty()) ||
-       (!destToWhereExpr.isEmpty()) ||
        (!destToGroupby.isEmpty()) ||
        (!destToClusterby.isEmpty()))
       return false;

+ 41 - 6
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java

@@ -77,14 +77,49 @@ public class RowResolver {
     return rslvMap.get(tab_alias.toLowerCase()) != null;
   }
 
-  public ColumnInfo get(String tab_alias, String col_alias) {
-    tab_alias = tab_alias.toLowerCase();
+  /**
+   * Gets the ColumnInfo for a tab_alias.col_alias style column reference. If the tab_alias is not
+   * provided, as can be the case with a non-aliased column, this function looks up the column in all
+   * the table aliases in this row resolver and returns the match. It throws an exception if
+   * the column is found in multiple table aliases, and returns null if no match is found.
+   * 
+   * This allows us to interpret both "select t.c1" style references and "select c1" style references.
+   * The latter kind are what we call non-aliased column references in the query.
+   * 
+   * @param tab_alias The table alias to match (this is null if the column reference is non aliased)
+   * @param col_alias The column name that is being searched for
+   * @return ColumnInfo
+   * @throws SemanticException
+   */
+  public ColumnInfo get(String tab_alias, String col_alias) 
+    throws SemanticException {
     col_alias = col_alias.toLowerCase();
-    HashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias);
-    if (f_map == null) {
-      return null;
+    ColumnInfo ret = null;
+
+    if (tab_alias != null) {
+      tab_alias = tab_alias.toLowerCase();
+      HashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias);
+      if (f_map == null) {
+        return null;
+      }
+      ret = f_map.get(col_alias);
     }
-    return f_map.get(col_alias);
+    else {
+      boolean found = false;
+      for(LinkedHashMap<String, ColumnInfo> cmap: rslvMap.values()) {
+        for(Map.Entry<String, ColumnInfo> cmapEnt: cmap.entrySet()) {
+          if (col_alias.equalsIgnoreCase((String)cmapEnt.getKey())) {
+            if (found) {
+              throw new SemanticException("Column " + col_alias + " found in more than one table/subquery");
+            }
+            found = true;
+            ret = (ColumnInfo)cmapEnt.getValue();
+          }
+        }
+      }
+    }
+
+    return ret; 
   }
 
   public Vector<ColumnInfo> getColumnInfos() {
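
A standalone sketch of the non-aliased lookup described above: when no table alias is given, every alias map is searched, a hit in more than one alias is treated as ambiguous, and null is returned when nothing matches. Plain string maps stand in for the row resolver, and all names below are made up for the example.

import java.util.LinkedHashMap;
import java.util.Map;

// Standalone illustration of the non-aliased column lookup and its ambiguity check.
public class ColumnLookupDemo {
  static String lookup(Map<String, Map<String, String>> rslvMap, String colAlias) {
    String match = null;
    boolean found = false;
    for (Map<String, String> cmap : rslvMap.values()) {
      if (cmap.containsKey(colAlias)) {
        if (found) {
          throw new RuntimeException("Column " + colAlias + " found in more than one table/subquery");
        }
        found = true;
        match = cmap.get(colAlias);
      }
    }
    return match;   // null when the column is not known at all
  }

  public static void main(String[] args) {
    Map<String, Map<String, String>> rslvMap = new LinkedHashMap<String, Map<String, String>>();
    rslvMap.put("t1", new LinkedHashMap<String, String>());
    rslvMap.put("t2", new LinkedHashMap<String, String>());
    rslvMap.get("t1").put("c1", "t1.c1");
    rslvMap.get("t2").put("c2", "t2.c2");

    System.out.println(lookup(rslvMap, "c2"));   // resolves uniquely to t2.c2
    System.out.println(lookup(rslvMap, "c3"));   // null: unknown column
  }
}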

+ 61 - 8
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePruner.java

@@ -22,43 +22,96 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.fs.Path;
-import java.util.*;
-
 
+/**
+ * 
+ * This class stores the mapping from a table alias to the parse tree information of its table
+ * sample clause (stored in the TableSample class).
+ *
+ */
 public class SamplePruner {
+
+  /**
+   * Table alias for the table, e.g. in "FROM t TABLESAMPLE(1 OUT OF 2 ON rand()) a",
+   * "a" is the table alias.
+   */
   private String tabAlias;
+  
+  /**
+   * The parse tree corresponding to the TABLESAMPLE clause. e.g. for
+   * "FROM t TABLESAMPLE(1 OUT OF 2 ON rand()) a", the parse tree of
+   * "TABLESAMPLE(1 OUT OF 2 ON rand())" is parsed out and stored in tableSample.
+   */  
   private TableSample tableSample;
-  // The log
-    @SuppressWarnings("nls")
-    private static final Log LOG = LogFactory.getLog("hive.ql.parse.SamplePruner");
+ 
+  /**
+   * The log handle for this class
+   */
+  @SuppressWarnings("nls")
+  private static final Log LOG = LogFactory.getLog("hive.ql.parse.SamplePruner");
 
+  /**
+   * Constructs the SamplePruner given the table alias and the table sample
+   *
+   * @param alias The alias of the table specified in the query
+   * @param tableSample The parse information of the TABLESAMPLE clause
+   */
   public SamplePruner(String alias, TableSample tableSample) {
     this.tabAlias = alias;
     this.tableSample = tableSample;
   }
+  
+  /**
+   * Gets the table alias
+   * 
+   * @return String
+   */
   public String getTabAlias() {
     return this.tabAlias;
   }
+  
+  /**
+   * Sets the table alias
+   * 
+   * @param tabAlias The table alias as specified in the query
+   */
   public void setTabAlias(String tabAlias) {
     this.tabAlias = tabAlias;
   }
+  
+  /**
+   * Gets the parse information of the associated table sample clause
+   * 
+   * @return TableSample
+   */
   public TableSample getTableSample() {
     return this.tableSample;
   }
+  
+  /**
+   * Sets the parse information of the associated table sample clause
+   * 
+   * @param tableSample Information related to the table sample clause
+   */
   public void setTableSample(TableSample tableSample) {
     this.tableSample = tableSample;
   }
 
+  /**
+   * Prunes to get all the files in the partition that satisfy the TABLESAMPLE clause
+   * 
+   * @param part The partition to prune
+   * @return Path[]
+   * @throws SemanticException
+   */
   @SuppressWarnings("nls")
   public Path[] prune(Partition part) throws SemanticException {
     int num = this.tableSample.getNumerator();
     int den = this.tableSample.getDenominator();
     int bucketCount = part.getBucketCount();
-    List<String> tabBucketCols = part.getBucketCols();
-    ArrayList<String> sampleCols = this.tableSample.getCols();
     String fullScanMsg = "";
     // check if input pruning is possible
-    if (sampleCols == null || sampleCols.size() == 0 || tabBucketCols.equals(sampleCols)) {
+    if (this.tableSample.getInputPruning()) {
       LOG.trace("numerator = " + num);
       LOG.trace("denominator = " + den);
       LOG.trace("bucket count = " + bucketCount);

Changes are not shown because the file size is too large.
+ 607 - 145
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java


+ 3 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java

@@ -39,6 +39,9 @@ public class SemanticAnalyzerFactory {
       case HiveParser.TOK_ALTERTABLE_REPLACECOLS:
       case HiveParser.TOK_ALTERTABLE_RENAME:
       case HiveParser.TOK_ALTERTABLE_DROPPARTS:
+      case HiveParser.TOK_ALTERTABLE_PROPERTIES:
+      case HiveParser.TOK_ALTERTABLE_SERIALIZER:
+      case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES:
       case HiveParser.TOK_SHOWTABLES:
       case HiveParser.TOK_SHOWPARTITIONS:
         return new DDLSemanticAnalyzer(conf);

+ 100 - 7
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/TableSample.java

@@ -19,34 +19,127 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.util.ArrayList;
+import org.antlr.runtime.tree.CommonTree;
 
+/**
+ * 
+ * This class stores all the information specified in the TABLESAMPLE clause. e.g.
+ * for the clause "FROM t TABLESAMPLE(1 OUT OF 2 ON c1)" it will store the numerator
+ * 1, the denominator 2 and the list of expressions (in this case c1) in the appropriate
+ * fields. The aforementioned sampling clause causes the 1st bucket to be picked out of
+ * the 2 buckets created by hashing on c1.
+ *
+ */
 public class TableSample {
+	
+  /**
+   * The numerator of the TABLESAMPLE clause
+   */
   private int numerator;
+  
+  /**
+   * The denominator of the TABLESAMPLE clause
+   */
   private int denominator;
-  private ArrayList<String> cols;
   
-  public TableSample(String num, String den, ArrayList<String> cols) {
+  /**
+   * The list of expressions following the ON part of the TABLESAMPLE clause. This list is
+   * empty when there are no expressions, as in the clause
+   * "FROM t TABLESAMPLE(1 OUT OF 2)". In that case the sampling is done
+   * on the table's clustering column (as specified when the table was created). If
+   * the table does not have any clustering column, the usage of a table sample clause
+   * without an ON part is disallowed by the compiler.
+   */
+  private ArrayList<CommonTree> exprs;
+  
+  /**
+   * Flag to indicate that input files can be pruned
+   */
+  private boolean inputPruning;
+  
+  /**
+   * Constructs the TableSample given the numerator, denominator and the list of
+   * ON clause expressions
+   * 
+   * @param num The numerator
+   * @param den The denominator
+   * @param exprs The list of expressions in the ON part of the TABLESAMPLE clause
+   */
+  public TableSample(String num, String den, ArrayList<CommonTree> exprs) {
     this.numerator = Integer.valueOf(num).intValue();
     this.denominator = Integer.valueOf(den).intValue();
-    this.cols = cols;
+    this.exprs = exprs;
   }
+  
+  /**
+   * Gets the numerator
+   * 
+   * @return int
+   */
   public int getNumerator() {
     return this.numerator;
   }
+  
+  /**
+   * Sets the numerator
+   * 
+   * @param num The numerator
+   */
   public void setNumerator(int num) {
     this.numerator = num;
   }
+  
+  /**
+   * Gets the denominator
+   * 
+   * @return int
+   */
   public int getDenominator() {
     return this.denominator;
   }
+  
+  /**
+   * Sets the denominator
+   * 
+   * @param den The denominator
+   */
   public void setDenominator(int den) {
     this.denominator = den;
   }
-  public ArrayList<String> getCols() {
-    return this.cols;
+  
+  /**
+   * Gets the ON part's expression list
+   * 
+   * @return ArrayList<CommonTree>
+   */
+  public ArrayList<CommonTree> getExprs() {
+    return this.exprs;
   }
-  public void setCols(ArrayList<String> cols) {
-    this.cols = cols;
+  
+  /**
+   * Sets the expression list
+   * 
+   * @param exprs The expression list
+   */
+  public void setExprs(ArrayList<CommonTree> exprs) {
+    this.exprs = exprs;
   }
 
+  /**
+   * Gets the flag that indicates whether input pruning is possible
+   * 
+   * @return boolean
+   */
+  public boolean getInputPruning() {
+	  return this.inputPruning;
+  }
+ 
+  /**
+   * Sets the flag that indicates whether input pruning is possible or not
+   * 
+   * @param inputPruning true if input pruning is possible
+   */
+  public void setInputPruning(boolean inputPruning) {
+	  this.inputPruning = inputPruning;
+  }
 }
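
A small usage sketch of this holder class, assuming the clause TABLESAMPLE(1 OUT OF 2) with no ON part (so the expression list is empty and, per the SamplePruner change above, input pruning can be enabled). The class name TableSampleUsage is made up; the constructor and accessors come from this file.

// Hypothetical usage of TableSample for "TABLESAMPLE(1 OUT OF 2)" (no ON expressions).
import java.util.ArrayList;

import org.antlr.runtime.tree.CommonTree;
import org.apache.hadoop.hive.ql.parse.TableSample;

public class TableSampleUsage {
  public static void main(String[] args) {
    TableSample ts = new TableSample("1", "2", new ArrayList<CommonTree>());
    // with no ON part, sampling falls back to the table's clustering column, so the
    // compiler may mark the sample as prunable (assumption for this example)
    ts.setInputPruning(true);
    System.out.println(ts.getNumerator() + " out of " + ts.getDenominator());
  }
}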

+ 161 - 18
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java

@@ -20,13 +20,27 @@ package org.apache.hadoop.hive.ql.plan;
 
 import java.util.*;
 import java.io.*;
+
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
 
+import com.facebook.thrift.protocol.TBinaryProtocol;
+
 public class PlanUtils {
 
   public static enum ExpressionTypes {FIELD, JEXL};
@@ -37,21 +51,45 @@ public class PlanUtils {
                           new LinkedHashMap<String, ArrayList<String>> (),
                           new LinkedHashMap<String, partitionDesc> (),
                           new HashMap<String, Operator<? extends Serializable>> (),
-                          new schemaDesc(),
-                          new HashMap<String, schemaDesc> (),
+                          new tableDesc(),
+                          new ArrayList<tableDesc> (),
                           null,
                           Integer.valueOf (1));
   }
   
+  /** 
+   * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode
+   * and column names (comma separated string).
+   */
   public static tableDesc getDefaultTableDesc(String separatorCode, String columns) {
+    return getDefaultTableDesc(separatorCode, columns, false);
+  }
+
+  /** 
+   * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode
+   * and column names (comma separated string), and whether the last column should take
+   * the rest of the line.
+   */
+  public static tableDesc getDefaultTableDesc(String separatorCode, String columns,
+      boolean lastColumnTakesRestOfTheLine) {
+    Properties properties = Utilities.makeProperties(
+        Constants.SERIALIZATION_FORMAT, separatorCode,
+        "columns", columns);
+    if (lastColumnTakesRestOfTheLine) {
+      properties.setProperty(
+          Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST,
+          "true");
+    }
     return new tableDesc(
         MetadataTypedColumnsetSerDe.class,
         TextInputFormat.class,
         IgnoreKeyTextOutputFormat.class,
-        Utilities.makeProperties(
-            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, separatorCode,
-            "columns", columns));    
+        properties);    
   }
+
+  /** 
+   * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode
+   */
   public static tableDesc getDefaultTableDesc(String separatorCode) {
     return new tableDesc(
         MetadataTypedColumnsetSerDe.class,
@@ -61,26 +99,131 @@ public class PlanUtils {
             org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, separatorCode));    
   }
 
+  /** 
+   * Generate the table descriptor of DynamicSerDe and TBinarySortableProtocol.
+   */
+  public static tableDesc getBinarySortableTableDesc(List<FieldSchema> fieldSchemas) {
+    String structName = "binary_sortable_table";
+    return new tableDesc(
+        DynamicSerDe.class,
+        SequenceFileInputFormat.class,
+        SequenceFileOutputFormat.class,
+        Utilities.makeProperties(
+            "name", structName,        
+            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, TBinarySortableProtocol.class.getName(),
+            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_DDL, 
+              MetaStoreUtils.getDDLFromFieldSchema(structName, fieldSchemas)
+        ));    
+  }
+
+  /** 
+   * Generate the table descriptor of DynamicSerDe and TBinaryProtocol.
+   */
+  public static tableDesc getBinaryTableDesc(List<FieldSchema> fieldSchemas) {
+    String structName = "binary_table";
+    return new tableDesc(
+        DynamicSerDe.class,
+        SequenceFileInputFormat.class,
+        SequenceFileOutputFormat.class,
+        Utilities.makeProperties(
+            "name", structName,
+            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, TBinaryProtocol.class.getName(),
+            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_DDL, 
+              MetaStoreUtils.getDDLFromFieldSchema(structName, fieldSchemas)
+        ));    
+  }
   
-  // We will make reduce key and reduce value TableDesc with configurable SerDes   
+  
+  /** 
+   * Convert the ColumnList to FieldSchema list.
+   */
+  public static List<FieldSchema> getFieldSchemasFromColumnList(ArrayList<exprNodeDesc> cols, 
+      String fieldPrefix) {
+    List<FieldSchema> schemas = new ArrayList<FieldSchema>(cols.size());
+    for (int i=0; i<cols.size(); i++) {
+      schemas.add(TypeInfoUtils.getFieldSchemaFromTypeInfo(fieldPrefix + i, cols.get(i).getTypeInfo()));
+    }
+    return schemas;
+  }
+  
+  /** 
+   * Convert the RowSchema to FieldSchema list.
+   */
+  public static List<FieldSchema> getFieldSchemasFromRowSchema(RowSchema row, String fieldPrefix) {
+    Vector<ColumnInfo> c = row.getSignature();
+    return getFieldSchemasFromColumnInfo(c, fieldPrefix);
+  }
+  
+  /** 
+   * Convert a list of ColumnInfo to a FieldSchema list.
+   */
+  public static List<FieldSchema> getFieldSchemasFromColumnInfo(Vector<ColumnInfo> cols, String fieldPrefix) {
+    List<FieldSchema> schemas = new ArrayList<FieldSchema>(cols.size());
+    for (int i=0; i<cols.size(); i++) {
+      String name = cols.get(i).getInternalName();
+      if (name.equals(Integer.valueOf(i).toString())) {
+        name = fieldPrefix + name; 
+      }
+      schemas.add(TypeInfoUtils.getFieldSchemaFromTypeInfo(name, cols.get(i).getType()));
+    }
+    return schemas;
+  }
+  
+  /**
+   * Create the reduce sink descriptor.
+   * @param keyCols   The columns to be stored in the key
+   * @param valueCols The columns to be stored in the value
+   * @param tag       The tag for this reducesink
+   * @param partitionCols The columns for partitioning.
+   * @param numReducers  The number of reducers.
+   * @param inferNumReducers  whether we should try to infer the number of reducers.
+   * @return The reduceSinkDesc object.
+   */
   public static reduceSinkDesc getReduceSinkDesc(ArrayList<exprNodeDesc> keyCols, 
                                                  ArrayList<exprNodeDesc> valueCols, 
-                                                 int tag, int numPartitionFields, 
+                                                 int tag, 
+                                                 ArrayList<exprNodeDesc> partitionCols, 
                                                  int numReducers, boolean inferNumReducers) {
-     
-    return new reduceSinkDesc(keyCols, valueCols, tag, numPartitionFields, numReducers, inferNumReducers,
-      getDefaultTableDesc("" + Utilities.ctrlaCode, ObjectInspectorUtils.getIntegerCSV(keyCols.size())),
-      getDefaultTableDesc("" + Utilities.ctrlaCode, ObjectInspectorUtils.getIntegerCSV(valueCols.size())));
+    
+    return new reduceSinkDesc(keyCols, valueCols, tag, partitionCols, numReducers, inferNumReducers,
+        getBinarySortableTableDesc(getFieldSchemasFromColumnList(keyCols, "reducesinkkey")),
+        getBinaryTableDesc(getFieldSchemasFromColumnList(valueCols, "reducesinkvalue")));
   }
 
-  // We should read the TableDesc from gWork when it is available.   
-  public static tableDesc getReduceKeyDesc(mapredWork gWork) {
-     return getDefaultTableDesc("" + Utilities.ctrlaCode);
-  }
+  /**
+   * Create the reduce sink descriptor.
+   * @param keyCols   The columns to be stored in the key
+   * @param valueCols The columns to be stored in the value
+   * @param tag       The tag for this reducesink
+   * @param numPartitionFields  The first numPartitionFields of keyCols will be partition columns.
+   *                  If numPartitionFields=-1, then partition randomly.
+   * @param numReducers  The number of reducers.
+   * @param inferNumReducers  whether we should try to infer the number of reducers.
+   * @return The reduceSinkDesc object.
+   */
+  public static reduceSinkDesc getReduceSinkDesc(ArrayList<exprNodeDesc> keyCols, 
+                                                 ArrayList<exprNodeDesc> valueCols, 
+                                                 int tag, 
+                                                 int numPartitionFields, 
+                                                 int numReducers, boolean inferNumReducers) {
+    ArrayList<exprNodeDesc> partitionCols = null;
 
-  // We should read the TableDesc from gWork when it is available.   
-  public static tableDesc getReduceValueDesc(mapredWork gWork, int tag) {
-     return getDefaultTableDesc("" + Utilities.ctrlaCode);
+    if (numPartitionFields >= keyCols.size()) {
+      partitionCols = keyCols;
+    } else if (numPartitionFields >= 0) {
+      partitionCols = new ArrayList<exprNodeDesc>(numPartitionFields);
+      for (int i=0; i<numPartitionFields; i++) {
+        partitionCols.add(keyCols.get(i));
+      }
+    } else {
+      // numPartitionFields = -1 means random partitioning
+      partitionCols = new ArrayList<exprNodeDesc>(1);
+      partitionCols.add(SemanticAnalyzer.getFuncExprNodeDesc("rand"));
+    }
+    
+    return getReduceSinkDesc(keyCols, valueCols, tag, partitionCols, numReducers, inferNumReducers);
   }
   
+  
 }
+  
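
The last overload keeps the old numPartitionFields calling convention and simply translates it into a partitionCols list: the whole key, a prefix of it, or a single rand() expression for random partitioning. A minimal sketch (not part of the patch) of the common single-key case, mirroring how the test driver builds its plans:

  // Illustrative only: key and partition on "key", ship "value" to the reducer.
  ArrayList<exprNodeDesc> keyCols = new ArrayList<exprNodeDesc>();
  keyCols.add(new exprNodeColumnDesc(String.class, "key"));
  ArrayList<exprNodeDesc> valueCols = new ArrayList<exprNodeDesc>();
  valueCols.add(new exprNodeColumnDesc(String.class, "value"));

  reduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
      keyCols, valueCols,
      -1,     // tag
      1,      // numPartitionFields: partition on the first key column; -1 would use rand()
      1,      // numReducers
      false); // inferNumReducers

  // Keys are serialized with DynamicSerDe over TBinarySortableProtocol, so the
  // shuffle's byte-wise comparison matches the logical sort order of the key.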

+ 41 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/alterTableDesc.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -28,12 +29,14 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 public class alterTableDesc extends ddlDesc implements Serializable 
 {
   private static final long serialVersionUID = 1L;
-  public static enum alterTableTypes {RENAME, ADDCOLS, REPLACECOLS};
+  public static enum alterTableTypes {RENAME, ADDCOLS, REPLACECOLS, ADDPROPS, ADDSERDE, ADDSERDEPROPS};
     
   alterTableTypes      op;
   String               oldName;
   String               newName;
   List<FieldSchema>    newCols;
+  String               serdeName;
+  Map<String, String>  props;
   
   /**
    * @param oldName old name of the table
@@ -54,6 +57,13 @@ public class alterTableDesc extends ddlDesc implements Serializable
     this.oldName = name;
     this.newCols = newCols;
   }
+  
+  /**
+   * @param alterType type of alter op
+   */
+  public alterTableDesc(alterTableTypes alterType) {
+    this.op = alterType;
+  }
 
   /**
    * @return the old name of the table
@@ -130,4 +140,34 @@ public class alterTableDesc extends ddlDesc implements Serializable
     this.newCols = newCols;
   }
 
+  /**
+   * @return the serdeName
+   */
+  @explain(displayName="deserializer library")
+  public String getSerdeName() {
+    return serdeName;
+  }
+
+  /**
+   * @param serdeName the serdeName to set
+   */
+  public void setSerdeName(String serdeName) {
+    this.serdeName = serdeName;
+  }
+
+  /**
+   * @return the props
+   */
+  @explain(displayName="properties")
+  public Map<String, String> getProps() {
+    return props;
+  }
+
+  /**
+   * @param props the props to set
+   */
+  public void setProps(Map<String, String> props) {
+    this.props = props;
+  }
+
 }
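
With only an op, a serde name and a property map to carry, the new alter types can be built from a bare descriptor. A minimal sketch (not part of the patch) of what the analyzer might produce for a hypothetical "ALTER TABLE ... SET SERDE ... WITH SERDEPROPERTIES (...)" statement; the property below is purely illustrative:

  // Illustrative only: switch the table's serde and attach one serde property.
  alterTableDesc alterDesc = new alterTableDesc(alterTableDesc.alterTableTypes.ADDSERDE);
  alterDesc.setSerdeName("org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe");

  Map<String, String> serdeProps = new HashMap<String, String>();
  serdeProps.put("serialization.format", "9");   // hypothetical property value
  alterDesc.setProps(serdeProps);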

+ 11 - 2
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeColumnDesc.java

@@ -19,20 +19,22 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.ql.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 
 public class exprNodeColumnDesc extends exprNodeDesc implements Serializable {
   private static final long serialVersionUID = 1L;
   private String column;
-  private boolean isVirtual;
   
   public exprNodeColumnDesc() {}
   public exprNodeColumnDesc(TypeInfo typeInfo, String column) {
     super(typeInfo);
     this.column = column;
-    this.isVirtual = isVirtual;
   }
   public exprNodeColumnDesc(Class<?> c, String column) {
     super(TypeInfoFactory.getPrimitiveTypeInfo(c));
@@ -54,4 +56,11 @@ public class exprNodeColumnDesc extends exprNodeDesc implements Serializable {
   public String getExprString() {
     return getColumn();
   }
+
+  public List<String> getCols() {
+    List<String> lst = new ArrayList<String>();
+    lst.add(column);
+    return lst;
+  }
+
 }

+ 7 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java

@@ -19,9 +19,11 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
 
 public class exprNodeDesc implements Serializable {  
   private static final long serialVersionUID = 1L;
@@ -51,4 +53,9 @@ public class exprNodeDesc implements Serializable {
   public String getTypeString() {
     return typeInfo.getTypeName();
   }
+
+  public List<String> getCols() {
+    return null;
+  }
+
 }

+ 13 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java

@@ -19,8 +19,13 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+
 
 public class exprNodeFieldDesc extends exprNodeDesc implements Serializable {
   private static final long serialVersionUID = 1L;
@@ -67,5 +72,12 @@ public class exprNodeFieldDesc extends exprNodeDesc implements Serializable {
   @Override
   public String getExprString() {
     return this.desc.getExprString() + "." + this.fieldName;
-  }
+  }
+
+  public List<String> getCols() {
+    List<String> colList = new ArrayList<String>();
+    if (desc != null)
+      colList = Utilities.mergeUniqElems(colList, desc.getCols());
+    return colList;
+  }
 }

+ 17 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFuncDesc.java

@@ -21,12 +21,15 @@ package org.apache.hadoop.hive.ql.plan;
 import java.io.Serializable;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
 
 /**
  * The reason that we have to store UDFClass as well as UDFMethod is because
@@ -133,4 +136,18 @@ public class exprNodeFuncDesc extends exprNodeDesc implements Serializable {
     
     return sb.toString();
   }
+
+  public List<String> getCols() {
+    List<String> colList = new ArrayList<String>();
+    if (children != null) {
+      int pos = 0;
+      while (pos < children.size()) {
+        List<String> colCh = children.get(pos).getCols();
+        colList = Utilities.mergeUniqElems(colList, colCh);
+        pos++;
+      }
+    }
+
+    return colList;
+  }
 }

+ 17 - 2
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeIndexDesc.java

@@ -19,8 +19,13 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+
 
 
 public class exprNodeIndexDesc extends exprNodeDesc implements Serializable {
@@ -61,5 +66,15 @@ public class exprNodeIndexDesc extends exprNodeDesc implements Serializable {
   @Override
   public String getExprString() {
     return this.desc.getExprString() + "[" + this.index.getExprString() + "]";
-  }
-}
+  }
+  
+  public List<String> getCols() {
+    List<String> colList = new ArrayList<String>();
+    if (desc != null)
+      colList = Utilities.mergeUniqElems(colList, desc.getCols());
+    if (index != null)
+      colList = Utilities.mergeUniqElems(colList, index.getCols());
+    
+    return colList;
+  }
+}
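
Each expression node now reports the columns it references: the base class returns null (constants contribute nothing), column nodes return themselves, and field, index and function nodes fold their children together with Utilities.mergeUniqElems. A minimal sketch (not part of the patch) of the merging behaviour on two plain column references:

  // Illustrative only: collect a duplicate-free column list, the same way
  // exprNodeFuncDesc.getCols() folds the lists of its children.
  exprNodeDesc keyRef = new exprNodeColumnDesc(String.class, "key");
  exprNodeDesc valueRef = new exprNodeColumnDesc(String.class, "value");

  List<String> cols = new ArrayList<String>();
  cols = Utilities.mergeUniqElems(cols, keyRef.getCols());    // [key]
  cols = Utilities.mergeUniqElems(cols, valueRef.getCols());  // [key, value]
  cols = Utilities.mergeUniqElems(cols, keyRef.getCols());    // still [key, value]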

+ 41 - 49
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/fetchWork.java

@@ -19,99 +19,91 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.io.Serializable;
-import java.util.Properties;
+import java.util.List;
 
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.Deserializer;
-import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.hive.ql.plan.tableDesc;
 
 @explain(displayName="Fetch Operator")
 public class fetchWork implements Serializable {
   private static final long serialVersionUID = 1L;
 
-  //  private loadFileDesc loadFileWork;
-  //  private tableDesc    tblDesc;
-  private Path srcDir;
-  private Properties schema;
-  private Class<? extends Deserializer> deserializerClass;
-  private Class<? extends InputFormat> inputFormatClass;
+  private Path tblDir;
+  private tableDesc tblDesc;
+
+  private List<Path> partDir;
+  private List<partitionDesc> partDesc;
+
   private int limit;
 
   public fetchWork() { }
 
-	/**
-	 * @param deserializer
-	 * @param deserializerClass
-	 * @param inputFormatClass
-	 * @param schema
-	 * @param srcDir
-	 */
-	public fetchWork(Path srcDir,
-			Class<? extends Deserializer> deserializerClass,
-			Class<? extends InputFormat> inputFormatClass, Properties schema,
-			int limit) {
-		this.srcDir = srcDir;
-		this.deserializerClass = deserializerClass;
-		this.inputFormatClass = inputFormatClass;
-		this.schema = schema;
+	public fetchWork(Path tblDir, tableDesc tblDesc, int limit) {
+		this.tblDir = tblDir;
+		this.tblDesc = tblDesc;
 		this.limit = limit;
 	}
 
+	public fetchWork(List<Path> partDir, List<partitionDesc> partDesc, int limit) {
+		this.partDir = partDir;
+		this.partDesc = partDesc;
+		this.limit = limit;
+	}
+	
 	/**
-	 * @return the srcDir
+	 * @return the tblDir
 	 */
-  @explain(displayName="source")
-	public Path getSrcDir() {
-		return srcDir;
+	public Path getTblDir() {
+		return tblDir;
 	}
 
 	/**
-	 * @param srcDir the srcDir to set
+	 * @param tblDir the tblDir to set
 	 */
-	public void setSrcDir(Path srcDir) {
-		this.srcDir = srcDir;
+	public void setTblDir(Path tblDir) {
+		this.tblDir = tblDir;
 	}
 
 	/**
-	 * @return the schema
+	 * @return the tblDesc
 	 */
-	public Properties getSchema() {
-		return schema;
+	public tableDesc getTblDesc() {
+		return tblDesc;
 	}
 
 	/**
-	 * @param schema the schema to set
+	 * @param tblDesc the tblDesc to set
 	 */
-	public void setSchema(Properties schema) {
-		this.schema = schema;
+	public void setTblDesc(tableDesc tblDesc) {
+		this.tblDesc = tblDesc;
 	}
 
 	/**
-	 * @return the deserializerClass
+	 * @return the partDir
 	 */
-	public Class<? extends Deserializer> getDeserializerClass() {
-		return deserializerClass;
+	public List<Path> getPartDir() {
+		return partDir;
 	}
 
 	/**
-	 * @param deserializerClass the deserializerClass to set
+	 * @param partDir the partDir to set
 	 */
-	public void setDeserializerClass(Class<? extends Deserializer> deserializerClass) {
-		this.deserializerClass = deserializerClass;
+	public void setPartDir(List<Path> partDir) {
+		this.partDir = partDir;
 	}
 
 	/**
-	 * @return the inputFormatClass
+	 * @return the partDesc
 	 */
-	public Class<? extends InputFormat> getInputFormatClass() {
-		return inputFormatClass;
+	public List<partitionDesc> getPartDesc() {
+		return partDesc;
 	}
 
 	/**
-	 * @param inputFormatClass the inputFormatClass to set
+	 * @param partDesc the partDesc to set
 	 */
-	public void setInputFormatClass(Class<? extends InputFormat> inputFormatClass) {
-		this.inputFormatClass = inputFormatClass;
+	public void setPartDesc(List<partitionDesc> partDesc) {
+		this.partDesc = partDesc;
 	}
 
 	/**
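
fetchWork now describes the fetch source as either a single table directory with its tableDesc, or parallel lists of partition directories and partitionDescs, instead of carrying a raw path, schema and serde/input-format classes. A minimal sketch (not part of the patch) of the unpartitioned case; the warehouse path is hypothetical:

  // Illustrative only: fetch at most 100 rows from an unpartitioned table.
  Path tblDir = new Path("/user/hive/warehouse/src");   // hypothetical location
  tableDesc tbl = PlanUtils.getDefaultTableDesc("" + Utilities.ctrlaCode, "key,value");

  fetchWork fw = new fetchWork(tblDir, tbl, 100);
  // FetchTask can derive the deserializer and input format from fw.getTblDesc()
  // instead of holding them as separate class references.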

+ 3 - 2
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/groupByDesc.java

@@ -23,11 +23,12 @@ public class groupByDesc implements java.io.Serializable {
   /** Group-by Mode:
    *  COMPLETE: complete 1-phase aggregation: aggregate, evaluate
    *  PARTIAL1: partial aggregation - first phase:  aggregate, evaluatePartial
-   *  PARTIAL2: partial aggregation - second phase: aggregatePartial, evaluate
+   *  PARTIAL2: partial aggregation - second phase: aggregatePartial, evaluatePartial
+   *  FINAL: partial aggregation - final phase: aggregatePartial, evaluate
    *  HASH: the same as PARTIAL1 but use hash-table-based aggregation  
    */
   private static final long serialVersionUID = 1L;
-  public static enum Mode { COMPLETE, PARTIAL1, PARTIAL2, HASH };
+  public static enum Mode { COMPLETE, PARTIAL1, PARTIAL2, FINAL, HASH };
   private Mode mode;
   private java.util.ArrayList<exprNodeDesc> keys;
   private java.util.ArrayList<org.apache.hadoop.hive.ql.plan.aggregationDesc> aggregators;

+ 14 - 14
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/mapredWork.java

@@ -39,10 +39,10 @@ public class mapredWork implements Serializable {
 
   // map<->reduce interface
   // schema of the map-reduce 'key' object - this is homogeneous
-  private schemaDesc keySchema;
+  private tableDesc keyDesc;
 
   // schema of the map-reduce 'val' object - this is heterogeneous
-  private HashMap<String,schemaDesc> aliasToSchema;
+  private List<tableDesc> tagToValueDesc;
 
   private Operator<?> reducer;
   
@@ -57,16 +57,16 @@ public class mapredWork implements Serializable {
     final LinkedHashMap<String,ArrayList<String>> pathToAliases,
     final LinkedHashMap<String,partitionDesc> pathToPartitionInfo,
     final HashMap<String,Operator<? extends Serializable>> aliasToWork,
-    final schemaDesc keySchema,
-    HashMap<String,schemaDesc> aliasToSchema,
+    final tableDesc keyDesc,
+    List<tableDesc> tagToValueDesc,
     final Operator<?> reducer,
     final Integer numReduceTasks) {
     this.command = command;
     this.pathToAliases = pathToAliases;
     this.pathToPartitionInfo = pathToPartitionInfo;
     this.aliasToWork = aliasToWork;
-    this.keySchema = keySchema;
-    this.aliasToSchema = aliasToSchema;
+    this.keyDesc = keyDesc;
+    this.tagToValueDesc = tagToValueDesc;
     this.reducer = reducer;
     this.numReduceTasks = numReduceTasks;
   }
@@ -100,17 +100,17 @@ public class mapredWork implements Serializable {
   public void setAliasToWork(final HashMap<String,Operator<? extends Serializable>> aliasToWork) {
     this.aliasToWork=aliasToWork;
   }
-  public schemaDesc getKeySchema() {
-    return this.keySchema;
+  public tableDesc getKeyDesc() {
+    return this.keyDesc;
   }
-  public void setKeySchema(final schemaDesc keySchema) {
-    this.keySchema = keySchema;
+  public void setKeyDesc(final tableDesc keyDesc) {
+    this.keyDesc = keyDesc;
   }
-  public HashMap<String,schemaDesc> getAliasToSchema() {
-    return this.aliasToSchema;
+  public List<tableDesc> getTagToValueDesc() {
+    return tagToValueDesc;
   }
-  public void setAliasToSchema(final HashMap<String,schemaDesc> aliasToSchema) {
-    this.aliasToSchema = aliasToSchema;
+  public void setTagToValueDesc(final List<tableDesc> tagToValueDesc) {
+    this.tagToValueDesc = tagToValueDesc;
   }
 
   @explain(displayName="Reduce Operator Tree")

+ 28 - 14
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/reduceSinkDesc.java

@@ -23,20 +23,34 @@ import java.io.Serializable;
 @explain(displayName="Reduce Output Operator")
 public class reduceSinkDesc implements Serializable {
   private static final long serialVersionUID = 1L;
-  // these are the expressions that go into the reduce key
+  /**
+   * Key columns are passed to reducer in the "key". 
+   */
   private java.util.ArrayList<exprNodeDesc> keyCols;
+  /**
+   * Value columns are passed to reducer in the "value". 
+   */
   private java.util.ArrayList<exprNodeDesc> valueCols;
-  // Describe how to serialize the key
+  /** 
+   * Describe how to serialize the key.
+   */
   private tableDesc keySerializeInfo;
-  // Describe how to serialize the value
+  /**
+   * Describe how to serialize the value.
+   */
   private tableDesc valueSerializeInfo;
   
+  /**
+   * The tag for this reducesink descriptor.
+   */
   private int tag;
   
-  // The partition key will be the first #numPartitionFields of keyCols
-  // If the value is 0, then all data will go to a single reducer
-  // If the value is -1, then data will go to a random reducer 
-  private int numPartitionFields;
+  /**
+   * The partition columns (CLUSTER BY or DISTRIBUTE BY in Hive language).
+   * Partition columns determine which reducer each row goes to.
+   * Partition columns are not passed to the reducer.
+   */
+  private java.util.ArrayList<exprNodeDesc> partitionCols;
   
   private boolean inferNumReducers;
   private int numReducers;
@@ -47,7 +61,7 @@ public class reduceSinkDesc implements Serializable {
     (java.util.ArrayList<exprNodeDesc> keyCols,
      java.util.ArrayList<exprNodeDesc> valueCols,
      int tag,
-     int numPartitionFields,
+     java.util.ArrayList<exprNodeDesc> partitionCols,
      int numReducers,
      boolean inferNumReducers,
      final tableDesc keySerializeInfo,
@@ -57,7 +71,7 @@ public class reduceSinkDesc implements Serializable {
     this.tag = tag;
     this.numReducers = numReducers;
     this.inferNumReducers = inferNumReducers;
-    this.numPartitionFields = numPartitionFields;
+    this.partitionCols = partitionCols;
     this.keySerializeInfo = keySerializeInfo;
     this.valueSerializeInfo = valueSerializeInfo;
   }
@@ -80,12 +94,12 @@ public class reduceSinkDesc implements Serializable {
     this.valueCols=valueCols;
   }
   
-  @explain(displayName="# partition fields")
-  public int getNumPartitionFields() {
-    return this.numPartitionFields;
+  @explain(displayName="Map-reduce partition columns")
+  public java.util.ArrayList<exprNodeDesc> getPartitionCols() {
+    return this.partitionCols;
   }
-  public void setNumPartitionFields(int numPartitionFields) {
-    this.numPartitionFields = numPartitionFields;
+  public void setPartitionCols(final java.util.ArrayList<exprNodeDesc> partitionCols) {
+    this.partitionCols = partitionCols;
   }
   
   @explain(displayName="tag")

+ 21 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/plan/selectDesc.java

@@ -24,11 +24,18 @@ import java.io.Serializable;
 public class selectDesc implements Serializable {
   private static final long serialVersionUID = 1L;
   private java.util.ArrayList<org.apache.hadoop.hive.ql.plan.exprNodeDesc> colList;
+  private boolean selectStar;
   public selectDesc() { }
+  public selectDesc(final java.util.ArrayList<org.apache.hadoop.hive.ql.plan.exprNodeDesc> colList) {
+    this(colList, false);
+  }
+  
   public selectDesc(
-    final java.util.ArrayList<org.apache.hadoop.hive.ql.plan.exprNodeDesc> colList) {
+    final java.util.ArrayList<org.apache.hadoop.hive.ql.plan.exprNodeDesc> colList, final boolean selectStar) {
     this.colList = colList;
+    this.selectStar = selectStar;
   }
+  
   @explain(displayName="expressions")
   public java.util.ArrayList<org.apache.hadoop.hive.ql.plan.exprNodeDesc> getColList() {
     return this.colList;
@@ -36,4 +43,17 @@ public class selectDesc implements Serializable {
   public void setColList(final java.util.ArrayList<org.apache.hadoop.hive.ql.plan.exprNodeDesc> colList) {
     this.colList=colList;
   }
+  
+  /**
+   * @return the selectStar
+   */
+  public boolean isSelectStar() {
+    return selectStar;
+  }
+  /**
+   * @param selectStar the selectStar to set
+   */
+  public void setSelectStar(boolean selectStar) {
+    this.selectStar = selectStar;
+  }
 }

+ 115 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java

@@ -26,14 +26,19 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.log4j.*;
 import java.net.URL;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 
 import org.apache.commons.lang.StringUtils;
 
 public class SessionState {
-  
+
+  public static Log LOG = LogFactory.getLog("SessionState");
+  public static LogHelper console = new LogHelper(LOG);
+
   /**
    * current configuration
    */ 
@@ -146,11 +151,13 @@ public class SessionState {
   public static SessionState start(HiveConf conf) {
     ss = new SessionState (conf);
     ss.getConf().setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId());
+    console = new LogHelper(LOG);
     return (ss);
   }
 
   public static SessionState start(SessionState startSs) {
     ss = startSs;
+    console = new LogHelper(LOG);
     ss.getConf().setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId());
     return ss;
   }
@@ -162,6 +169,10 @@ public class SessionState {
     return ss;
   }
 
+  public static LogHelper getConsole() {
+    return console;
+  }
+
   private static String makeSessionId() {
     GregorianCalendar gc = new GregorianCalendar();
     String userid = System.getProperty("user.name");
@@ -242,4 +253,107 @@ public class SessionState {
       LOG.error(error + StringUtils.defaultString(detail));
     }
   }
+
+  public static String validateFile(Set<String> curFiles, String newFile) {
+    SessionState ss = SessionState.get();
+    LogHelper console = SessionState.getConsole();
+    Configuration conf = (ss == null) ? new Configuration() : ss.getConf();
+
+    try {
+      if(Utilities.realFile(newFile, conf) != null)
+        return newFile;
+      else {
+        console.printError(newFile + " does not exist");
+        return null;
+      }
+    } catch (IOException e) {
+      console.printError("Unable to validate " + newFile + "\nException: " + e.getMessage(),
+                         "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+      return null;
+    }
+  }
+
+  public static interface ResourceHook {
+    public String preHook(Set<String> cur, String s);
+  }
+
+  public static enum ResourceType {
+    FILE(new ResourceHook () {
+        public String preHook(Set<String> cur, String s) { return validateFile(cur, s); }
+      });
+
+    public ResourceHook hook;
+
+    ResourceType(ResourceHook hook) {
+      this.hook = hook;
+    }
+  };
+
+  public static ResourceType find_resource_type(String s) {
+    
+    s = s.trim().toUpperCase();
+    
+    try {
+      return ResourceType.valueOf(s);
+    } catch (IllegalArgumentException e) {
+    }
+    
+    // try singular
+    if(s.endsWith("S")) {
+      s = s.substring(0, s.length()-1);
+    } else {
+      return null;
+    }
+
+    try {
+      return ResourceType.valueOf(s);
+    } catch (IllegalArgumentException e) {
+    }
+    return null;
+  }
+
+  private HashMap<ResourceType, HashSet<String>> resource_map = new HashMap<ResourceType, HashSet<String>> ();
+
+  public void add_resource(ResourceType t, String value) {
+    if(resource_map.get(t) == null) {
+      resource_map.put(t, new HashSet<String> ());
+    }
+
+    String fnlVal = value;
+    if(t.hook != null) {
+      fnlVal = t.hook.preHook(resource_map.get(t), value);
+      if(fnlVal == null)
+        return;
+    }
+    resource_map.get(t).add(fnlVal);
+  }
+
+  public boolean delete_resource(ResourceType t, String value) {
+    if(resource_map.get(t) == null) {
+      return false;
+    }
+    return (resource_map.get(t).remove(value));
+  }
+
+  public Set<String> list_resource(ResourceType t, List<String> filter) {
+    if(resource_map.get(t) == null) {
+      return null;
+    }
+    Set<String> orig = resource_map.get(t);
+    if(filter == null) {
+      return orig;
+    } else {
+      Set<String> fnl = new HashSet<String> ();
+      for(String one: orig) {
+        if(filter.contains(one)) {
+          fnl.add(one);
+        }
+      }
+      return fnl;
+    }
+  }
+
+  public void delete_resource(ResourceType t) {
+    resource_map.remove (t);
+  }
 }
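
The resource map gives each session its own registry for ADD/LIST/DELETE FILE commands, with the FILE type validating every path through Utilities.realFile before it is registered. A minimal sketch (not part of the patch) of how a command processor might drive it; the file path is hypothetical:

  // Illustrative only: "add file", "list files", "delete file" for one session.
  SessionState ss = SessionState.get();
  SessionState.ResourceType t = SessionState.find_resource_type("FILES"); // plural resolves to FILE

  if (t != null) {
    ss.add_resource(t, "/tmp/my_udf_helper.py");        // dropped if validation fails
    Set<String> files = ss.list_resource(t, null);      // null filter returns everything
    ss.delete_resource(t, "/tmp/my_udf_helper.py");
  }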

+ 1 - 1
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/typeinfo/PrimitiveTypeInfo.java

@@ -43,7 +43,7 @@ public class PrimitiveTypeInfo extends TypeInfo implements Serializable {
   public PrimitiveTypeInfo() {}
 
   public String getTypeName() {
-    return ObjectInspectorUtils.getClassShortName(primitiveClass.getName());
+    return ObjectInspectorUtils.getClassShortName(primitiveClass);
   }
   
   

+ 68 - 0
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/typeinfo/TypeInfoUtils.java

@@ -3,11 +3,16 @@ package org.apache.hadoop.hive.ql.typeinfo;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -109,4 +114,67 @@ public class TypeInfoUtils {
     return result;
   }
     
+  public static String getFieldSchemaTypeFromTypeInfo(TypeInfo typeInfo) {
+    switch(typeInfo.getCategory()) {
+      case PRIMITIVE: {
+        return ObjectInspectorUtils.getClassShortName(typeInfo.getPrimitiveClass());
+      }
+      case LIST: {
+        String elementType = getFieldSchemaTypeFromTypeInfo(typeInfo.getListElementTypeInfo());
+        return org.apache.hadoop.hive.serde.Constants.LIST_TYPE_NAME + "<" + elementType + ">";
+      }
+      case MAP: {
+        String keyType = getFieldSchemaTypeFromTypeInfo(typeInfo.getMapKeyTypeInfo());
+        String valueType = getFieldSchemaTypeFromTypeInfo(typeInfo.getMapValueTypeInfo());
+        return org.apache.hadoop.hive.serde.Constants.MAP_TYPE_NAME + "<" +
+          keyType + "," + valueType + ">";
+      }
+      case STRUCT: {
+        throw new RuntimeException("Complex struct type not supported!");
+      }
+      default: {
+        throw new RuntimeException("Unknown type!");
+      }
+    }
+  }
+  
+  /**
+   * Convert TypeInfo to FieldSchema. 
+   */
+  public static FieldSchema getFieldSchemaFromTypeInfo(String fieldName, TypeInfo typeInfo) {
+    return new FieldSchema(
+        fieldName, getFieldSchemaTypeFromTypeInfo(typeInfo), "generated by TypeInfoUtils.getFieldSchemaFromTypeInfo"
+    );
+  }
+
+  /**
+   * The mapping from type name in DDL to the Java class. 
+   */
+  public static final Map<String, Class<?>> TypeNameToClass = new HashMap<String, Class<?>>();
+  static {
+    TypeNameToClass.put(Constants.BOOLEAN_TYPE_NAME, Boolean.class);
+    TypeNameToClass.put(Constants.TINYINT_TYPE_NAME, Byte.class);
+    TypeNameToClass.put(Constants.SMALLINT_TYPE_NAME, Short.class);
+    TypeNameToClass.put(Constants.INT_TYPE_NAME, Integer.class);
+    TypeNameToClass.put(Constants.BIGINT_TYPE_NAME, Long.class);
+    TypeNameToClass.put(Constants.FLOAT_TYPE_NAME, Float.class);
+    TypeNameToClass.put(Constants.DOUBLE_TYPE_NAME, Double.class);
+    TypeNameToClass.put(Constants.STRING_TYPE_NAME, String.class);
+    TypeNameToClass.put(Constants.DATE_TYPE_NAME, java.sql.Date.class);
+    // These types are not supported yet. 
+    // TypeNameToClass.put(Constants.DATETIME_TYPE_NAME);
+    // TypeNameToClass.put(Constants.TIMESTAMP_TYPE_NAME);
+  }
+  
+  /**
+   * Return the primitive type corresponding to the field schema
+   * @param field The field schema
+   * @return The TypeInfo object, or null if the field is not a primitive type.
+   */
+  public static TypeInfo getPrimitiveTypeInfoFromFieldSchema(FieldSchema field) {
+    String type = field.getType();
+    
+    Class<?> c = TypeNameToClass.get(type);
+    return c == null ? null : TypeInfoFactory.getPrimitiveTypeInfo(c);
+  }
 }
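
These helpers round-trip between the query compiler's TypeInfo and the metastore's FieldSchema: getFieldSchemaFromTypeInfo renders the DDL type string (including list<...> and map<...,...>), and getPrimitiveTypeInfoFromFieldSchema maps primitive DDL names back through TypeNameToClass. A minimal sketch (not part of the patch):

  // Illustrative only: TypeInfo -> FieldSchema -> TypeInfo for a primitive type.
  TypeInfo doubleType = TypeInfoFactory.getPrimitiveTypeInfo(Double.class);
  FieldSchema fs = TypeInfoUtils.getFieldSchemaFromTypeInfo("amount", doubleType);
  // fs.getType() holds the DDL name of the type, e.g. "double"

  TypeInfo roundTripped = TypeInfoUtils.getPrimitiveTypeInfoFromFieldSchema(fs);
  // Complex DDL types (list<...>, map<...,...>) return null here, since only
  // the primitives in TypeNameToClass are mapped back.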

+ 5 - 5
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFAvg.java

@@ -37,9 +37,9 @@ public class UDAFAvg extends UDAF {
     mCount = 0;
   }
 
-  public boolean aggregate(String o) {
-    if (o != null && !o.isEmpty()) {
-      mSum += Double.parseDouble(o);
+  public boolean aggregate(Double o) {
+    if (o != null) {
+      mSum += o;
       mCount ++;
     }
     return true;
@@ -60,9 +60,9 @@ public class UDAFAvg extends UDAF {
     return true;
   }
 
-  public String evaluate() {
+  public Double evaluate() {
     // This is SQL standard - average of zero items should be null.
-    return mCount == 0 ? null : String.valueOf(mSum / mCount);
+    return mCount == 0 ? null : Double.valueOf(mSum / mCount);
   }
 
 }

+ 6 - 6
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFCount.java

@@ -42,17 +42,17 @@ public class UDAFCount extends UDAF {
     return true;
   }
 
-  public String evaluatePartial() {
-    return Long.valueOf(mCount).toString();
+  public Long evaluatePartial() {
+    return Long.valueOf(mCount);
   }
 
-  public boolean aggregatePartial(String count) {
-    mCount += Long.parseLong(count);
+  public boolean aggregatePartial(Long count) {
+    mCount += count;
     return true;
   }
 
-  public String evaluate() {
-    return Long.valueOf(mCount).toString();
+  public Long evaluate() {
+    return Long.valueOf(mCount);
   }
 
   

+ 9 - 9
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMax.java

@@ -37,28 +37,28 @@ public class UDAFMax extends UDAF {
     mEmpty = true;
   }
 
-  public boolean aggregate(String o) {
-    if (o != null && !o.isEmpty()) {
+  public boolean aggregate(Double o) {
+    if (o != null) {
       if (mEmpty) {
-        mMax = Double.parseDouble(o);
+        mMax = o;
         mEmpty = false;
       } else {
-        mMax = Math.max(mMax, Double.parseDouble(o));
+        mMax = Math.max(mMax, o);
       }
     }
     return true;
   }
   
-  public String evaluatePartial() {
-    return mEmpty ? null : String.valueOf(mMax);
+  public Double evaluatePartial() {
+    return mEmpty ? null : Double.valueOf(mMax);
   }
 
-  public boolean aggregatePartial(String o) {
+  public boolean aggregatePartial(Double o) {
     return aggregate(o);
   }
 
-  public String evaluate() {
-    return mEmpty ? null : String.valueOf(mMax);
+  public Double evaluate() {
+    return mEmpty ? null : Double.valueOf(mMax);
   }
 
 }

+ 9 - 9
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFMin.java

@@ -37,28 +37,28 @@ public class UDAFMin extends UDAF {
     mEmpty = true;
   }
 
-  public boolean aggregate(String o) {
-    if (o != null && !o.isEmpty()) {
+  public boolean aggregate(Double o) {
+    if (o != null) {
       if (mEmpty) {
-        mMin = Double.parseDouble(o);
+        mMin = o;
         mEmpty = false;
       } else {
-        mMin = Math.min(mMin, Double.parseDouble(o));
+        mMin = Math.min(mMin, o);
       }
     }
     return true;
   }
   
-  public String evaluatePartial() {
-    return mEmpty ? null : String.valueOf(mMin);
+  public Double evaluatePartial() {
+    return mEmpty ? null : Double.valueOf(mMin);
   }
 
-  public boolean aggregatePartial(String o) {
+  public boolean aggregatePartial(Double o) {
     return aggregate(o);
   }
 
-  public String evaluate() {
-    return mEmpty ? null : String.valueOf(mMin);
+  public Double evaluate() {
+    return mEmpty ? null : Double.valueOf(mMin);
   }
 
 }

+ 10 - 10
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFSum.java

@@ -37,30 +37,30 @@ public class UDAFSum extends UDAF {
     mEmpty = true;
   }
 
-  public boolean aggregate(String o) {
-    if (o != null && !o.isEmpty()) {
-      mSum += Double.parseDouble(o);
+  public boolean aggregate(Double o) {
+    if (o != null) {
+      mSum += o;
       mEmpty = false;
     }
     return true;
   }
   
-  public String evaluatePartial() {
+  public Double evaluatePartial() {
     // This is SQL standard - sum of zero items should be null.
-    return mEmpty ? null : new Double(mSum).toString();
+    return mEmpty ? null : new Double(mSum);
   }
 
-  public boolean aggregatePartial(String o) {
-    if (o != null && !o.isEmpty()) {
-      mSum += Double.parseDouble(o);
+  public boolean aggregatePartial(Double o) {
+    if (o != null) {
+      mSum += o;
       mEmpty = false;
     }
     return true;
   }
 
-  public String evaluate() {
+  public Double evaluate() {
     // This is SQL standard - sum of zero items should be null.
-    return mEmpty ? null : new Double(mSum).toString();
+    return mEmpty ? null : new Double(mSum);
   }
 
 }
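
With typed parameters the UDAFs no longer shuttle strings through Double.parseDouble; partial and final results are plain wrapper objects that the new serdes can carry directly. A minimal sketch (not part of the patch) of the same four-method pattern for a hypothetical product aggregate; the constructor-calls-init convention is assumed from the existing UDAFs:

  // Illustrative only: a hypothetical UDAFProduct in the typed style above.
  public class UDAFProduct extends UDAF {
    private double mProduct;
    private boolean mEmpty;

    public UDAFProduct() {
      super();
      init();
    }

    public void init() {
      mProduct = 1.0;
      mEmpty = true;
    }

    public boolean aggregate(Double o) {
      if (o != null) {
        mProduct *= o;
        mEmpty = false;
      }
      return true;
    }

    public Double evaluatePartial() {
      // Product of zero items is null, mirroring the SQL convention used above.
      return mEmpty ? null : Double.valueOf(mProduct);
    }

    public boolean aggregatePartial(Double o) {
      return aggregate(o);
    }

    public Double evaluate() {
      return mEmpty ? null : Double.valueOf(mProduct);
    }
  }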

+ 0 - 8
src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java

@@ -78,12 +78,4 @@ public class UDFToString implements UDF {
     }
   }
   
-  public String evaluate(java.sql.Date i) {
-    if (i == null) {
-      return null;
-    } else {
-      return i.toString();
-    }
-  }
-  
 }

+ 4 - 0
src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java

@@ -646,6 +646,10 @@ public class QTestUtil {
     // Do semantic analysis and plan generation
     Context ctx = new Context(conf);
     ctx.makeScratchDir();
+    while((ast.getToken() == null) && (ast.getChildCount() > 0)) {
+      ast = (CommonTree)ast.getChild(0);
+    }
+    
     sem.analyze(ast, ctx);
     ctx.removeScratchDir();
     return sem.getRootTasks();

+ 14 - 5
src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java

@@ -198,7 +198,7 @@ public class TestExecDriver extends TestCase {
   @SuppressWarnings("unchecked")
   private void populateMapRedPlan1(Table src) {
     mr.setNumReduceTasks(Integer.valueOf(1));
-
+    
     // map-side work
     Operator<reduceSinkDesc> op1 = OperatorFactory.get
       (PlanUtils.getReduceSinkDesc
@@ -206,6 +206,8 @@ public class TestExecDriver extends TestCase {
         Utilities.makeList(new exprNodeColumnDesc(String.class, "value")), -1, 1, -1, false));
 
     Utilities.addMapWork(mr, src, "a", op1);
+    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
+    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator<fileSinkDesc> op3 = OperatorFactory.get(new fileSinkDesc
@@ -230,6 +232,8 @@ public class TestExecDriver extends TestCase {
                            new exprNodeColumnDesc(String.class, "value")), -1, 1, -1, false));
 
     Utilities.addMapWork(mr, src, "a", op1);
+    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
+    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator<fileSinkDesc> op4 = OperatorFactory.get(new fileSinkDesc
@@ -261,6 +265,8 @@ public class TestExecDriver extends TestCase {
         (new exprNodeColumnDesc(String.class, "value")), Byte.valueOf((byte)0), 1, -1, false));
 
     Utilities.addMapWork(mr, src, "a", op1);
+    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
+    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
 
     Operator<reduceSinkDesc> op2 = OperatorFactory.get
       (PlanUtils.getReduceSinkDesc
@@ -270,10 +276,7 @@ public class TestExecDriver extends TestCase {
         Integer.MAX_VALUE, -1, false));
 
     Utilities.addMapWork(mr, src2, "b", op2);
-
-    // just to satisfy the constraint that each tag must define a schema
-    mr.getAliasToSchema().put("a", new schemaDesc(""));
-    mr.getAliasToSchema().put("b", new schemaDesc(""));
+    mr.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator<fileSinkDesc> op4 = OperatorFactory.get(new fileSinkDesc
@@ -318,6 +321,8 @@ public class TestExecDriver extends TestCase {
                                                         new exprNodeColumnDesc(String.class, "value"))), op0);
 
     Utilities.addMapWork(mr, src, "a", op4);
+    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
+    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator<fileSinkDesc> op3 = OperatorFactory.get(new fileSinkDesc
@@ -348,6 +353,8 @@ public class TestExecDriver extends TestCase {
                                                         new exprNodeColumnDesc(String.class, "value"))), op0);
 
     Utilities.addMapWork(mr, src, "a", op4);
+    mr.setKeyDesc(op0.getConf().getKeySerializeInfo());
+    mr.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator<fileSinkDesc> op3 = OperatorFactory.get(new fileSinkDesc
@@ -384,6 +391,8 @@ public class TestExecDriver extends TestCase {
                                                         new exprNodeColumnDesc(String.class, "value"))), op0);
 
     Utilities.addMapWork(mr, src, "a", op4);
+    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
+    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator<fileSinkDesc> op3 = OperatorFactory.get(new fileSinkDesc

+ 13 - 9
src/contrib/hive/serde/src/gen-java/org/apache/hadoop/hive/serde/dynamic_type/DynamicSerDeConstList.java → src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/io/JavaTestObjFlatFileInputFormat.java

@@ -16,17 +16,21 @@
  * limitations under the License.
  */
 
-/* Generated By:JJTree: Do not edit this line. DynamicSerDeConstList.java */
+package org.apache.hadoop.hive.ql.io;
 
-package org.apache.hadoop.hive.serde.dynamic_type;
+import java.io.Serializable;
 
-public class DynamicSerDeConstList extends SimpleNode {
-  public DynamicSerDeConstList(int id) {
-    super(id);
+/**
+ * Simple test object
+ */
+public class JavaTestObjFlatFileInputFormat implements Serializable {
+  public String s;
+  public int num;
+  public JavaTestObjFlatFileInputFormat(String s, int num) {
+    this.s = s;
+    this.num = num;
   }
-
-  public DynamicSerDeConstList(thrift_grammar p, int id) {
-    super(p, id);
+  public JavaTestObjFlatFileInputFormat() { 
   }
-
 }
+

+ 212 - 0
src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/io/RecordTestObj.java

@@ -0,0 +1,212 @@
+// File generated by hadoop record compiler. Do not edit.
+package org.apache.hadoop.hive.ql.io;
+
+public class RecordTestObj extends org.apache.hadoop.record.Record {
+  private static final org.apache.hadoop.record.meta.RecordTypeInfo _rio_recTypeInfo;
+  private static org.apache.hadoop.record.meta.RecordTypeInfo _rio_rtiFilter;
+  private static int[] _rio_rtiFilterFields;
+  static {
+    _rio_recTypeInfo = new org.apache.hadoop.record.meta.RecordTypeInfo("RecordTestObj");
+    _rio_recTypeInfo.addField("s", org.apache.hadoop.record.meta.TypeID.StringTypeID);
+    _rio_recTypeInfo.addField("num", org.apache.hadoop.record.meta.TypeID.LongTypeID);
+  }
+  
+  private String s;
+  private long num;
+  public RecordTestObj() { }
+  public RecordTestObj(
+    final String s,
+    final long num) {
+    this.s = s;
+    this.num = num;
+  }
+  public static org.apache.hadoop.record.meta.RecordTypeInfo getTypeInfo() {
+    return _rio_recTypeInfo;
+  }
+  public static void setTypeFilter(org.apache.hadoop.record.meta.RecordTypeInfo rti) {
+    if (null == rti) return;
+    _rio_rtiFilter = rti;
+    _rio_rtiFilterFields = null;
+  }
+  private static void setupRtiFields()
+  {
+    if (null == _rio_rtiFilter) return;
+    // we may already have done this
+    if (null != _rio_rtiFilterFields) return;
+    int _rio_i, _rio_j;
+    _rio_rtiFilterFields = new int [_rio_rtiFilter.getFieldTypeInfos().size()];
+    for (_rio_i=0; _rio_i<_rio_rtiFilterFields.length; _rio_i++) {
+      _rio_rtiFilterFields[_rio_i] = 0;
+    }
+    java.util.Iterator<org.apache.hadoop.record.meta.FieldTypeInfo> _rio_itFilter = _rio_rtiFilter.getFieldTypeInfos().iterator();
+    _rio_i=0;
+    while (_rio_itFilter.hasNext()) {
+      org.apache.hadoop.record.meta.FieldTypeInfo _rio_tInfoFilter = _rio_itFilter.next();
+      java.util.Iterator<org.apache.hadoop.record.meta.FieldTypeInfo> _rio_it = _rio_recTypeInfo.getFieldTypeInfos().iterator();
+      _rio_j=1;
+      while (_rio_it.hasNext()) {
+        org.apache.hadoop.record.meta.FieldTypeInfo _rio_tInfo = _rio_it.next();
+        if (_rio_tInfo.equals(_rio_tInfoFilter)) {
+          _rio_rtiFilterFields[_rio_i] = _rio_j;
+          break;
+        }
+        _rio_j++;
+      }
+      _rio_i++;
+    }
+  }
+  public String getS() {
+    return s;
+  }
+  public void setS(final String s) {
+    this.s=s;
+  }
+  public long getNum() {
+    return num;
+  }
+  public void setNum(final long num) {
+    this.num=num;
+  }
+  public void serialize(final org.apache.hadoop.record.RecordOutput _rio_a, final String _rio_tag)
+  throws java.io.IOException {
+    _rio_a.startRecord(this,_rio_tag);
+    _rio_a.writeString(s,"s");
+    _rio_a.writeLong(num,"num");
+    _rio_a.endRecord(this,_rio_tag);
+  }
+  private void deserializeWithoutFilter(final org.apache.hadoop.record.RecordInput _rio_a, final String _rio_tag)
+  throws java.io.IOException {
+    _rio_a.startRecord(_rio_tag);
+    s=_rio_a.readString("s");
+    num=_rio_a.readLong("num");
+    _rio_a.endRecord(_rio_tag);
+  }
+  public void deserialize(final org.apache.hadoop.record.RecordInput _rio_a, final String _rio_tag)
+  throws java.io.IOException {
+    if (null == _rio_rtiFilter) {
+      deserializeWithoutFilter(_rio_a, _rio_tag);
+      return;
+    }
+    // if we're here, we need to read based on version info
+    _rio_a.startRecord(_rio_tag);
+    setupRtiFields();
+    for (int _rio_i=0; _rio_i<_rio_rtiFilter.getFieldTypeInfos().size(); _rio_i++) {
+      if (1 == _rio_rtiFilterFields[_rio_i]) {
+        s=_rio_a.readString("s");
+      }
+      else if (2 == _rio_rtiFilterFields[_rio_i]) {
+        num=_rio_a.readLong("num");
+      }
+      else {
+        java.util.ArrayList<org.apache.hadoop.record.meta.FieldTypeInfo> typeInfos = (java.util.ArrayList<org.apache.hadoop.record.meta.FieldTypeInfo>)(_rio_rtiFilter.getFieldTypeInfos());
+        org.apache.hadoop.record.meta.Utils.skip(_rio_a, typeInfos.get(_rio_i).getFieldID(), typeInfos.get(_rio_i).getTypeID());
+      }
+    }
+    _rio_a.endRecord(_rio_tag);
+  }
+  public int compareTo (final Object _rio_peer_) throws ClassCastException {
+    if (!(_rio_peer_ instanceof RecordTestObj)) {
+      throw new ClassCastException("Comparing different types of records.");
+    }
+    RecordTestObj _rio_peer = (RecordTestObj) _rio_peer_;
+    int _rio_ret = 0;
+    _rio_ret = s.compareTo(_rio_peer.s);
+    if (_rio_ret != 0) return _rio_ret;
+    _rio_ret = (num == _rio_peer.num)? 0 :((num<_rio_peer.num)?-1:1);
+    if (_rio_ret != 0) return _rio_ret;
+    return _rio_ret;
+  }
+  public boolean equals(final Object _rio_peer_) {
+    if (!(_rio_peer_ instanceof RecordTestObj)) {
+      return false;
+    }
+    if (_rio_peer_ == this) {
+      return true;
+    }
+    RecordTestObj _rio_peer = (RecordTestObj) _rio_peer_;
+    boolean _rio_ret = false;
+    _rio_ret = s.equals(_rio_peer.s);
+    if (!_rio_ret) return _rio_ret;
+    _rio_ret = (num==_rio_peer.num);
+    if (!_rio_ret) return _rio_ret;
+    return _rio_ret;
+  }
+  public Object clone() throws CloneNotSupportedException {
+    RecordTestObj _rio_other = new RecordTestObj();
+    _rio_other.s = this.s;
+    _rio_other.num = this.num;
+    return _rio_other;
+  }
+  public int hashCode() {
+    int _rio_result = 17;
+    int _rio_ret;
+    _rio_ret = s.hashCode();
+    _rio_result = 37*_rio_result + _rio_ret;
+    _rio_ret = (int) (num^(num>>>32));
+    _rio_result = 37*_rio_result + _rio_ret;
+    return _rio_result;
+  }
+  public static String signature() {
+    return "LRecordTestObj(sl)";
+  }
+  public static class Comparator extends org.apache.hadoop.record.RecordComparator {
+    public Comparator() {
+      super(RecordTestObj.class);
+    }
+    static public int slurpRaw(byte[] b, int s, int l) {
+      try {
+        int os = s;
+        {
+          int i = org.apache.hadoop.record.Utils.readVInt(b, s);
+          int z = org.apache.hadoop.record.Utils.getVIntSize(i);
+          s+=(z+i); l-= (z+i);
+        }
+        {
+          long i = org.apache.hadoop.record.Utils.readVLong(b, s);
+          int z = org.apache.hadoop.record.Utils.getVIntSize(i);
+          s+=z; l-=z;
+        }
+        return (os - s);
+      } catch(java.io.IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    static public int compareRaw(byte[] b1, int s1, int l1,
+                                   byte[] b2, int s2, int l2) {
+      try {
+        int os1 = s1;
+        {
+          int i1 = org.apache.hadoop.record.Utils.readVInt(b1, s1);
+          int i2 = org.apache.hadoop.record.Utils.readVInt(b2, s2);
+          int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);
+          int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);
+          s1+=z1; s2+=z2; l1-=z1; l2-=z2;
+          int r1 = org.apache.hadoop.record.Utils.compareBytes(b1,s1,i1,b2,s2,i2);
+          if (r1 != 0) { return (r1<0)?-1:0; }
+          s1+=i1; s2+=i2; l1-=i1; l1-=i2;
+        }
+        {
+          long i1 = org.apache.hadoop.record.Utils.readVLong(b1, s1);
+          long i2 = org.apache.hadoop.record.Utils.readVLong(b2, s2);
+          if (i1 != i2) {
+            return ((i1-i2) < 0) ? -1 : 0;
+          }
+          int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);
+          int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);
+          s1+=z1; s2+=z2; l1-=z1; l2-=z2;
+        }
+        return (os1 - s1);
+      } catch(java.io.IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    public int compare(byte[] b1, int s1, int l1,
+                         byte[] b2, int s2, int l2) {
+      int ret = compareRaw(b1,s1,l1,b2,s2,l2);
+      return (ret == -1)? -1 : ((ret==0)? 1 : 0);}
+  }
+  
+  static {
+    org.apache.hadoop.record.RecordComparator.define(RecordTestObj.class, new Comparator());
+  }
+}

+ 281 - 0
src/contrib/hive/ql/src/test/org/apache/hadoop/hive/ql/io/TestFlatFileInputFormat.java

@@ -0,0 +1,281 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.*;
+import java.util.*;
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.*;
+
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.record.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.io.serializer.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.facebook.thrift.*;
+import com.facebook.thrift.transport.*;
+import com.facebook.thrift.protocol.*;
+
+//import org.apache.hadoop.contrib.serialization.thrift.*;
+
+public class TestFlatFileInputFormat extends TestCase  {
+
+  public void testFlatFileInputJava() throws Exception {
+    Configuration conf;
+    JobConf job ;
+    FileSystem fs;
+    Path dir ;
+    Path file;
+    Reporter reporter;
+    FSDataOutputStream ds;
+
+    try {
+      //
+      // create job and filesystem and reporter and such.
+      //
+      conf = new Configuration();
+      job = new JobConf(conf);
+      fs = FileSystem.getLocal(conf);
+      dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
+      file = new Path(dir, "test.txt");
+      reporter = Reporter.NULL;
+      fs.delete(dir, true);
+
+      job.setClass(FlatFileInputFormat.SerializationImplKey,
+                   org.apache.hadoop.io.serializer.JavaSerialization.class,
+                   org.apache.hadoop.io.serializer.Serialization.class);
+      
+      job.setClass(FlatFileInputFormat.SerializationContextFromConf.SerializationSubclassKey,
+                   JavaTestObjFlatFileInputFormat.class, java.io.Serializable.class);
+      
+      //
+      // Write some data out to a flat file
+      //
+      FileInputFormat.setInputPaths(job, dir);
+      ds = fs.create(file);
+      Serializer serializer = new JavaSerialization().getSerializer(null);
+
+      // construct some data and write it
+      serializer.open(ds);
+      for (int i = 0; i < 10; i++) {
+        serializer.serialize(new JavaTestObjFlatFileInputFormat("Hello World! " + String.valueOf(i), i));
+      }
+      serializer.close();
+
+      //
+      // Construct the reader
+      //
+      FileInputFormat<Void, FlatFileInputFormat.RowContainer<Serializable>> format =
+        new FlatFileInputFormat<Serializable>();
+      InputSplit[] splits = format.getSplits(job, 1);
+
+      // construct the record reader
+      RecordReader<Void, FlatFileInputFormat.RowContainer<Serializable>> reader =
+        format.getRecordReader(splits[0], job, reporter);
+
+      // create key/value
+      Void key = reader.createKey();
+      FlatFileInputFormat.RowContainer<Serializable> value = reader.createValue();
+      
+      //
+      // read back the data using the FlatFileRecordReader
+      //
+      int count = 0;
+      while (reader.next(key, value)) {
+        assertTrue(key == null);
+        assertTrue(((JavaTestObjFlatFileInputFormat)value.row).s.equals("Hello World! " +String.valueOf(count)));
+        assertTrue(((JavaTestObjFlatFileInputFormat)value.row).num == count);
+        count++;
+      }
+      reader.close();
+
+    } catch(Exception e) {
+      System.err.println("caught: " + e);
+      e.printStackTrace();
+      fail("testFlatFileInputJava failed with exception: " + e);
+    }
+
+  }
+
+  public void testFlatFileInputRecord() throws Exception {
+    Configuration conf;
+    JobConf job ;
+    FileSystem fs;
+    Path dir ;
+    Path file;
+    Reporter reporter;
+    FSDataOutputStream ds;
+
+    try {
+      //
+      // create job and filesystem and reporter and such.
+      //
+      conf = new Configuration();
+      job = new JobConf(conf);
+      fs = FileSystem.getLocal(conf);
+      dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
+      file = new Path(dir, "test.txt");
+      reporter = Reporter.NULL;
+      fs.delete(dir, true);
+
+      job.setClass(FlatFileInputFormat.SerializationImplKey,
+                   org.apache.hadoop.io.serializer.WritableSerialization.class,
+                   org.apache.hadoop.io.serializer.Serialization.class);
+      
+      job.setClass(FlatFileInputFormat.SerializationContextFromConf.SerializationSubclassKey,
+                   RecordTestObj.class, Writable.class);
+      
+      //
+      // Write some data out to a flat file
+      //
+      FileInputFormat.setInputPaths(job, dir);
+      ds = fs.create(file);
+      Serializer serializer = new WritableSerialization().getSerializer(Writable.class);
+
+      // construct some data and write it
+      serializer.open(ds);
+      for (int i = 0; i < 10; i++) {
+        serializer.serialize(new RecordTestObj("Hello World! " + String.valueOf(i), i));
+      }
+      serializer.close();
+
+      //
+      // Construct the reader
+      //
+      FileInputFormat<Void, FlatFileInputFormat.RowContainer<Writable>> format =
+        new FlatFileInputFormat<Writable>();
+      InputSplit[] splits = format.getSplits(job, 1);
+
+      // construct the record reader
+      RecordReader<Void, FlatFileInputFormat.RowContainer<Writable>> reader =
+        format.getRecordReader(splits[0], job, reporter);
+
+      // create key/value
+      Void key = reader.createKey();
+      FlatFileInputFormat.RowContainer<Writable> value = reader.createValue();
+      
+      //
+      // read back the data using the FlatFileRecordReader
+      //
+      int count = 0;
+      while (reader.next(key, value)) {
+        assertTrue(key == null);
+        assertTrue(((RecordTestObj)value.row).getS().equals("Hello World! " +String.valueOf(count)));
+        assertTrue(((RecordTestObj)value.row).getNum() == count);
+        count++;
+      }
+      reader.close();
+
+    } catch(Exception e) {
+      System.err.println("caught: " + e);
+      e.printStackTrace();
+      fail("testFlatFileInputRecord failed with exception: " + e);
+    }
+
+  }
+  /*
+  public void testFlatFileInputThrift() throws Exception {
+    Configuration conf;
+    JobConf job ;
+    FileSystem fs;
+    Path dir ;
+    Path file;
+    Reporter reporter;
+    FSDataOutputStream ds;
+
+    try {
+      //
+      // create job and filesystem and reporter and such.
+      //
+      conf = new Configuration();
+      job = new JobConf(conf);
+      fs = FileSystem.getLocal(conf);
+      dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
+      file = new Path(dir, "test.txt");
+      reporter = Reporter.NULL;
+      fs.delete(dir, true);
+
+      job.setClass(FlatFileInputFormat.SerializationContextFromConf.SerializationImplKey,
+                   org.apache.hadoop.contrib.serialization.thrift.ThriftSerialization.class,
+                   org.apache.hadoop.io.serializer.Serialization.class);
+      
+      job.setClass(FlatFileInputFormat.SerializationContextFromConf.SerializationSubclassKey,
+                   FlatFileThriftTestObj.class, TBase.class);
+      
+      //
+      // Write some data out to a flat file
+      //
+      FileInputFormat.setInputPaths(job, dir);
+      ds = fs.create(file);
+      Serializer serializer = new ThriftSerialization().getSerializer(TBase.class);
+
+      // construct some data and write it
+      serializer.open(ds);
+      for (int i = 0; i < 10; i++) {
+        serializer.serialize(new FlatFileThriftTestObj("Hello World! " + String.valueOf(i), i));
+      }
+      serializer.close();
+
+      //
+      // Construct the reader
+      //
+      FileInputFormat<Void, FlatFileInputFormat.RowContainer<TBase>> format =
+        new FlatFileInputFormat<TBase>();
+      InputSplit[] splits = format.getSplits(job, 1);
+
+      // construct the record reader
+      RecordReader<Void, FlatFileInputFormat.RowContainer<TBase>> reader =
+        format.getRecordReader(splits[0], job, reporter);
+
+      // create key/value
+      Void key = reader.createKey();
+      FlatFileInputFormat.RowContainer<TBase> value = reader.createValue();
+      
+      //
+      // read back the data using the FlatFileRecordReader
+      //
+      int count = 0;
+      while (reader.next(key, value)) {
+        assertTrue(key == null);
+        assertTrue(((FlatFileThriftTestObj)value.row).s.equals("Hello World! " +String.valueOf(count)));
+        assertTrue(((FlatFileThriftTestObj)value.row).num == count);
+        count++;
+      }
+      reader.close();
+
+    } catch(Exception e) {
+      System.err.println("caught: " + e);
+      e.printStackTrace();
+    } finally {
+    }
+
+  }
+  */
+
+
+  public static void main(String[] args) throws Exception {
+    new TestFlatFileInputFormat().testFlatFileInputJava();
+    new TestFlatFileInputFormat().testFlatFileInputRecord();
+    //    new TestFlatFileInputFormat().testFlatFileInputThrift();
+  }
+}
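
The Java test above doubles as a usage guide for the new FlatFileInputFormat introduced by this commit: the two setClass calls select, respectively, the Serialization framework (Java, Writable/record, or Thrift) and the concrete row class it should produce; everything else is the standard InputFormat protocol. A minimal read-path sketch distilled from that test (the input directory and the MyRow row class are hypothetical stand-ins, not part of the change; imports are the same as in the test):

    Configuration conf = new Configuration();
    JobConf job = new JobConf(conf);
    FileInputFormat.setInputPaths(job, new Path("/tmp/flatfile-demo"));      // assumed demo path

    // Select the Serialization implementation FlatFileInputFormat should use ...
    job.setClass(FlatFileInputFormat.SerializationImplKey,
                 org.apache.hadoop.io.serializer.JavaSerialization.class,
                 org.apache.hadoop.io.serializer.Serialization.class);
    // ... and the concrete class each serialized row is deserialized into.
    job.setClass(FlatFileInputFormat.SerializationContextFromConf.SerializationSubclassKey,
                 MyRow.class, java.io.Serializable.class);                   // MyRow: hypothetical Serializable row class

    FileInputFormat<Void, FlatFileInputFormat.RowContainer<Serializable>> format =
        new FlatFileInputFormat<Serializable>();
    InputSplit[] splits = format.getSplits(job, 1);
    RecordReader<Void, FlatFileInputFormat.RowContainer<Serializable>> reader =
        format.getRecordReader(splits[0], job, Reporter.NULL);

    Void key = reader.createKey();                                           // the test asserts this is always null
    FlatFileInputFormat.RowContainer<Serializable> value = reader.createValue();
    while (reader.next(key, value)) {
      MyRow row = (MyRow) value.row;                                         // one deserialized record per call
      // ... consume row ...
    }
    reader.close();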

+ 6 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/bad_sample_clause.q

@@ -0,0 +1,6 @@
+CREATE TABLE dest1(key INT, value STRING, dt STRING, hr STRING) STORED AS TEXTFILE;
+
+INSERT OVERWRITE TABLE dest1 SELECT s.*
+FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 2) s
+WHERE s.ds='2008-04-08' and s.hr='11';
+

+ 1 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/input1.q

@@ -0,0 +1 @@
+SELECT a.* FROM src; 

+ 1 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/input2.q

@@ -0,0 +1 @@
+SELECT a.key FROM src; 

+ 6 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/input_testxpath4.q

@@ -0,0 +1,6 @@
+EXPLAIN
+FROM src_thrift
+SELECT src_thrift.mstringstring['key_9'], lintstring.myint;
+
+FROM src_thrift
+SELECT src_thrift.mstringstring['key_9'], lintstring.myint;

+ 7 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/invalid_create_tbl1.q

@@ -0,0 +1,7 @@
+DROP TABLE inv_valid_tbl1;
+CREATE TABLE inv_valid_tbl1 COMMENT 'This is a thrift based table' 
+    PARTITIONED BY(aint DATETIME, country STRING) 
+    CLUSTERED BY(aint) SORTED BY(lint) INTO 32 BUCKETS
+    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.ThriftDeserializer' WITH SERDEPROPERTIES ('serialization.class' = 'org.apache.hadoop.hive.serde2.thrift.test.Complex', 'serialization.format' = 'com.facebook.thrift.protocol.TBinaryProtocol')
+    STORED AS SEQUENCEFILE;
+DESCRIBE EXTENDED inv_valid_tbl1;

+ 1 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/invalid_tbl_name.q

@@ -0,0 +1 @@
+create table invalid-name(a int, b string);

+ 6 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/joinneg.q

@@ -0,0 +1,6 @@
+EXPLAIN FROM 
+(SELECT src.* FROM src) x
+JOIN 
+(SELECT src.* FROM src) Y
+ON (x.key = b.key)
+SELECT Y.*;

+ 6 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/load_wrong_fileformat.q

@@ -0,0 +1,6 @@
+-- test for loading into tables with the correct file format
+-- test for loading into partitions with the correct file format
+
+DROP TABLE T1;
+CREATE TABLE T1(name STRING) STORED AS SEQUENCEFILE;
+LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE T1;

+ 4 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/notable_alias3.q

@@ -0,0 +1,4 @@
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
+
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key;

+ 4 - 0
src/contrib/hive/ql/src/test/queries/clientnegative/notable_alias4.q

@@ -0,0 +1,4 @@
+EXPLAIN
+SELECT key from src JOIN src1 on src1.key=src.key;
+
+SELECT key from src JOIN src1 on src1.key=src.key;

+ 20 - 0
src/contrib/hive/ql/src/test/queries/clientpositive/alter1.q

@@ -0,0 +1,20 @@
+drop table alter1;
+create table alter1(a int, b int);
+describe extended alter1;
+alter table alter1 set tblproperties ('a'='1', 'c'='3');
+describe extended alter1;
+alter table alter1 set tblproperties ('a'='1', 'c'='4', 'd'='3');
+describe extended alter1;
+
+alter table alter1 set serdeproperties('s1'='9');
+describe extended alter1;
+alter table alter1 set serdeproperties('s1'='10', 's2' ='20');
+describe extended alter1;
+
+alter table alter1 set serde 'org.apache.hadoop.hive.serde2.TestSerDe' with serdeproperties('s1'='9');
+describe extended alter1;
+
+alter table alter1 set serde 'org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe';
+describe extended alter1;
+
+drop table alter1;

+ 1 - 1
src/contrib/hive/ql/src/test/queries/clientpositive/case_sensitivity.q

@@ -1,4 +1,4 @@
-CREATE TABLE DEST1(Key INT, VALUE STRING);
+CREATE TABLE DEST1(Key INT, VALUE STRING) STORED AS TEXTFILE;
 
 EXPLAIN
 FROM SRC_THRIFT

+ 1 - 1
src/contrib/hive/ql/src/test/queries/clientpositive/cast1.q

@@ -1,4 +1,4 @@
-CREATE TABLE dest1(c1 INT, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 INT, c6 INT, c7 INT);
+CREATE TABLE dest1(c1 INT, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 INT, c6 INT, c7 INT) STORED AS TEXTFILE;
 
 EXPLAIN
 FROM src INSERT OVERWRITE TABLE dest1 SELECT 3 + 2, 3.0 + 2, 3 + 2.0, 3.0 + 2.0, 3 + CAST(2.0 AS INT), CAST(1 AS BOOLEAN), CAST(TRUE AS INT) WHERE src.key = 86;

+ 1 - 1
src/contrib/hive/ql/src/test/queries/clientpositive/groupby1.q

@@ -1,4 +1,4 @@
-CREATE TABLE dest1(key INT, value DOUBLE);
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
 
 EXPLAIN
 FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,4)) GROUP BY src.key;

+ 1 - 1
src/contrib/hive/ql/src/test/queries/clientpositive/groupby1_limit.q

@@ -1,6 +1,6 @@
 set mapred.reduce.tasks=31;
 
-CREATE TABLE dest1(key INT, value DOUBLE);
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
 
 EXPLAIN
 FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,4)) GROUP BY src.key LIMIT 5;

Too many files were changed in this changeset, so some files are not shown.