Browse Source

MAPREDUCE-7432. Make manifest committer default on abfs and gcs stores (#5378)

By default, the MapReduce manifest committer is now used for jobs working with abfs and gcs.
Hadoop MapReduce picks this up automatically; Spark requires additional configuration, so see
the documentation for the steps required.
Steve Loughran 1 year ago
parent
commit
48f1a3c548

+ 6 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

@@ -2242,23 +2242,23 @@
   </description>
 </property>
 
-<!-- not yet enabled by default.
-
+<!-- use manifest committer for abfs URLs -->
 <property>
   <name>mapreduce.outputcommitter.factory.scheme.abfs</name>
   <value>org.apache.hadoop.fs.azurebfs.commit.AzureManifestCommitterFactory</value>
   <description>
-    The default committer factory for ABFS is for the manifest committer with
-    abfs-specific tuning.
+    The default committer factory for ABFS is the manifest committer with
+    abfs-specific recovery.
   </description>
 </property>
 
+<!-- use manifest committer for gs URLs -->
 <property>
   <name>mapreduce.outputcommitter.factory.scheme.gs</name>
   <value>org.apache.hadoop.mapreduce.lib.output.committer.manifest.ManifestCommitterFactory</value>
   <description>
-    The default committer factory for google cloud storage is for the manifest committer.
+    The default committer factory for google cloud storage is the manifest committer.
   </description>
 </property>
--->
+
 </configuration>