فهرست منبع

HADOOP-18469. Add secure XML parser factories to XMLUtils (#4940)

Add to XMLUtils a set of methods to create secure XML Parsers/transformers, locking down DTD, schema, XXE exposure.

Use these wherever XML parsers are created.

Contributed by PJ Fanning
PJ Fanning 2 سال پیش
والد
کامیت
8336b91329

+ 6 - 5
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -24,7 +24,6 @@ import com.ctc.wstx.io.SystemId;
 import com.ctc.wstx.stax.WstxInputFactory;
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonGenerator;
-import org.apache.hadoop.classification.VisibleForTesting;
 
 import java.io.BufferedInputStream;
 import java.io.DataInput;
@@ -87,6 +86,7 @@ import org.apache.hadoop.thirdparty.com.google.common.base.Charsets;
 import org.apache.commons.collections.map.UnmodifiableMap;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.VisibleForTesting;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -98,18 +98,19 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.alias.CredentialProvider;
 import org.apache.hadoop.security.alias.CredentialProvider.CredentialEntry;
 import org.apache.hadoop.security.alias.CredentialProviderFactory;
+import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
+import org.apache.hadoop.util.Preconditions;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.XMLUtils;
+
 import org.codehaus.stax2.XMLStreamReader2;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 
-import org.apache.hadoop.util.Preconditions;
-import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
-
 import static org.apache.commons.lang3.StringUtils.isBlank;
 import static org.apache.commons.lang3.StringUtils.isNotBlank;
 
@@ -3604,7 +3605,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     try {
       DOMSource source = new DOMSource(doc);
       StreamResult result = new StreamResult(out);
-      TransformerFactory transFactory = TransformerFactory.newInstance();
+      TransformerFactory transFactory = XMLUtils.newSecureTransformerFactory();
       Transformer transformer = transFactory.newTransformer();
 
       // Important to not hold Configuration log while writing result, since

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java

@@ -147,8 +147,8 @@ public class HostsFileReader {
       String filename, InputStream fileInputStream, Map<String, Integer> map)
           throws IOException {
     Document dom;
-    DocumentBuilderFactory builder = DocumentBuilderFactory.newInstance();
     try {
+      DocumentBuilderFactory builder = XMLUtils.newSecureDocumentBuilderFactory();
       DocumentBuilder db = builder.newDocumentBuilder();
       dom = db.parse(fileInputStream);
       // Examples:

+ 99 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java

@@ -18,12 +18,19 @@
 
 package org.apache.hadoop.util;
 
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
 import javax.xml.transform.*;
+import javax.xml.transform.sax.SAXTransformerFactory;
 import javax.xml.transform.stream.*;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
+import org.xml.sax.SAXException;
+
 import java.io.*;
 
 /**
@@ -33,6 +40,19 @@ import java.io.*;
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 public class XMLUtils {
+
+  private static final String DISALLOW_DOCTYPE_DECL =
+      "http://apache.org/xml/features/disallow-doctype-decl";
+  private static final String LOAD_EXTERNAL_DECL =
+      "http://apache.org/xml/features/nonvalidating/load-external-dtd";
+  private static final String EXTERNAL_GENERAL_ENTITIES =
+      "http://xml.org/sax/features/external-general-entities";
+  private static final String EXTERNAL_PARAMETER_ENTITIES =
+      "http://xml.org/sax/features/external-parameter-entities";
+  private static final String CREATE_ENTITY_REF_NODES =
+      "http://apache.org/xml/features/dom/create-entity-ref-nodes";
+
+
   /**
    * Transform input xml given a stylesheet.
    * 
@@ -49,7 +69,7 @@ public class XMLUtils {
                                ) 
     throws TransformerConfigurationException, TransformerException {
     // Instantiate a TransformerFactory
-    TransformerFactory tFactory = TransformerFactory.newInstance();
+    TransformerFactory tFactory = newSecureTransformerFactory();
 
     // Use the TransformerFactory to process the  
     // stylesheet and generate a Transformer
@@ -61,4 +81,82 @@ public class XMLUtils {
     // and send the output to a Result object.
     transformer.transform(new StreamSource(xml), new StreamResult(out));
   }
+
+  /**
+   * This method should be used if you need a {@link DocumentBuilderFactory}. Use this method
+   * instead of {@link DocumentBuilderFactory#newInstance()}. The factory that is returned has
+   * secure configuration enabled.
+   *
+   * @return a {@link DocumentBuilderFactory} with secure configuration enabled
+   * @throws ParserConfigurationException if the {@code JAXP} parser does not support the
+   * secure configuration
+   */
+  public static DocumentBuilderFactory newSecureDocumentBuilderFactory()
+          throws ParserConfigurationException {
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+    dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    dbf.setFeature(DISALLOW_DOCTYPE_DECL, true);
+    dbf.setFeature(LOAD_EXTERNAL_DECL, false);
+    dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false);
+    dbf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false);
+    dbf.setFeature(CREATE_ENTITY_REF_NODES, false);
+    return dbf;
+  }
+
+  /**
+   * This method should be used if you need a {@link SAXParserFactory}. Use this method
+   * instead of {@link SAXParserFactory#newInstance()}. The factory that is returned has
+   * secure configuration enabled.
+   *
+   * @return a {@link SAXParserFactory} with secure configuration enabled
+   * @throws ParserConfigurationException if the {@code JAXP} parser does not support the
+   * secure configuration
+   * @throws SAXException if there are another issues when creating the factory
+   */
+  public static SAXParserFactory newSecureSAXParserFactory()
+          throws SAXException, ParserConfigurationException {
+    SAXParserFactory spf = SAXParserFactory.newInstance();
+    spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    spf.setFeature(DISALLOW_DOCTYPE_DECL, true);
+    spf.setFeature(LOAD_EXTERNAL_DECL, false);
+    spf.setFeature(EXTERNAL_GENERAL_ENTITIES, false);
+    spf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false);
+    return spf;
+  }
+
+  /**
+   * This method should be used if you need a {@link TransformerFactory}. Use this method
+   * instead of {@link TransformerFactory#newInstance()}. The factory that is returned has
+   * secure configuration enabled.
+   *
+   * @return a {@link TransformerFactory} with secure configuration enabled
+   * @throws TransformerConfigurationException if the {@code JAXP} transformer does not
+   * support the secure configuration
+   */
+  public static TransformerFactory newSecureTransformerFactory()
+          throws TransformerConfigurationException {
+    TransformerFactory trfactory = TransformerFactory.newInstance();
+    trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+    trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
+    return trfactory;
+  }
+
+  /**
+   * This method should be used if you need a {@link SAXTransformerFactory}. Use this method
+   * instead of {@link SAXTransformerFactory#newInstance()}. The factory that is returned has
+   * secure configuration enabled.
+   *
+   * @return a {@link SAXTransformerFactory} with secure configuration enabled
+   * @throws TransformerConfigurationException if the {@code JAXP} transformer does not
+   * support the secure configuration
+   */
+  public static SAXTransformerFactory newSecureSAXTransformerFactory()
+          throws TransformerConfigurationException {
+    SAXTransformerFactory trfactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
+    trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+    trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
+    return trfactory;
+  }
 }

+ 3 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java

@@ -24,6 +24,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.XMLUtils;
+
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -34,7 +36,6 @@ import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
 import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
 import java.io.File;
 import java.util.ArrayList;
 
@@ -76,7 +77,7 @@ public class CLITestHelper {
       boolean success = false;
       testConfigFile = TEST_CACHE_DATA_DIR + File.separator + testConfigFile;
       try {
-        SAXParser p = (SAXParserFactory.newInstance()).newSAXParser();
+        SAXParser p = XMLUtils.newSecureSAXParserFactory().newSAXParser();
         p.parse(testConfigFile, getConfigParser());
         success = true;
       } catch (Exception e) {

+ 4 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java

@@ -41,9 +41,12 @@ import org.xml.sax.InputSource;
 import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
 
 import org.apache.hadoop.http.HttpServer2;
+import org.apache.hadoop.util.XMLUtils;
+
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.mockito.Mockito;
+
 import static org.mockito.Mockito.when;
 import static org.mockito.Mockito.mock;
 import static org.junit.Assert.*;
@@ -223,8 +226,7 @@ public class TestConfServlet {
     ConfServlet.writeResponse(getTestConf(), sw, "xml");
     String xml = sw.toString();
 
-    DocumentBuilderFactory docBuilderFactory 
-      = DocumentBuilderFactory.newInstance();
+    DocumentBuilderFactory docBuilderFactory = XMLUtils.newSecureDocumentBuilderFactory();
     DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
     Document doc = builder.parse(new InputSource(new StringReader(xml)));
     NodeList nameNodes = doc.getElementsByTagName("name");

+ 134 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java

@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.io.InputStream;
+import java.io.StringReader;
+import java.io.StringWriter;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.SAXParser;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+
+import org.apache.hadoop.test.AbstractHadoopTestBase;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class TestXMLUtils extends AbstractHadoopTestBase {
+
+  @Test
+  public void testSecureDocumentBuilderFactory() throws Exception {
+    DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
+    Assertions.assertThat(doc).describedAs("parsed document").isNotNull();
+  }
+
+  @Test(expected = SAXException.class)
+  public void testExternalDtdWithSecureDocumentBuilderFactory() throws Exception {
+    DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    try (InputStream stream = getResourceStream("/xml/external-dtd.xml")) {
+      Document doc = db.parse(stream);
+    }
+  }
+
+  @Test(expected = SAXException.class)
+  public void testEntityDtdWithSecureDocumentBuilderFactory() throws Exception {
+    DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    try (InputStream stream = getResourceStream("/xml/entity-dtd.xml")) {
+      Document doc = db.parse(stream);
+    }
+  }
+
+  @Test
+  public void testSecureSAXParserFactory() throws Exception {
+    SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser();
+    parser.parse(new InputSource(new StringReader("<root/>")), new DefaultHandler());
+  }
+
+  @Test(expected = SAXException.class)
+  public void testExternalDtdWithSecureSAXParserFactory() throws Exception {
+    SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser();
+    try (InputStream stream = getResourceStream("/xml/external-dtd.xml")) {
+      parser.parse(stream, new DefaultHandler());
+    }
+  }
+
+  @Test(expected = SAXException.class)
+  public void testEntityDtdWithSecureSAXParserFactory() throws Exception {
+    SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser();
+    try (InputStream stream = getResourceStream("/xml/entity-dtd.xml")) {
+      parser.parse(stream, new DefaultHandler());
+    }
+  }
+
+  @Test
+  public void testSecureTransformerFactory() throws Exception {
+    Transformer transformer = XMLUtils.newSecureTransformerFactory().newTransformer();
+    DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
+    try (StringWriter stringWriter = new StringWriter()) {
+      transformer.transform(new DOMSource(doc), new StreamResult(stringWriter));
+      Assertions.assertThat(stringWriter.toString()).contains("<root");
+    }
+  }
+
+  @Test(expected = TransformerException.class)
+  public void testExternalDtdWithSecureTransformerFactory() throws Exception {
+    Transformer transformer = XMLUtils.newSecureTransformerFactory().newTransformer();
+    try (
+        InputStream stream = getResourceStream("/xml/external-dtd.xml");
+        StringWriter stringWriter = new StringWriter()
+    ) {
+      transformer.transform(new StreamSource(stream), new StreamResult(stringWriter));
+    }
+  }
+
+  @Test
+  public void testSecureSAXTransformerFactory() throws Exception {
+    Transformer transformer = XMLUtils.newSecureSAXTransformerFactory().newTransformer();
+    DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
+    try (StringWriter stringWriter = new StringWriter()) {
+      transformer.transform(new DOMSource(doc), new StreamResult(stringWriter));
+      Assertions.assertThat(stringWriter.toString()).contains("<root");
+    }
+  }
+
+  @Test(expected = TransformerException.class)
+  public void testExternalDtdWithSecureSAXTransformerFactory() throws Exception {
+    Transformer transformer = XMLUtils.newSecureSAXTransformerFactory().newTransformer();
+    try (
+        InputStream stream = getResourceStream("/xml/external-dtd.xml");
+        StringWriter stringWriter = new StringWriter()
+    ) {
+      transformer.transform(new StreamSource(stream), new StreamResult(stringWriter));
+    }
+  }
+
+  private static InputStream getResourceStream(final String filename) {
+    return TestXMLUtils.class.getResourceAsStream(filename);
+  }
+}

+ 22 - 0
hadoop-common-project/hadoop-common/src/test/resources/xml/entity-dtd.xml

@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<!DOCTYPE lolz [
+        <!ENTITY lol "lol">
+        <!ELEMENT lolz (#PCDATA)>
+        ]>
+<lolz>&lol;</lolz>

+ 23 - 0
hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml

@@ -0,0 +1,23 @@
+<?xml version = "1.0" encoding = "UTF-8" standalone = "no" ?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<!DOCTYPE address SYSTEM "address.dtd">
+<address>
+    <name>First Last</name>
+    <company>Acme</company>
+    <phone>(555) 123-4567</phone>
+</address>

+ 3 - 1
hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java

@@ -25,6 +25,8 @@ import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 
+import org.apache.hadoop.util.XMLUtils;
+
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
@@ -55,7 +57,7 @@ public class JobConfigurationParser {
     Properties result = new Properties();
 
     try {
-      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
 
       DocumentBuilder db = dbf.newDocumentBuilder();
 

+ 7 - 11
hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java

@@ -17,28 +17,27 @@
  */
 package org.apache.hadoop.tools.rumen;
 
+import java.io.IOException;
+import java.io.StringReader;
 import java.util.Properties;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 
-import java.io.InputStream;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-
-import java.nio.charset.Charset;
-
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.ParserConfigurationException;
 
 import org.apache.hadoop.mapreduce.MRConfig;
 import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.util.XMLUtils;
+
 import org.w3c.dom.Document;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.Node;
 import org.w3c.dom.Element;
 import org.w3c.dom.Text;
 
+import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 
 class ParsedConfigFile {
@@ -46,7 +45,6 @@ class ParsedConfigFile {
       Pattern.compile("_(job_[0-9]+_[0-9]+)_");
   private static final Pattern heapPattern =
       Pattern.compile("-Xmx([0-9]+)([mMgG])");
-  private static final Charset UTF_8 = Charset.forName("UTF-8");
 
   final int heapMegabytes;
 
@@ -103,13 +101,11 @@ class ParsedConfigFile {
     }
 
     try {
-      InputStream is = new ByteArrayInputStream(xmlString.getBytes(UTF_8));
-
-      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
 
       DocumentBuilder db = dbf.newDocumentBuilder();
 
-      Document doc = db.parse(is);
+      Document doc = db.parse(new InputSource(new StringReader(xmlString)));
 
       Element root = doc.getDocumentElement();