1
0
Quellcode durchsuchen

HADOOP-14908. CrossOriginFilter should trigger regex on more input (Johannes Alberti via aw)

Allen Wittenauer vor 7 Jahren
Ursprung
Commit
4d5dd75b60

+ 20 - 7
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/http/CrossOriginFilter.java

@@ -37,6 +37,7 @@ import javax.servlet.http.HttpServletResponse;
 import org.apache.commons.lang.StringUtils;
 
 import com.google.common.annotations.VisibleForTesting;
+import java.util.stream.Collectors;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -66,6 +67,7 @@ public class CrossOriginFilter implements Filter {
   // Filter configuration
   public static final String ALLOWED_ORIGINS = "allowed-origins";
   public static final String ALLOWED_ORIGINS_DEFAULT = "*";
+  public static final String ALLOWED_ORIGINS_REGEX_PREFIX = "regex:";
   public static final String ALLOWED_METHODS = "allowed-methods";
   public static final String ALLOWED_METHODS_DEFAULT = "GET,POST,HEAD";
   public static final String ALLOWED_HEADERS = "allowed-headers";
@@ -194,6 +196,12 @@ public class CrossOriginFilter implements Filter {
     allowAllOrigins = allowedOrigins.contains("*");
     LOG.info("Allowed Origins: " + StringUtils.join(allowedOrigins, ','));
     LOG.info("Allow All Origins: " + allowAllOrigins);
+    List<String> discouragedAllowedOrigins = allowedOrigins.stream()
+            .filter(s -> s.length() > 1 && s.contains("*"))
+            .collect(Collectors.toList());
+    for (String discouragedAllowedOrigin : discouragedAllowedOrigins) {
+        LOG.warn("Allowed Origin pattern '" + discouragedAllowedOrigin + "' is discouraged, use the 'regex:' prefix and use a Java regular expression instead.");
+    }
   }
 
   private void initializeMaxAge(FilterConfig filterConfig) {
@@ -228,15 +236,20 @@ public class CrossOriginFilter implements Filter {
     String[] origins = originsList.trim().split("\\s+");
     for (String origin : origins) {
       for (String allowedOrigin : allowedOrigins) {
-        if (allowedOrigin.contains("*")) {
-          String regex = allowedOrigin.replace(".", "\\.").replace("*", ".*");
-          Pattern p = Pattern.compile(regex);
-          Matcher m = p.matcher(origin);
-          if (m.matches()) {
+        Pattern regexPattern = null;
+        if (allowedOrigin.startsWith(ALLOWED_ORIGINS_REGEX_PREFIX)) {
+            String regex = allowedOrigin.substring(ALLOWED_ORIGINS_REGEX_PREFIX.length());
+            regexPattern = Pattern.compile(regex);
+        } else if (allowedOrigin.contains("*")) {
+            String regex = allowedOrigin.replace(".", "\\.").replace("*", ".*");
+            regexPattern = Pattern.compile(regex);
+        }
+
+        if (regexPattern != null
+                && regexPattern.matcher(origin).matches()) {
             return true;
-          }
         } else if (allowedOrigin.equals(origin)) {
-          return true;
+            return true;
         }
       }
     }

+ 9 - 3
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -1861,9 +1861,15 @@
 <property>
   <name>hadoop.http.cross-origin.allowed-origins</name>
   <value>*</value>
-  <description>Comma separated list of origins that are allowed for web
-    services needing cross-origin (CORS) support. Wildcards (*) and patterns
-    allowed</description>
+  <description>Comma separated list of origins that are allowed for web services
+    needing cross-origin (CORS) support. If a value in the list contains an
+    asterisk (*), a regex pattern, escaping any dots ('.' -> '\.') and replacing
+    the asterisk such that it captures any characters ('*' -> '.*'), is generated.
+    Values prefixed with 'regex:' are interpreted directly as regular expressions,
+    e.g. use the expression 'regex:https?:\/\/foo\.bar(:[0-9]+)?' to allow any
+    origin using the 'http' or 'https' protocol in the domain 'foo.bar' on any
+    port. The use of simple wildcards ('*') is discouraged, and only available for
+    backward compatibility.</description>
 </property>
 
 <property>

+ 1 - 1
hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md

@@ -60,7 +60,7 @@ Add org.apache.hadoop.security.HttpCrossOriginFilterInitializer to hadoop.http.f
 | Property                                 | Default Value                                 | Description                                                                            |
 |:---------------------------------------- |:--------------------------------------------- |:-------------------------------------------------------------------------------------  |
 | hadoop.http.cross-origin.enabled         | `false`                                       | Enables cross origin support for all web-services                                      |
-| hadoop.http.cross-origin.allowed-origins | `*`                                           | Comma separated list of origins that are allowed, wildcards (`*`) and patterns allowed |
+| hadoop.http.cross-origin.allowed-origins | `*`                                           | Comma separated list of origins that are allowed. Values prefixed with `regex:` are interpreted as regular expressions. Values containing wildcards (`*`) are also accepted and are converted into a regular expression; their use is discouraged and is supported only for backward compatibility. |
 | hadoop.http.cross-origin.allowed-methods | `GET,POST,HEAD`                               | Comma separated list of methods that are allowed                                       |
 | hadoop.http.cross-origin.allowed-headers | `X-Requested-With,Content-Type,Accept,Origin` | Comma separated list of headers that are allowed                                       |
 | hadoop.http.cross-origin.max-age         | `1800`                                        | Number of seconds a pre-flighted request can be cached                                 |

+ 79 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/http/TestCrossOriginFilter.java

@@ -127,6 +127,85 @@ public class TestCrossOriginFilter {
     Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com"));
   }
 
+  @Test
+  public void testRegexPatternMatchingOrigins() throws ServletException, IOException {
+
+    // Setup the configuration settings of the server
+    Map<String, String> conf = new HashMap<String, String>();
+    conf.put(CrossOriginFilter.ALLOWED_ORIGINS, "regex:.*[.]example[.]com");
+    FilterConfig filterConfig = new FilterConfigTest(conf);
+
+    // Object under test
+    CrossOriginFilter filter = new CrossOriginFilter();
+    filter.init(filterConfig);
+
+    // match multiple sub-domains
+    Assert.assertFalse(filter.areOriginsAllowed("example.com"));
+    Assert.assertFalse(filter.areOriginsAllowed("foo:example.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("foo.example.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("foo.bar.example.com"));
+
+    // First origin is allowed
+    Assert.assertTrue(filter.areOriginsAllowed("foo.example.com foo.nomatch.com"));
+    // Second origin is allowed
+    Assert.assertTrue(filter.areOriginsAllowed("foo.nomatch.com foo.example.com"));
+    // No origin in list is allowed
+    Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com"));
+  }
+
+  @Test
+  public void testComplexRegexPatternMatchingOrigins() throws ServletException, IOException {
+
+    // Setup the configuration settings of the server
+    Map<String, String> conf = new HashMap<String, String>();
+    conf.put(CrossOriginFilter.ALLOWED_ORIGINS, "regex:https?:\\/\\/sub1[.]example[.]com(:[0-9]+)?");
+    FilterConfig filterConfig = new FilterConfigTest(conf);
+
+    // Object under test
+    CrossOriginFilter filter = new CrossOriginFilter();
+    filter.init(filterConfig);
+
+    Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com:1234"));
+    Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com:8080"));
+
+    // No origin in list is allowed
+    Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com"));
+  }
+
+  @Test
+  public void testMixedRegexPatternMatchingOrigins() throws ServletException, IOException {
+
+    // Setup the configuration settings of the server
+    Map<String, String> conf = new HashMap<String, String>();
+    conf.put(CrossOriginFilter.ALLOWED_ORIGINS, "regex:https?:\\/\\/sub1[.]example[.]com(:[0-9]+)?, "
+            + "*.example2.com");
+    FilterConfig filterConfig = new FilterConfigTest(conf);
+
+    // Object under test
+    CrossOriginFilter filter = new CrossOriginFilter();
+    filter.init(filterConfig);
+
+    Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("http://sub1.example.com:1234"));
+    Assert.assertTrue(filter.areOriginsAllowed("https://sub1.example.com:8080"));
+
+    // match multiple sub-domains
+    Assert.assertFalse(filter.areOriginsAllowed("example2.com"));
+    Assert.assertFalse(filter.areOriginsAllowed("foo:example2.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("foo.example2.com"));
+    Assert.assertTrue(filter.areOriginsAllowed("foo.bar.example2.com"));
+
+    // First origin is allowed
+    Assert.assertTrue(filter.areOriginsAllowed("foo.example2.com foo.nomatch.com"));
+    // Second origin is allowed
+    Assert.assertTrue(filter.areOriginsAllowed("foo.nomatch.com foo.example2.com"));
+    // No origin in list is allowed
+    Assert.assertFalse(filter.areOriginsAllowed("foo.nomatch1.com foo.nomatch2.com"));
+  }
+
   @Test
   public void testDisallowedOrigin() throws ServletException, IOException {
 

+ 16 - 16
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServer.md

@@ -41,9 +41,9 @@ Previously this was supported purely for MapReduce jobs by the Application Histo
 With the introduction of the timeline server, the Application History Server becomes just one use of
 the Timeline Server.
 
-Generic information includes application level data such as 
+Generic information includes application level data such as
 
-* queue-name, 
+* queue-name,
 * user information and the like set in the `ApplicationSubmissionContext`,
 * a list of application-attempts that ran for an application
 * information about each application-attempt
@@ -68,7 +68,7 @@ Current status
 1. The "Timeline Server v1" REST API has been declared one of the REST APIs
   whose compatibility will be maintained in future releases.
 1. The single-server implementation of the Timeline Server places a limit on
-  the scalability of the service; it also prevents the service being 
+  the scalability of the service; it also prevents the service being
   a High-Availability component of the YARN infrastructure.
 
 Future Plans
@@ -88,9 +88,9 @@ data structures as well as the ability of the client to failover between Timelin
 The Timeline Domain offers a namespace for Timeline server allowing
 users to host multiple entities, isolating them from other users and applications.
 Timeline server Security is defined at this level.
- 
+
 A "Domain" primarily stores owner info, read and write ACL information,
-created and modified time stamp information. Each Domain is identified by an ID which 
+created and modified time stamp information. Each Domain is identified by an ID which
 must be unique across all users in the YARN cluster.
 
 #### Timeline Entity
@@ -111,7 +111,7 @@ Each Entity is uniquely identified by an `EntityId` and `EntityType`.
 #### Timeline Events
 
 A Timeline Event describes an event that is related to a specific
-Timeline Entity of an application. 
+Timeline Entity of an application.
 
 Users are free to define what an event means —such as starting
 an application, getting allocated a container,
@@ -156,7 +156,7 @@ and cluster operators.
 | `yarn.timeline-service.webapp.https.address` | The https address of the Timeline service web application. Defaults to `${yarn.timeline-service.hostname}:8190`. |
 | `yarn.timeline-service.bind-host` | The actual address the server will bind to. If this optional address is set, the RPC and webapp servers will bind to this address and the port specified in `yarn.timeline-service.address` and `yarn.timeline-service.webapp.address`, respectively. This is most useful for making the service listen on all interfaces by setting to `0.0.0.0`. |
 | `yarn.timeline-service.http-cross-origin.enabled` | Enables cross-origin support (CORS) for web services where cross-origin web response headers are needed. For example, javascript making a web services request to the timeline server. Defaults to `false`. |
-| `yarn.timeline-service.http-cross-origin.allowed-origins` | Comma separated list of origins that are allowed for web services needing cross-origin (CORS) support. Wildcards `(*)` and patterns allowed. Defaults to `*`. |
+| `yarn.timeline-service.http-cross-origin.allowed-origins` | Comma separated list of origins that are allowed. Values prefixed with `regex:` are interpreted as regular expressions. Values containing wildcards (`*`) are also accepted and are converted into a regular expression; their use is discouraged and is supported only for backward compatibility. Defaults to `*`. |
 | `yarn.timeline-service.http-cross-origin.allowed-methods` | Comma separated list of methods that are allowed for web services needing cross-origin (CORS) support. Defaults to `GET,POST,HEAD`. |
 | `yarn.timeline-service.http-cross-origin.allowed-headers` | Comma separated list of headers that are allowed for web services needing cross-origin (CORS) support. Defaults to `X-Requested-With,Content-Type,Accept,Origin`. |
 | `yarn.timeline-service.http-cross-origin.max-age` | The number of seconds a pre-flighted request can be cached for web services needing cross-origin (CORS) support. Defaults to `1800`. |
@@ -420,7 +420,7 @@ response: `TimelinePutResponse`
 
 ### List domains of a user: GET `/ws/v1/timeline/domain`
 
-Retrieves a list of all domains of a user. 
+Retrieves a list of all domains of a user.
 
 If an owner is specified, that owner name overrides that of the caller.
 
@@ -598,8 +598,8 @@ Request Body:
 Required fields
 
 Entity: `type` and `id`. `starttime` is required unless the
-entity contains one or more event). 
-Event: `type` and `timestamp`. 
+entity contains one or more event).
+Event: `type` and `timestamp`.
 
 ## <a name="REST_API_LIST_TIMELINE_ENTITIES"></a>Timeline Entity List
 
@@ -809,7 +809,7 @@ Response Body:
         }
       ]
     }
-  
+
 
 
 
@@ -1443,8 +1443,8 @@ None
 ### Elements of the `appattempts` (Application Attempt List) Object
 
 When you make a request for the list of application attempts, the information
-will be returned as a collection of application attempt objects. See 
-[Application Attempt](#REST_API_APPLICATION_ATTEMPT) for the syntax of 
+will be returned as a collection of application attempt objects. See
+[Application Attempt](#REST_API_APPLICATION_ATTEMPT) for the syntax of
 the application attempt object.
 
 | Item         | Data Type   | Description                  |
@@ -1758,7 +1758,7 @@ Response Body:
         }
       ]
     }
-    
+
 
 #### XML response
 
@@ -2004,8 +2004,8 @@ Response Body:
       <containerState>COMPLETE</containerState>
       <nodeHttpAddress>http://localhost:8042</nodeHttpAddress>
     </container>
- 
- 
+
+
 ### Response Codes
 
 1. Queries where a domain, entity type, entity ID or similar cannot be resolved result in