|
@@ -0,0 +1,377 @@
|
|
|
+/*
|
|
|
+ * Licensed to the Apache Software Foundation (ASF) under one
|
|
|
+ * or more contributor license agreements. See the NOTICE file
|
|
|
+ * distributed with this work for additional information
|
|
|
+ * regarding copyright ownership. The ASF licenses this file
|
|
|
+ * to you under the Apache License, Version 2.0 (the
|
|
|
+ * "License"); you may not use this file except in compliance
|
|
|
+ * with the License. You may obtain a copy of the License at
|
|
|
+ *
|
|
|
+ * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+ *
|
|
|
+ * Unless required by applicable law or agreed to in writing, software
|
|
|
+ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+ * See the License for the specific language governing permissions and
|
|
|
+ * limitations under the License.
|
|
|
+ */
|
|
|
+
|
|
|
+package org.apache.hadoop.util;
|
|
|
+
|
|
|
+import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
+
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.Collection;
|
|
|
+import java.util.Collections;
|
|
|
+import java.util.EnumSet;
|
|
|
+import java.util.HashSet;
|
|
|
+import java.util.Iterator;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.Set;
|
|
|
+import java.util.SortedSet;
|
|
|
+import java.util.TreeSet;
|
|
|
+import java.util.concurrent.ConcurrentHashMap;
|
|
|
+
|
|
|
+/**
|
|
|
+ * Static utility methods pertaining to {@link Set} instances.
|
|
|
+ * This class is Hadoop's internal use alternative to Guava's Sets
|
|
|
+ * utility class.
|
|
|
+ * Javadocs for majority of APIs in this class are taken from Guava's Sets
|
|
|
+ * class from Guava release version 27.0-jre.
|
|
|
+ */
|
|
|
+@InterfaceAudience.Private
|
|
|
+public final class Sets {
|
|
|
+
|
|
|
+ private static final int MAX_POWER_OF_TWO = 1 << (Integer.SIZE - 2);
|
|
|
+
|
|
|
+ private Sets() {
|
|
|
+ // empty
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a <i>mutable</i>, initially empty {@code HashSet} instance.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of()
|
|
|
+ * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf}
|
|
|
+ * instead. Otherwise, strongly consider using a {@code LinkedHashSet}
|
|
|
+ * instead, at the cost of increased memory footprint, to get
|
|
|
+ * deterministic iteration behavior.
|
|
|
+ */
|
|
|
+ public static <E> HashSet<E> newHashSet() {
|
|
|
+ return new HashSet<E>();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the
|
|
|
+ * natural sort ordering of its elements.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of()
|
|
|
+ * instead.
|
|
|
+ *
|
|
|
+ * @return a new, empty {@code TreeSet}
|
|
|
+ */
|
|
|
+ public static <E extends Comparable> TreeSet<E> newTreeSet() {
|
|
|
+ return new TreeSet<E>();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a <i>mutable</i> {@code HashSet} instance initially containing
|
|
|
+ * the given elements.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if elements are non-null and won't be added or removed
|
|
|
+ * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[])
|
|
|
+ * instead. If {@code E} is an {@link Enum} type, use
|
|
|
+ * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider
|
|
|
+ * using a {@code LinkedHashSet} instead, at the cost of increased memory
|
|
|
+ * footprint, to get deterministic iteration behavior.
|
|
|
+ *
|
|
|
+ * <p>This method is just a small convenience, either for
|
|
|
+ * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an
|
|
|
+ * empty set then calling {@link Collections#addAll}.
|
|
|
+ */
|
|
|
+ @SafeVarargs
|
|
|
+ public static <E> HashSet<E> newHashSet(E... elements) {
|
|
|
+ HashSet<E> set = newHashSetWithExpectedSize(elements.length);
|
|
|
+ Collections.addAll(set, elements);
|
|
|
+ return set;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a <i>mutable</i> {@code HashSet} instance containing the given
|
|
|
+ * elements. A very thin convenience for creating an empty set then calling
|
|
|
+ * {@link Collection#addAll} or Iterables#addAll.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if mutability is not required and the elements are
|
|
|
+ * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change
|
|
|
+ * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.)
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use
|
|
|
+ * newEnumSet(Iterable, Class) instead.
|
|
|
+ */
|
|
|
+ public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) {
|
|
|
+ return (elements instanceof Collection)
|
|
|
+ ? new HashSet<E>(cast(elements))
|
|
|
+ : newHashSet(elements.iterator());
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a <i>mutable</i> {@code TreeSet} instance containing the given
|
|
|
+ * elements sorted by their natural ordering.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if mutability is not required, use
|
|
|
+ * ImmutableSortedSet#copyOf(Iterable) instead.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an
|
|
|
+ * explicit comparator, this method has different behavior than
|
|
|
+ * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet}
|
|
|
+ * with that comparator.
|
|
|
+ *
|
|
|
+ * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and
|
|
|
+ * should be treated as deprecated. Instead, use the {@code TreeSet}
|
|
|
+ * constructor directly, taking advantage of the new
|
|
|
+ * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>.
|
|
|
+ *
|
|
|
+ * <p>This method is just a small convenience for creating an empty set and
|
|
|
+ * then calling Iterables#addAll. This method is not very useful and will
|
|
|
+ * likely be deprecated in the future.
|
|
|
+ *
|
|
|
+ * @param elements the elements that the set should contain
|
|
|
+ * @return a new {@code TreeSet} containing those elements (minus duplicates)
|
|
|
+ */
|
|
|
+ public static <E extends Comparable> TreeSet<E> newTreeSet(
|
|
|
+ Iterable<? extends E> elements) {
|
|
|
+ TreeSet<E> set = newTreeSet();
|
|
|
+ addAll(set, elements);
|
|
|
+ return set;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static <E extends Comparable> boolean addAll(TreeSet<E> addTo,
|
|
|
+ Iterable<? extends E> elementsToAdd) {
|
|
|
+ if (elementsToAdd instanceof Collection) {
|
|
|
+ Collection<? extends E> c = cast(elementsToAdd);
|
|
|
+ return addTo.addAll(c);
|
|
|
+ }
|
|
|
+ if (elementsToAdd == null) {
|
|
|
+ throw new NullPointerException();
|
|
|
+ }
|
|
|
+ return addAll(addTo, elementsToAdd.iterator());
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a <i>mutable</i> {@code HashSet} instance containing the given
|
|
|
+ * elements. A very thin convenience for creating an empty set and then
|
|
|
+ * calling Iterators#addAll.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if mutability is not required and the elements are
|
|
|
+ * non-null, use ImmutableSet#copyOf(Iterator) instead.
|
|
|
+ *
|
|
|
+ * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create
|
|
|
+ * an {@link EnumSet} instead.
|
|
|
+ *
|
|
|
+ * <p>Overall, this method is not very useful and will likely be deprecated
|
|
|
+ * in the future.
|
|
|
+ */
|
|
|
+ public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) {
|
|
|
+ HashSet<E> set = newHashSet();
|
|
|
+ addAll(set, elements);
|
|
|
+ return set;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns a new hash set using the smallest initial table size that can hold
|
|
|
+ * {@code expectedSize} elements without resizing. Note that this is not what
|
|
|
+ * {@link HashSet#HashSet(int)} does, but it is what most users want and
|
|
|
+ * expect it to do.
|
|
|
+ *
|
|
|
+ * <p>This behavior can't be broadly guaranteed, but has been tested with
|
|
|
+ * OpenJDK 1.7 and 1.8.
|
|
|
+ *
|
|
|
+ * @param expectedSize the number of elements you expect to add to the
|
|
|
+ * returned set
|
|
|
+ * @return a new, empty hash set with enough capacity to hold
|
|
|
+ * {@code expectedSize} elements without resizing
|
|
|
+ * @throws IllegalArgumentException if {@code expectedSize} is negative
|
|
|
+ */
|
|
|
+ public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) {
|
|
|
+ return new HashSet<E>(capacity(expectedSize));
|
|
|
+ }
|
|
|
+
|
|
|
+ private static <E> Collection<E> cast(Iterable<E> iterable) {
|
|
|
+ return (Collection<E>) iterable;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static <E> boolean addAll(Collection<E> addTo,
|
|
|
+ Iterator<? extends E> iterator) {
|
|
|
+ if (addTo == null) {
|
|
|
+ throw new NullPointerException();
|
|
|
+ }
|
|
|
+ if (iterator == null) {
|
|
|
+ throw new NullPointerException();
|
|
|
+ }
|
|
|
+ boolean wasModified = false;
|
|
|
+ while (iterator.hasNext()) {
|
|
|
+ wasModified |= addTo.add(iterator.next());
|
|
|
+ }
|
|
|
+ return wasModified;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the intersection of two sets as an unmodifiable set.
|
|
|
+ * The returned set contains all elements that are contained by both backing
|
|
|
+ * sets.
|
|
|
+ *
|
|
|
+ * <p>Results are undefined if {@code set1} and {@code set2} are sets based
|
|
|
+ * on different equivalence relations (as {@code HashSet}, {@code TreeSet},
|
|
|
+ * and the keySet of an {@code IdentityHashMap} all are).
|
|
|
+ */
|
|
|
+ public static <E> Set<E> intersection(final Set<E> set1,
|
|
|
+ final Set<E> set2) {
|
|
|
+ if (set1 == null) {
|
|
|
+ throw new NullPointerException("set1");
|
|
|
+ }
|
|
|
+ if (set2 == null) {
|
|
|
+ throw new NullPointerException("set2");
|
|
|
+ }
|
|
|
+ Set<E> newSet = new HashSet<>(set1);
|
|
|
+ newSet.retainAll(set2);
|
|
|
+ return Collections.unmodifiableSet(newSet);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the union of two sets as an unmodifiable set.
|
|
|
+ * The returned set contains all elements that are contained in either
|
|
|
+ * backing set.
|
|
|
+ *
|
|
|
+ * <p>Results are undefined if {@code set1} and {@code set2} are sets
|
|
|
+ * based on different equivalence relations (as {@link HashSet},
|
|
|
+ * {@link TreeSet}, and the {@link Map#keySet} of an
|
|
|
+ * {@code IdentityHashMap} all are).
|
|
|
+ */
|
|
|
+ public static <E> Set<E> union(
|
|
|
+ final Set<E> set1, final Set<E> set2) {
|
|
|
+ if (set1 == null) {
|
|
|
+ throw new NullPointerException("set1");
|
|
|
+ }
|
|
|
+ if (set2 == null) {
|
|
|
+ throw new NullPointerException("set2");
|
|
|
+ }
|
|
|
+ Set<E> newSet = new HashSet<>(set1);
|
|
|
+ newSet.addAll(set2);
|
|
|
+ return Collections.unmodifiableSet(newSet);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the difference of two sets as an unmodifiable set.
|
|
|
+ * The returned set contains all elements that are contained by {@code set1}
|
|
|
+ * and not contained by {@code set2}.
|
|
|
+ *
|
|
|
+ * <p>Results are undefined if {@code set1} and {@code set2} are sets based
|
|
|
+ * on different equivalence relations (as {@code HashSet}, {@code TreeSet},
|
|
|
+ * and the keySet of an {@code IdentityHashMap} all are).
|
|
|
+ *
|
|
|
+ * This method is used to find difference for HashSets. For TreeSets with
|
|
|
+ * strict order requirement, recommended method is
|
|
|
+ * {@link #differenceInTreeSets(Set, Set)}.
|
|
|
+ */
|
|
|
+ public static <E> Set<E> difference(
|
|
|
+ final Set<E> set1, final Set<E> set2) {
|
|
|
+ if (set1 == null) {
|
|
|
+ throw new NullPointerException("set1");
|
|
|
+ }
|
|
|
+ if (set2 == null) {
|
|
|
+ throw new NullPointerException("set2");
|
|
|
+ }
|
|
|
+ Set<E> newSet = new HashSet<>(set1);
|
|
|
+ newSet.removeAll(set2);
|
|
|
+ return Collections.unmodifiableSet(newSet);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the difference of two sets as an unmodifiable set.
|
|
|
+ * The returned set contains all elements that are contained by {@code set1}
|
|
|
+ * and not contained by {@code set2}.
|
|
|
+ *
|
|
|
+ * <p>Results are undefined if {@code set1} and {@code set2} are sets based
|
|
|
+ * on different equivalence relations (as {@code HashSet}, {@code TreeSet},
|
|
|
+ * and the keySet of an {@code IdentityHashMap} all are).
|
|
|
+ *
|
|
|
+ * This method is used to find difference for TreeSets. For HashSets,
|
|
|
+ * recommended method is {@link #difference(Set, Set)}.
|
|
|
+ */
|
|
|
+ public static <E> Set<E> differenceInTreeSets(
|
|
|
+ final Set<E> set1, final Set<E> set2) {
|
|
|
+ if (set1 == null) {
|
|
|
+ throw new NullPointerException("set1");
|
|
|
+ }
|
|
|
+ if (set2 == null) {
|
|
|
+ throw new NullPointerException("set2");
|
|
|
+ }
|
|
|
+ Set<E> newSet = new TreeSet<>(set1);
|
|
|
+ newSet.removeAll(set2);
|
|
|
+ return Collections.unmodifiableSet(newSet);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the symmetric difference of two sets as an unmodifiable set.
|
|
|
+ * The returned set contains all elements that are contained in either
|
|
|
+ * {@code set1} or {@code set2} but not in both. The iteration order of the
|
|
|
+ * returned set is undefined.
|
|
|
+ *
|
|
|
+ * <p>Results are undefined if {@code set1} and {@code set2} are sets based
|
|
|
+ * on different equivalence relations (as {@code HashSet}, {@code TreeSet},
|
|
|
+ * and the keySet of an {@code IdentityHashMap} all are).
|
|
|
+ */
|
|
|
+ public static <E> Set<E> symmetricDifference(
|
|
|
+ final Set<E> set1, final Set<E> set2) {
|
|
|
+ if (set1 == null) {
|
|
|
+ throw new NullPointerException("set1");
|
|
|
+ }
|
|
|
+ if (set2 == null) {
|
|
|
+ throw new NullPointerException("set2");
|
|
|
+ }
|
|
|
+ Set<E> intersection = new HashSet<>(set1);
|
|
|
+ intersection.retainAll(set2);
|
|
|
+ Set<E> symmetricDifference = new HashSet<>(set1);
|
|
|
+ symmetricDifference.addAll(set2);
|
|
|
+ symmetricDifference.removeAll(intersection);
|
|
|
+ return Collections.unmodifiableSet(symmetricDifference);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Creates a thread-safe set backed by a hash map. The set is backed by a
|
|
|
+ * {@link ConcurrentHashMap} instance, and thus carries the same concurrency
|
|
|
+ * guarantees.
|
|
|
+ *
|
|
|
+ * <p>Unlike {@code HashSet}, this class does NOT allow {@code null} to be
|
|
|
+ * used as an element. The set is serializable.
|
|
|
+ *
|
|
|
+ * @return a new, empty thread-safe {@code Set}
|
|
|
+ */
|
|
|
+ public static <E> Set<E> newConcurrentHashSet() {
|
|
|
+ return Collections.newSetFromMap(new ConcurrentHashMap<E, Boolean>());
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns a capacity that is sufficient to keep the map from being resized
|
|
|
+ * as long as it grows no larger than expectedSize and the load factor
|
|
|
+ * is ≥ its default (0.75).
|
|
|
+ * The implementation of this method is adapted from Guava version 27.0-jre.
|
|
|
+ */
|
|
|
+ private static int capacity(int expectedSize) {
|
|
|
+ if (expectedSize < 3) {
|
|
|
+ if (expectedSize < 0) {
|
|
|
+ throw new IllegalArgumentException(
|
|
|
+ "expectedSize cannot be negative but was: " + expectedSize);
|
|
|
+ }
|
|
|
+ return expectedSize + 1;
|
|
|
+ }
|
|
|
+ if (expectedSize < MAX_POWER_OF_TWO) {
|
|
|
+ // This is the calculation used in JDK8 to resize when a putAll
|
|
|
+ // happens; it seems to be the most conservative calculation we
|
|
|
+ // can make. 0.75 is the default load factor.
|
|
|
+ return (int) ((float) expectedSize / 0.75F + 1.0F);
|
|
|
+ }
|
|
|
+ return Integer.MAX_VALUE; // any large value
|
|
|
+ }
|
|
|
+
|
|
|
+}
|