Browse source

[partial-ns] Introduce transaction.

Haohui Mai 10 years ago
parent
commit
406987b2a5

+ 110 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DB.java

@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+
+import com.google.protobuf.ByteString;
+
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID;
+import static org.apache.hadoop.hdfs.server.namenode.INodeId.ROOT_INODE_ID;
+
+class DB {
+  void addRoot(ByteString root) {
+    INodeContainer c = new INodeContainer();
+    c.inode(root);
+    inodeMap.put(ROOT_INODE_ID, c);
+  }
+
+  static class INodeContainer {
+    private static final NavigableMap<ByteBuffer, Long> EMPTY_MAP =
+        Collections.unmodifiableNavigableMap(new TreeMap<ByteBuffer, Long>());
+    private ByteString inode;
+    /**
+     * Maps each child's local name to the child's inode id. NOTE: the caller
+     * is responsible for ensuring that the container owns the ByteBuffer keys.
+     */
+    private TreeMap<ByteBuffer, Long> children;
+    ByteString inode() {
+      return inode;
+    }
+
+    void inode(ByteString inode) {
+      this.inode = inode;
+    }
+
+    long getChild(ByteBuffer childName) {
+      return children == null ? INVALID_INODE_ID : children
+          .getOrDefault(childName, INVALID_INODE_ID);
+    }
+
+    Map<ByteBuffer, Long> ensureChildrenList() {
+      if (children == null) {
+        children = new TreeMap<>();
+      }
+      return children;
+    }
+
+    Map<ByteBuffer, Long> children() {
+      return children;
+    }
+
+    NavigableMap<ByteBuffer, Long> readOnlyChildren() {
+      return children == null ? EMPTY_MAP : Collections
+          .unmodifiableNavigableMap(children);
+    }
+  }
+
+  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+
+  private final HashMap<Long, INodeContainer> inodeMap = new HashMap<>();
+
+  HashMap<Long, INodeContainer> inodeMap() {
+    return inodeMap;
+  }
+
+  INodeContainer ensureContainer(long inodeId) {
+    INodeContainer c = inodeMap.get(inodeId);
+    if (c == null) {
+      c = new INodeContainer();
+      inodeMap.put(inodeId, c);
+    }
+    return c;
+  }
+
+  INodeContainer getINode(long id) {
+    return inodeMap.get(id);
+  }
+
+  ReentrantReadWriteLock lock() {
+    return lock;
+  }
+}
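
The DB above is a plain in-memory store: each INodeContainer holds the serialized FlatINode bytes plus a sorted map from a child's local name to the child's inode id. A minimal sketch of the intended semantics, written as a hypothetical snippet in the same package (DB and INodeContainer are package-private; the static imports mirror those in DB.java):

    DB db = new DB();
    DB.INodeContainer root = db.ensureContainer(ROOT_INODE_ID);
    ByteBuffer name = ByteBuffer.wrap("foo".getBytes(StandardCharsets.UTF_8));
    root.ensureChildrenList().put(name, 1001L);

    // Lookups return the INVALID_INODE_ID sentinel, never null.
    assert root.getChild(name) == 1001L;
    assert root.getChild(ByteBuffer.wrap("bar".getBytes(StandardCharsets.UTF_8)))
        == INVALID_INODE_ID;

    // readOnlyChildren() is safe even when no children were ever added.
    assert db.ensureContainer(1001L).readOnlyChildren().isEmpty();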

+ 57 - 9
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import com.google.protobuf.ByteString;
 import com.google.protobuf.InvalidProtocolBufferException;
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -66,6 +67,7 @@ import org.slf4j.LoggerFactory;
 import java.io.Closeable;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.EnumSet;
@@ -83,8 +85,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KE
 import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.*;
 import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE;
 import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_FILE_ENCRYPTION_INFO;
-import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER;
+import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID;
+import static org.apache.hadoop.hdfs.server.namenode.INodeId.ROOT_INODE_ID;
 import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID;
+import static org.apache.hadoop.util.Time.now;
 
 /**
  * Both FSDirectory and FSNamesystem manage the state of the namespace.
@@ -99,7 +103,7 @@ public class FSDirectory implements Closeable {
 
   private static INodeDirectory createRoot(FSNamesystem namesystem) {
     final INodeDirectory r = new INodeDirectory(
-        INodeId.ROOT_INODE_ID,
+        ROOT_INODE_ID,
         INodeDirectory.ROOT_NAME,
         namesystem.createFsOwnerPermissions(new FsPermission((short) 0755)),
         0L);
@@ -113,6 +117,20 @@ public class FSDirectory implements Closeable {
     return r;
   }
 
+  private ByteString createRootForFlatNS(FSNamesystem namesystem) {
+    PermissionStatus perm = namesystem.createFsOwnerPermissions(
+        new FsPermission((short) 0755));
+    ByteString b = new FlatINode.Builder()
+        .id(ROOT_INODE_ID)
+        .parentId(INVALID_INODE_ID)
+        .userId(ugid.getId(perm.getUserName()))
+        .groupId(ugid.getId(perm.getGroupName()))
+        .permission((short) 0755)
+        .mtime(now())
+        .build();
+    return b;
+  }
+
   @VisibleForTesting
   static boolean CHECK_RESERVED_FILE_NAMES = true;
   public final static String DOT_RESERVED_STRING = ".reserved";
@@ -213,6 +231,30 @@ public class FSDirectory implements Closeable {
    */
   private final NameCache<ByteArray> nameCache;
 
+  private final DB db;
+  // Maps user/group names to numeric ids.
+  private final StringMap ugid = new StringMap();
+
+  DB db() {
+    return db;
+  }
+
+  StringMap ugid() {
+    return ugid;
+  }
+
+  RWTransaction newRWTransaction() {
+    return new RWTransaction(this);
+  }
+
+  public ROTransaction newROTransaction() {
+    return new ROTransaction(this);
+  }
+
+  public ReplayTransaction newReplayTransaction() {
+    return new ReplayTransaction(this);
+  }
+
   FSDirectory(FSNamesystem ns, Configuration conf) throws IOException {
     this.dirLock = new ReentrantReadWriteLock(true); // fair
     this.inodeId = new INodeId();
@@ -284,10 +326,8 @@ public class FSDirectory implements Closeable {
     // to 64MB. This means we can only store approximately 6.7 million entries
     // per directory, but let's use 6.4 million for some safety.
     final int MAX_DIR_ITEMS = 64 * 100 * 1000;
-    Preconditions.checkArgument(
-        maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS, "Cannot set "
-            + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY
-            + " to a value less than 1 or greater than " + MAX_DIR_ITEMS);
+    Preconditions.checkArgument(maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS,
+        "Cannot set " + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY
+            + " to a value less than 1 or greater than " + MAX_DIR_ITEMS);
 
     int threshold = conf.getInt(
         DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
@@ -298,6 +338,9 @@ public class FSDirectory implements Closeable {
     namesystem = ns;
     this.editLog = ns.getEditLog();
     ezManager = new EncryptionZoneManager(this, conf);
+    this.db = new DB();
+    // TODO: Load fsimage
+    db.addRoot(createRootForFlatNS(ns));
   }
     
   FSNamesystem getFSNamesystem() {
@@ -1364,7 +1407,7 @@ public class FSDirectory implements Closeable {
     } catch (NumberFormatException e) {
       throw new FileNotFoundException("Invalid inode path: " + src);
     }
-    if (id == INodeId.ROOT_INODE_ID && pathComponents.length == 4) {
+    if (id == ROOT_INODE_ID && pathComponents.length == 4) {
       return Path.SEPARATOR;
     }
     INode inode = fsd.getInode(id);
@@ -1377,7 +1420,7 @@ public class FSDirectory implements Closeable {
     if ((pathComponents.length > 4)
         && DFSUtil.bytes2String(pathComponents[4]).equals("..")) {
       INode parent = inode.getParent();
-      if (parent == null || parent.getId() == INodeId.ROOT_INODE_ID) {
+      if (parent == null || parent.getId() == ROOT_INODE_ID) {
         // inode is root, or its parent is root.
         return Path.SEPARATOR;
       } else {
@@ -1386,7 +1429,7 @@ public class FSDirectory implements Closeable {
     }
 
     String path = "";
-    if (id != INodeId.ROOT_INODE_ID) {
+    if (id != ROOT_INODE_ID) {
       path = inode.getFullPathName();
     }
     return constructRemainingPath(path, pathComponents, 4);
@@ -1580,6 +1623,11 @@ public class FSDirectory implements Closeable {
             false) : null;
   }
 
+  public HdfsFileStatus getAuditFileInfo(FlatINodesInPath iip) {
+    // TODO
+    return null;
+  }
+
   /**
    * Verify that parent directory of src exists.
    */
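
These factory methods are the intended entry points into the new transaction layer. Because Transaction implements Closeable and begin() takes the matching FSDirectory lock, try-with-resources gives the natural lock scope. A hedged sketch of a read path (fsd and the looked-up name are hypothetical; static imports of ROOT_INODE_ID and INVALID_INODE_ID as in FSDirectory.java):

    try (ROTransaction tx = fsd.newROTransaction().begin()) {
      long id = tx.getChild(ROOT_INODE_ID,
          ByteBuffer.wrap("foo".getBytes(StandardCharsets.UTF_8)));
      if (id != INVALID_INODE_ID) {
        FlatINode inode = tx.getINode(id);
        // ... read fields off the FlatINode ...
      }
    } // close() releases the FSDirectory read lock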

+ 55 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ROTransaction.java

@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.NavigableMap;
+
+class ROTransaction extends Transaction {
+  ROTransaction(FSDirectory fsd) {
+    super(fsd);
+  }
+
+  ROTransaction begin() {
+    fsd.readLock();
+    return this;
+  }
+
+  @Override
+  FlatINode getINode(long id) {
+    return getINodeFromDB(id);
+  }
+
+  @Override
+  long getChild(long parentId, ByteBuffer localName) {
+    return getChildFromDB(parentId, localName);
+  }
+
+  @Override
+  NavigableMap<ByteBuffer, Long> childrenView(long parent) {
+    DB.INodeContainer c = fsd.db().getINode(parent);
+    // Guard against a parent that does not exist in the DB.
+    return c == null ? Collections.<ByteBuffer, Long>emptyNavigableMap()
+        : c.readOnlyChildren();
+  }
+
+  @Override
+  public void close() throws IOException {
+    fsd.readUnlock();
+  }
+}

+ 157 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RWTransaction.java

@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import com.google.protobuf.ByteString;
+import org.apache.hadoop.fs.Options;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID;
+
+class RWTransaction extends Transaction {
+  private static final long DELETED_INODE_ID = INVALID_INODE_ID;
+  private static final ByteString DELETED_INODE = ByteString.EMPTY;
+
+  private HashMap<Long, DB.INodeContainer> inodeMap = new HashMap<>();
+
+  RWTransaction(FSDirectory fsd) {
+    super(fsd);
+  }
+
+  RWTransaction begin() {
+    fsd.writeLock();
+    return this;
+  }
+
+  @Override
+  FlatINode getINode(long id) {
+    DB.INodeContainer c = inodeMap.get(id);
+    if (c == null || c.inode() == null) {
+      // Not touched (or only its children touched) in this transaction.
+      return getINodeFromDB(id);
+    }
+    // DELETED_INODE is a reference sentinel; identity comparison is intended.
+    return c.inode() == DELETED_INODE ? null : FlatINode.wrap(c.inode());
+  }
+
+  @Override
+  long getChild(long parentId, ByteBuffer localName) {
+    DB.INodeContainer c = inodeMap.get(parentId);
+    if (c == null || c.children() == null) {
+      return getChildFromDB(parentId, localName);
+    }
+    Long id = c.children().get(localName);
+    if (id == null) {
+      // This name was not touched in the transaction; fall back to the DB.
+      return getChildFromDB(parentId, localName);
+    }
+    return id == DELETED_INODE_ID ? INVALID_INODE_ID : id;
+  }
+
+  @Override
+  NavigableMap<ByteBuffer, Long> childrenView(long parent) {
+    // TODO: This method only provides a read-only view of the content in
+    // the DB. It needs to take the modifications made in this transaction
+    // into account to implement transactional semantics.
+    DB.INodeContainer c = fsd.db().getINode(parent);
+    return c == null ? Collections.<ByteBuffer, Long>emptyNavigableMap()
+        : c.readOnlyChildren();
+  }
+
+  @Override
+  public void close() throws IOException {
+    fsd.writeUnlock();
+  }
+
+  void putINode(long id, ByteString inode) {
+    DB.INodeContainer c = ensureContainer(id);
+    c.inode(inode);
+  }
+
+  void putChild(long parentId, ByteBuffer localName, long id) {
+    DB.INodeContainer c = ensureContainer(parentId);
+    ByteString s = ByteString.copyFrom(localName);
+    c.ensureChildrenList().put(s.asReadOnlyByteBuffer(), id);
+  }
+
+  void deleteINode(long inodeId) {
+    putINode(inodeId, DELETED_INODE);
+  }
+
+  void deleteChild(long parentId, ByteBuffer localName) {
+    putChild(parentId, localName, DELETED_INODE_ID);
+  }
+
+  long allocateNewInodeId() {
+    return fsd.allocateNewInodeId();
+  }
+
+  int getStringId(String str) {
+    return fsd.ugid().getId(str);
+  }
+
+  private DB.INodeContainer ensureContainer(long inodeId) {
+    DB.INodeContainer c = inodeMap.get(inodeId);
+    if (c == null) {
+      c = new DB.INodeContainer();
+      inodeMap.put(inodeId, c);
+    }
+    return c;
+  }
+
+  void commit() {
+    for (Map.Entry<Long, DB.INodeContainer> e : inodeMap.entrySet()) {
+      long id = e.getKey();
+      DB.INodeContainer c = e.getValue();
+      if (c.inode() == DELETED_INODE) {
+        fsd.db().inodeMap().remove(id);
+        continue;
+      }
+
+      DB.INodeContainer dbContainer = fsd.db().ensureContainer(id);
+      if (c.inode() != null) {
+        dbContainer.inode(c.inode());
+      }
+
+      if (c.children() == null) {
+        continue;
+      }
+
+      for (Map.Entry<ByteBuffer, Long> e1 : c.children().entrySet()) {
+        ByteBuffer childName = e1.getKey();
+        long childId = e1.getValue();
+        if (childId == DELETED_INODE_ID) {
+          if (dbContainer.children() != null) {
+            dbContainer.children().remove(childName);
+          }
+        } else {
+          dbContainer.ensureChildrenList().put(childName, childId);
+        }
+      }
+    }
+    inodeMap.clear();
+  }
+
+  public void logMkDir(FlatINodesInPath iip) {
+    fsd.getEditLog().logMkDir(iip, fsd.ugid());
+  }
+
+  public void logRename(
+      String src, String dst, long mtime, boolean logRetryCache,
+      Options.Rename[] options) {
+    fsd.getEditLog().logRename(src, dst, mtime, logRetryCache, options);
+  }
+}
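
RWTransaction is an overlay: every putINode/putChild lands in its private inodeMap, and fsd.db() is untouched until commit() merges the overlay in. Closing without committing therefore discards the buffered changes (an implicit rollback), while the edit log calls (logMkDir, logRename) remain the caller's responsibility. A hedged sketch of a write path (parentId, localName, and inodeBytes are hypothetical):

    try (RWTransaction tx = fsd.newRWTransaction().begin()) {
      long id = tx.allocateNewInodeId();
      tx.putINode(id, inodeBytes);           // buffered in the overlay only
      tx.putChild(parentId, localName, id);  // buffered, not yet in fsd.db()
      tx.commit();                           // merges the overlay into fsd.db()
    } // close() releases the FSDirectory write lock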

+ 42 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReplayTransaction.java

@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+
+/**
+ * The transaction that is used when replaying edit log. It diverges from
+ * {@link RWTransaction} in the following:
+ * <ul>
+ *   <li>It does not hold the write lock of the FSDirectory, as the lock has
+ *   already been acquired by the caller.</li>
+ *   <li>It makes changes directly to the DB. (TODO)</li>
+ * </ul>
+ */
+public class ReplayTransaction extends RWTransaction {
+  ReplayTransaction(FSDirectory fsd) {
+    super(fsd);
+  }
+
+  @Override
+  ReplayTransaction begin() {
+    // No-op: the edit log loader already holds the FSDirectory write lock.
+    return this;
+  }
+
+  @Override
+  public void close() throws IOException {
+    // No-op: the lock is managed by the caller during replay.
+  }
+}
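
Since ReplayTransaction inherits the buffering and commit() logic from RWTransaction, replay reuses the exact same mutation path, just without lock management. A hedged sketch (the op fields are hypothetical):

    // Inside the edit log loader, which already holds the FSDirectory write lock.
    try (ReplayTransaction tx = fsd.newReplayTransaction().begin()) {
      tx.putINode(op.inodeId, op.inodeBytes);  // same RWTransaction API
      tx.commit();                             // applies the buffered change
    }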

+ 52 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Transaction.java

@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.Closeable;
+import java.nio.ByteBuffer;
+import java.util.NavigableMap;
+
+import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID;
+
+abstract class Transaction implements Closeable {
+  protected final FSDirectory fsd;
+  protected Transaction(FSDirectory fsd) {
+    this.fsd = fsd;
+  }
+
+  abstract FlatINode getINode(long id);
+  abstract long getChild(long parentId, ByteBuffer localName);
+
+  abstract NavigableMap<ByteBuffer, Long> childrenView(long parent);
+
+  protected FlatINode getINodeFromDB(long id) {
+    DB.INodeContainer c = fsd.db().getINode(id);
+    return c == null ? null : FlatINode.wrap(c.inode());
+  }
+
+  protected long getChildFromDB(long parentId, ByteBuffer localName) {
+    DB.INodeContainer c = fsd.db().getINode(parentId);
+    return c == null ? INVALID_INODE_ID : c.getChild(localName);
+  }
+}