001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.util.Time.now;
021    
022    import java.io.Closeable;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.util.ArrayList;
026    import java.util.Arrays;
027    import java.util.List;
028    import java.util.concurrent.TimeUnit;
029    import java.util.concurrent.locks.Condition;
030    import java.util.concurrent.locks.ReentrantReadWriteLock;
031    
032    import org.apache.hadoop.HadoopIllegalArgumentException;
033    import org.apache.hadoop.conf.Configuration;
034    import org.apache.hadoop.fs.ContentSummary;
035    import org.apache.hadoop.fs.FileAlreadyExistsException;
036    import org.apache.hadoop.fs.Options;
037    import org.apache.hadoop.fs.Options.Rename;
038    import org.apache.hadoop.fs.ParentNotDirectoryException;
039    import org.apache.hadoop.fs.Path;
040    import org.apache.hadoop.fs.PathIsNotDirectoryException;
041    import org.apache.hadoop.fs.UnresolvedLinkException;
042    import org.apache.hadoop.fs.permission.AclEntry;
043    import org.apache.hadoop.fs.permission.AclStatus;
044    import org.apache.hadoop.fs.permission.FsAction;
045    import org.apache.hadoop.fs.permission.FsPermission;
046    import org.apache.hadoop.fs.permission.PermissionStatus;
047    import org.apache.hadoop.hdfs.DFSConfigKeys;
048    import org.apache.hadoop.hdfs.DFSUtil;
049    import org.apache.hadoop.hdfs.DistributedFileSystem;
050    import org.apache.hadoop.hdfs.protocol.AclException;
051    import org.apache.hadoop.hdfs.protocol.Block;
052    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
053    import org.apache.hadoop.hdfs.protocol.DirectoryListing;
054    import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
055    import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
056    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
057    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
058    import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
059    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
060    import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
061    import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
062    import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
063    import org.apache.hadoop.hdfs.protocol.SnapshotException;
064    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
065    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
066    import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
067    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
068    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
069    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
070    import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
071    import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount;
072    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
073    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
074    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.Root;
075    import org.apache.hadoop.hdfs.util.ByteArray;
076    import org.apache.hadoop.hdfs.util.ChunkedArrayList;
077    import org.apache.hadoop.hdfs.util.ReadOnlyList;
078    
079    import com.google.common.annotations.VisibleForTesting;
080    import com.google.common.base.Preconditions;
081    
082    /*************************************************
083     * FSDirectory stores the filesystem directory state.
084     * It handles writing/loading values to disk, and logging
085     * changes as we go.
086     *
087     * It keeps the filename->blockset mapping always-current
088     * and logged to disk.
089     * 
090     *************************************************/
091    public class FSDirectory implements Closeable {
092      private static INodeDirectorySnapshottable createRoot(FSNamesystem namesystem) {
093        final INodeDirectory r = new INodeDirectory(
094            INodeId.ROOT_INODE_ID,
095            INodeDirectory.ROOT_NAME,
096            namesystem.createFsOwnerPermissions(new FsPermission((short) 0755)),
097            0L);
098        r.addDirectoryWithQuotaFeature(
099            DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA,
100            DirectoryWithQuotaFeature.DEFAULT_DISKSPACE_QUOTA);
101        final INodeDirectorySnapshottable s = new INodeDirectorySnapshottable(r);
102        s.setSnapshotQuota(0);
103        return s;
104      }
105    
106      @VisibleForTesting
107      static boolean CHECK_RESERVED_FILE_NAMES = true;
108      public final static String DOT_RESERVED_STRING = ".reserved";
109      public final static String DOT_RESERVED_PATH_PREFIX = Path.SEPARATOR
110          + DOT_RESERVED_STRING;
111      public final static byte[] DOT_RESERVED = 
112          DFSUtil.string2Bytes(DOT_RESERVED_STRING);
113      public final static String DOT_INODES_STRING = ".inodes";
114      public final static byte[] DOT_INODES = 
115          DFSUtil.string2Bytes(DOT_INODES_STRING);
116      INodeDirectory rootDir;
117      FSImage fsImage;  
118      private final FSNamesystem namesystem;
119      private volatile boolean ready = false;
120      private final int maxComponentLength;
121      private final int maxDirItems;
122      private final int lsLimit;  // max list limit
123      private final int contentCountLimit; // max content summary counts per run
124      private final INodeMap inodeMap; // Synchronized by dirLock
125      private long yieldCount = 0; // keep track of lock yield count.
126    
127      // lock to protect the directory and BlockMap
128      private final ReentrantReadWriteLock dirLock;
129      private final Condition cond;
130    
131      // utility methods to acquire and release read lock and write lock
132      void readLock() {
133        this.dirLock.readLock().lock();
134      }
135    
136      void readUnlock() {
137        this.dirLock.readLock().unlock();
138      }
139    
140      void writeLock() {
141        this.dirLock.writeLock().lock();
142      }
143    
144      void writeUnlock() {
145        this.dirLock.writeLock().unlock();
146      }
147    
148      boolean hasWriteLock() {
149        return this.dirLock.isWriteLockedByCurrentThread();
150      }
151    
152      boolean hasReadLock() {
153        return this.dirLock.getReadHoldCount() > 0;
154      }
155    
156      public int getReadHoldCount() {
157        return this.dirLock.getReadHoldCount();
158      }
159    
160      public int getWriteHoldCount() {
161        return this.dirLock.getWriteHoldCount();
162      }
163    
  /**
   * Caches frequently used file names used in {@link INode} to reuse
   * byte[] objects and reduce heap usage.
   *
   * Created in the constructor with the threshold read from
   * {@link DFSConfigKeys#DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY}; marked as
   * initialized when the directory becomes ready.
   */
  private final NameCache<ByteArray> nameCache;
169    
170      FSDirectory(FSImage fsImage, FSNamesystem ns, Configuration conf) {
171        this.dirLock = new ReentrantReadWriteLock(true); // fair
172        this.cond = dirLock.writeLock().newCondition();
173        rootDir = createRoot(ns);
174        inodeMap = INodeMap.newInstance(rootDir);
175        this.fsImage = fsImage;
176        int configuredLimit = conf.getInt(
177            DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
178        this.lsLimit = configuredLimit>0 ?
179            configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
180        this.contentCountLimit = conf.getInt(
181            DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY,
182            DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT);
183        
184        // filesystem limits
185        this.maxComponentLength = conf.getInt(
186            DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY,
187            DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT);
188        this.maxDirItems = conf.getInt(
189            DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY,
190            DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_DEFAULT);
191        // We need a maximum maximum because by default, PB limits message sizes
192        // to 64MB. This means we can only store approximately 6.7 million entries
193        // per directory, but let's use 6.4 million for some safety.
194        final int MAX_DIR_ITEMS = 64 * 100 * 1000;
195        Preconditions.checkArgument(
196            maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS, "Cannot set "
197                + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY
198                + " to a value less than 0 or greater than " + MAX_DIR_ITEMS);
199    
200        int threshold = conf.getInt(
201            DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
202            DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
203        NameNode.LOG.info("Caching file names occuring more than " + threshold
204            + " times");
205        nameCache = new NameCache<ByteArray>(threshold);
206        namesystem = ns;
207      }
208        
209      private FSNamesystem getFSNamesystem() {
210        return namesystem;
211      }
212    
213      private BlockManager getBlockManager() {
214        return getFSNamesystem().getBlockManager();
215      }
216    
217      /** @return the root directory inode. */
218      public INodeDirectory getRoot() {
219        return rootDir;
220      }
221    
222      /**
223       * Notify that loading of this FSDirectory is complete, and
224       * it is ready for use 
225       */
226      void imageLoadComplete() {
227        Preconditions.checkState(!ready, "FSDirectory already loaded");
228        setReady();
229      }
230    
231      void setReady() {
232        if(ready) return;
233        writeLock();
234        try {
235          setReady(true);
236          this.nameCache.initialized();
237          cond.signalAll();
238        } finally {
239          writeUnlock();
240        }
241      }
242      
243      //This is for testing purposes only
244      @VisibleForTesting
245      boolean isReady() {
246        return ready;
247      }
248    
249      // exposed for unit tests
250      protected void setReady(boolean flag) {
251        ready = flag;
252      }
253    
254      private void incrDeletedFileCount(long count) {
255        if (getFSNamesystem() != null)
256          NameNode.getNameNodeMetrics().incrFilesDeleted(count);
257      }
258        
259      /**
260       * Shutdown the filestore
261       */
262      @Override
263      public void close() throws IOException {
264        fsImage.close();
265      }
266    
267      /**
268       * Block until the object is ready to be used.
269       */
270      void waitForReady() {
271        if (!ready) {
272          writeLock();
273          try {
274            while (!ready) {
275              try {
276                cond.await(5000, TimeUnit.MILLISECONDS);
277              } catch (InterruptedException ie) {
278              }
279            }
280          } finally {
281            writeUnlock();
282          }
283        }
284      }
285    
286      /**
287       * Add the given filename to the fs.
288       * @throws FileAlreadyExistsException
289       * @throws QuotaExceededException
290       * @throws UnresolvedLinkException
291       * @throws SnapshotAccessControlException 
292       */
293      INodeFile addFile(String path, PermissionStatus permissions,
294          short replication, long preferredBlockSize, String clientName,
295          String clientMachine, DatanodeDescriptor clientNode)
296        throws FileAlreadyExistsException, QuotaExceededException,
297          UnresolvedLinkException, SnapshotAccessControlException, AclException {
298        waitForReady();
299    
300        // Always do an implicit mkdirs for parent directory tree.
301        long modTime = now();
302        
303        Path parent = new Path(path).getParent();
304        if (parent == null) {
305          // Trying to add "/" as a file - this path has no
306          // parent -- avoids an NPE below.
307          return null;
308        }
309        
310        if (!mkdirs(parent.toString(), permissions, true, modTime)) {
311          return null;
312        }
313        INodeFile newNode = new INodeFile(namesystem.allocateNewInodeId(), null,
314            permissions, modTime, modTime, BlockInfo.EMPTY_ARRAY, replication,
315            preferredBlockSize);
316        newNode.toUnderConstruction(clientName, clientMachine, clientNode);
317    
318        boolean added = false;
319        writeLock();
320        try {
321          added = addINode(path, newNode);
322        } finally {
323          writeUnlock();
324        }
325        if (!added) {
326          NameNode.stateChangeLog.info("DIR* addFile: failed to add " + path);
327          return null;
328        }
329    
330        if(NameNode.stateChangeLog.isDebugEnabled()) {
331          NameNode.stateChangeLog.debug("DIR* addFile: " + path + " is added");
332        }
333        return newNode;
334      }
335    
336      INodeFile unprotectedAddFile( long id,
337                                String path, 
338                                PermissionStatus permissions,
339                                List<AclEntry> aclEntries,
340                                short replication,
341                                long modificationTime,
342                                long atime,
343                                long preferredBlockSize,
344                                boolean underConstruction,
345                                String clientName,
346                                String clientMachine) {
347        final INodeFile newNode;
348        assert hasWriteLock();
349        if (underConstruction) {
350          newNode = new INodeFile(id, null, permissions, modificationTime,
351              modificationTime, BlockInfo.EMPTY_ARRAY, replication,
352              preferredBlockSize);
353          newNode.toUnderConstruction(clientName, clientMachine, null);
354    
355        } else {
356          newNode = new INodeFile(id, null, permissions, modificationTime, atime,
357              BlockInfo.EMPTY_ARRAY, replication, preferredBlockSize);
358        }
359    
360        try {
361          if (addINode(path, newNode)) {
362            if (aclEntries != null) {
363              AclStorage.updateINodeAcl(newNode, aclEntries,
364                Snapshot.CURRENT_STATE_ID);
365            }
366            return newNode;
367          }
368        } catch (IOException e) {
369          if(NameNode.stateChangeLog.isDebugEnabled()) {
370            NameNode.stateChangeLog.debug(
371                "DIR* FSDirectory.unprotectedAddFile: exception when add " + path
372                    + " to the file system", e);
373          }
374        }
375        return null;
376      }
377    
378      /**
379       * Add a block to the file. Returns a reference to the added block.
380       */
381      BlockInfo addBlock(String path, INodesInPath inodesInPath, Block block,
382          DatanodeStorageInfo[] targets) throws IOException {
383        waitForReady();
384    
385        writeLock();
386        try {
387          final INodeFile fileINode = inodesInPath.getLastINode().asFile();
388          Preconditions.checkState(fileINode.isUnderConstruction());
389    
390          // check quota limits and updated space consumed
391          updateCount(inodesInPath, 0, fileINode.getBlockDiskspace(), true);
392    
393          // associate new last block for the file
394          BlockInfoUnderConstruction blockInfo =
395            new BlockInfoUnderConstruction(
396                block,
397                fileINode.getFileReplication(),
398                BlockUCState.UNDER_CONSTRUCTION,
399                targets);
400          getBlockManager().addBlockCollection(blockInfo, fileINode);
401          fileINode.addBlock(blockInfo);
402    
403          if(NameNode.stateChangeLog.isDebugEnabled()) {
404            NameNode.stateChangeLog.debug("DIR* FSDirectory.addBlock: "
405                + path + " with " + block
406                + " block is added to the in-memory "
407                + "file system");
408          }
409          return blockInfo;
410        } finally {
411          writeUnlock();
412        }
413      }
414    
415      /**
416       * Persist the block list for the inode.
417       */
418      void persistBlocks(String path, INodeFile file, boolean logRetryCache) {
419        Preconditions.checkArgument(file.isUnderConstruction());
420        waitForReady();
421    
422        writeLock();
423        try {
424          fsImage.getEditLog().logUpdateBlocks(path, file, logRetryCache);
425          if(NameNode.stateChangeLog.isDebugEnabled()) {
426            NameNode.stateChangeLog.debug("DIR* FSDirectory.persistBlocks: "
427                +path+" with "+ file.getBlocks().length 
428                +" blocks is persisted to the file system");
429          }
430        } finally {
431          writeUnlock();
432        }
433      }
434      
435      /**
436       * Persist the new block (the last block of the given file).
437       */
438      void persistNewBlock(String path, INodeFile file) {
439        Preconditions.checkArgument(file.isUnderConstruction());
440        waitForReady();
441    
442        writeLock();
443        try {
444          fsImage.getEditLog().logAddBlock(path, file);
445        } finally {
446          writeUnlock();
447        }
448        if (NameNode.stateChangeLog.isDebugEnabled()) {
449          NameNode.stateChangeLog.debug("DIR* FSDirectory.persistNewBlock: "
450              + path + " with new block " + file.getLastBlock().toString()
451              + ", current total block count is " + file.getBlocks().length);
452        }
453      }
454      
455      /**
456       * Close file.
457       */
458      void closeFile(String path, INodeFile file) {
459        waitForReady();
460        writeLock();
461        try {
462          // file is closed
463          fsImage.getEditLog().logCloseFile(path, file);
464          if (NameNode.stateChangeLog.isDebugEnabled()) {
465            NameNode.stateChangeLog.debug("DIR* FSDirectory.closeFile: "
466                +path+" with "+ file.getBlocks().length 
467                +" blocks is persisted to the file system");
468          }
469        } finally {
470          writeUnlock();
471        }
472      }
473    
474      /**
475       * Remove a block from the file.
476       * @return Whether the block exists in the corresponding file
477       */
478      boolean removeBlock(String path, INodeFile fileNode, Block block)
479          throws IOException {
480        Preconditions.checkArgument(fileNode.isUnderConstruction());
481        waitForReady();
482    
483        writeLock();
484        try {
485          return unprotectedRemoveBlock(path, fileNode, block);
486        } finally {
487          writeUnlock();
488        }
489      }
490      
491      boolean unprotectedRemoveBlock(String path,
492          INodeFile fileNode, Block block) throws IOException {
493        // modify file-> block and blocksMap
494        // fileNode should be under construction
495        boolean removed = fileNode.removeLastBlock(block);
496        if (!removed) {
497          return false;
498        }
499        getBlockManager().removeBlockFromMap(block);
500    
501        if(NameNode.stateChangeLog.isDebugEnabled()) {
502          NameNode.stateChangeLog.debug("DIR* FSDirectory.removeBlock: "
503              +path+" with "+block
504              +" block is removed from the file system");
505        }
506    
507        // update space consumed
508        final INodesInPath iip = rootDir.getINodesInPath4Write(path, true);
509        updateCount(iip, 0, -fileNode.getBlockDiskspace(), true);
510        return true;
511      }
512    
513      /**
514       * @throws SnapshotAccessControlException 
515       * @see #unprotectedRenameTo(String, String, long)
516       * @deprecated Use {@link #renameTo(String, String, Rename...)} instead.
517       */
518      @Deprecated
519      boolean renameTo(String src, String dst, boolean logRetryCache) 
520          throws QuotaExceededException, UnresolvedLinkException, 
521          FileAlreadyExistsException, SnapshotAccessControlException, IOException {
522        if (NameNode.stateChangeLog.isDebugEnabled()) {
523          NameNode.stateChangeLog.debug("DIR* FSDirectory.renameTo: "
524              +src+" to "+dst);
525        }
526        waitForReady();
527        long now = now();
528        writeLock();
529        try {
530          if (!unprotectedRenameTo(src, dst, now))
531            return false;
532        } finally {
533          writeUnlock();
534        }
535        fsImage.getEditLog().logRename(src, dst, now, logRetryCache);
536        return true;
537      }
538    
539      /**
540       * @see #unprotectedRenameTo(String, String, long, Options.Rename...)
541       */
542      void renameTo(String src, String dst, boolean logRetryCache, 
543          Options.Rename... options)
544          throws FileAlreadyExistsException, FileNotFoundException,
545          ParentNotDirectoryException, QuotaExceededException,
546          UnresolvedLinkException, IOException {
547        if (NameNode.stateChangeLog.isDebugEnabled()) {
548          NameNode.stateChangeLog.debug("DIR* FSDirectory.renameTo: " + src
549              + " to " + dst);
550        }
551        waitForReady();
552        long now = now();
553        writeLock();
554        try {
555          if (unprotectedRenameTo(src, dst, now, options)) {
556            incrDeletedFileCount(1);
557          }
558        } finally {
559          writeUnlock();
560        }
561        fsImage.getEditLog().logRename(src, dst, now, logRetryCache, options);
562      }
563    
  /**
   * Change a path name. The caller must hold the directory write lock.
   *
   * The source is removed from its parent and added under the destination;
   * if it cannot be added there, the source is put back where it was.
   * Returns false (after logging a warning) when the source does not exist
   * or is the root, the destination lies under the source, the destination
   * already exists, or the destination's parent does not exist.
   * 
   * @param src source path
   * @param dst destination path
   * @param timestamp modification time applied to the source's parent and
   *        the destination's parent on success
   * @return true if rename succeeds; false otherwise
   * @throws QuotaExceededException if the operation violates any quota limit
   * @throws FileAlreadyExistsException if the src is a symlink that points to dst
   * @throws SnapshotAccessControlException if path is in RO snapshot
   * @deprecated See
   *             {@link #unprotectedRenameTo(String, String, long, Options.Rename...)}
   */
  @Deprecated
  boolean unprotectedRenameTo(String src, String dst, long timestamp)
    throws QuotaExceededException, UnresolvedLinkException, 
    FileAlreadyExistsException, SnapshotAccessControlException, IOException {
    assert hasWriteLock();
    INodesInPath srcIIP = rootDir.getINodesInPath4Write(src, false);
    final INode srcInode = srcIIP.getLastINode();
    
    // check the validity of the source
    if (srcInode == null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + "failed to rename " + src + " to " + dst
          + " because source does not exist");
      return false;
    } 
    if (srcIIP.getINodes().length == 1) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          +"failed to rename "+src+" to "+dst+ " because source is the root");
      return false;
    }
    
    // srcInode and its subtree cannot contain snapshottable directories with
    // snapshots
    List<INodeDirectorySnapshottable> snapshottableDirs = 
        new ArrayList<INodeDirectorySnapshottable>();
    checkSnapshot(srcInode, snapshottableDirs);
    
    // When dst is a directory, the rename targets a child of dst that
    // carries the last component of src as its name.
    if (isDir(dst)) {
      dst += Path.SEPARATOR + new Path(src).getName();
    }
    
    // check the validity of the destination
    // Renaming a path onto itself is reported as success without any change.
    if (dst.equals(src)) {
      return true;
    }
    if (srcInode.isSymlink() && 
        dst.equals(srcInode.asSymlink().getSymlinkString())) {
      throw new FileAlreadyExistsException(
          "Cannot rename symlink "+src+" to its target "+dst);
    }
    
    // dst cannot be directory or a file under src
    // (charAt is safe: dst starts with src and is not equal to it, so it is
    // strictly longer than src)
    if (dst.startsWith(src) && 
        dst.charAt(src.length()) == Path.SEPARATOR_CHAR) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + "failed to rename " + src + " to " + dst
          + " because destination starts with src");
      return false;
    }
    
    byte[][] dstComponents = INode.getPathComponents(dst);
    INodesInPath dstIIP = getExistingPathINodes(dstComponents);
    if (dstIIP.isSnapshot()) {
      throw new SnapshotAccessControlException(
          "Modification on RO snapshot is disallowed");
    }
    if (dstIIP.getLastINode() != null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
                                   +"failed to rename "+src+" to "+dst+ 
                                   " because destination exists");
      return false;
    }
    INode dstParent = dstIIP.getINode(-2);
    if (dstParent == null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          +"failed to rename "+src+" to "+dst+ 
          " because destination's parent does not exist");
      return false;
    }
    
    // Ensure dst has quota to accommodate rename
    verifyFsLimitsForRename(srcIIP, dstIIP);
    verifyQuotaForRename(srcIIP.getINodes(), dstIIP.getINodes());
    
    // State captured before any mutation; the rollback in the finally block
    // below relies on it.
    boolean added = false;
    INode srcChild = srcIIP.getLastINode();
    final byte[] srcChildName = srcChild.getLocalNameBytes();
    final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
        srcIIP.getLatestSnapshotId());
    final boolean srcChildIsReference = srcChild.isReference();
    
    // Record the snapshot on srcChild. After the rename, before any new 
    // snapshot is taken on the dst tree, changes will be recorded in the latest
    // snapshot of the src tree.
    if (isSrcInSnapshot) {
      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
      srcIIP.setLastINode(srcChild);
    }
    
    // check srcChild for reference
    final INodeReference.WithCount withCount;
    Quota.Counts oldSrcCounts = Quota.Counts.newInstance();
    int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
        .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
    if (isSrcInSnapshot) {
      final INodeReference.WithName withName = 
          srcIIP.getINode(-2).asDirectory().replaceChild4ReferenceWithName(
              srcChild, srcIIP.getLatestSnapshotId()); 
      withCount = (INodeReference.WithCount) withName.getReferredINode();
      srcChild = withName;
      srcIIP.setLastINode(srcChild);
      // get the counts before rename
      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
    } else if (srcChildIsReference) {
      // srcChild is reference but srcChild is not in latest snapshot
      withCount = (WithCount) srcChild.asReference().getReferredINode();
    } else {
      withCount = null;
    }

    try {
      // remove src
      final long removedSrc = removeLastINode(srcIIP);
      if (removedSrc == -1) {
        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
            + "failed to rename " + src + " to " + dst
            + " because the source can not be removed");
        // NOTE: 'added' is still false here, so the finally block below
        // runs its put-back logic on this path as well.
        return false;
      }
      
      if (dstParent.getParent() == null) {
        // src and dst file/dir are in the same directory, and the dstParent has
        // been replaced when we removed the src. Refresh the dstIIP and
        // dstParent.
        dstIIP = getExistingPathINodes(dstComponents);
        dstParent = dstIIP.getINode(-2);
      }
      
      // add src to the destination
      
      srcChild = srcIIP.getLastINode();
      final byte[] dstChildName = dstIIP.getLastLocalName();
      final INode toDst;
      if (withCount == null) {
        // plain inode: rename it in place and move it
        srcChild.setLocalName(dstChildName);
        toDst = srcChild;
      } else {
        // referenced inode: rename the referred inode and attach it to the
        // destination through a new DstReference
        withCount.getReferredINode().setLocalName(dstChildName);
        int dstSnapshotId = dstIIP.getLatestSnapshotId();
        final INodeReference.DstReference ref = new INodeReference.DstReference(
            dstParent.asDirectory(), withCount, dstSnapshotId);
        toDst = ref;
      }
      
      added = addLastINodeNoQuotaCheck(dstIIP, toDst);
      if (added) {
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedRenameTo: " 
              + src + " is renamed to " + dst);
        }
        // update modification time of dst and the parent of src
        final INode srcParent = srcIIP.getINode(-2);
        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
        dstParent = dstIIP.getINode(-2); // refresh dstParent
        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
        // update moved leases with new filename
        getFSNamesystem().unprotectedChangeLease(src, dst);     

        // update the quota usage in src tree
        if (isSrcInSnapshot) {
          // get the counts after rename
          Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
              Quota.Counts.newInstance(), false);
          newSrcCounts.subtract(oldSrcCounts);
          srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
              newSrcCounts.get(Quota.DISKSPACE), false);
        }
        
        return true;
      }
    } finally {
      // Rollback: executed on every exit from the try block on which the
      // source was not added under the destination, including exceptions.
      if (!added) {
        final INodeDirectory srcParent = srcIIP.getINode(-2).asDirectory();
        final INode oldSrcChild = srcChild;
        // put it back
        if (withCount == null) {
          srcChild.setLocalName(srcChildName);
        } else if (!srcChildIsReference) { // src must be in snapshot
          // the withCount node will no longer be used thus no need to update
          // its reference number here
          final INode originalChild = withCount.getReferredINode();
          srcChild = originalChild;
          srcChild.setLocalName(srcChildName);
        } else {
          // src was already a reference: drop the current reference and
          // rebuild a DstReference with the originally recorded snapshot id
          withCount.removeReference(oldSrcChild.asReference());
          final INodeReference originalRef = new INodeReference.DstReference(
              srcParent, withCount, srcRefDstSnapshot);
          srcChild = originalRef;
          withCount.getReferredINode().setLocalName(srcChildName);
        }
        
        if (isSrcInSnapshot) {
          // srcParent must have snapshot feature since isSrcInSnapshot is true
          // and src node has been removed from srcParent 
          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
        } else {
          // original srcChild is not in latest snapshot, we only need to add
          // the srcChild back
          addLastINodeNoQuotaCheck(srcIIP, srcChild);
        }
      }
    }
    // Reached only when adding the source under the destination returned
    // false; the rollback above has already run.
    NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
        +"failed to rename "+src+" to "+dst);
    return false;
  }
781    
782      /**
783       * Rename src to dst.
784       * See {@link DistributedFileSystem#rename(Path, Path, Options.Rename...)}
785       * for details related to rename semantics and exceptions.
786       * 
787       * @param src source path
788       * @param dst destination path
789       * @param timestamp modification time
790       * @param options Rename options
791       */
792      boolean unprotectedRenameTo(String src, String dst, long timestamp,
793          Options.Rename... options) throws FileAlreadyExistsException,
794          FileNotFoundException, ParentNotDirectoryException,
795          QuotaExceededException, UnresolvedLinkException, IOException {
796        assert hasWriteLock();
797        boolean overwrite = false;
798        if (null != options) {
799          for (Rename option : options) {
800            if (option == Rename.OVERWRITE) {
801              overwrite = true;
802            }
803          }
804        }
805        String error = null;
806        final INodesInPath srcIIP = rootDir.getINodesInPath4Write(src, false);
807        final INode srcInode = srcIIP.getLastINode();
808        // validate source
809        if (srcInode == null) {
810          error = "rename source " + src + " is not found.";
811          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
812              + error);
813          throw new FileNotFoundException(error);
814        }
815        if (srcIIP.getINodes().length == 1) {
816          error = "rename source cannot be the root";
817          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
818              + error);
819          throw new IOException(error);
820        }
821        // srcInode and its subtree cannot contain snapshottable directories with
822        // snapshots
823        checkSnapshot(srcInode, null);
824        
825        // validate the destination
826        if (dst.equals(src)) {
827          throw new FileAlreadyExistsException(
828              "The source "+src+" and destination "+dst+" are the same");
829        }
830        if (srcInode.isSymlink() && 
831            dst.equals(srcInode.asSymlink().getSymlinkString())) {
832          throw new FileAlreadyExistsException(
833              "Cannot rename symlink "+src+" to its target "+dst);
834        }
835        // dst cannot be a directory or a file under src
836        if (dst.startsWith(src) && 
837            dst.charAt(src.length()) == Path.SEPARATOR_CHAR) {
838          error = "Rename destination " + dst
839              + " is a directory or file under source " + src;
840          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
841              + error);
842          throw new IOException(error);
843        }
844        INodesInPath dstIIP = rootDir.getINodesInPath4Write(dst, false);
845        if (dstIIP.getINodes().length == 1) {
846          error = "rename destination cannot be the root";
847          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
848              + error);
849          throw new IOException(error);
850        }
851    
852        final INode dstInode = dstIIP.getLastINode();
853        List<INodeDirectorySnapshottable> snapshottableDirs = 
854            new ArrayList<INodeDirectorySnapshottable>();
855        if (dstInode != null) { // Destination exists
856          // It's OK to rename a file to a symlink and vice versa
857          if (dstInode.isDirectory() != srcInode.isDirectory()) {
858            error = "Source " + src + " and destination " + dst
859                + " must both be directories";
860            NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
861                + error);
862            throw new IOException(error);
863          }
864          if (!overwrite) { // If destination exists, overwrite flag must be true
865            error = "rename destination " + dst + " already exists";
866            NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
867                + error);
868            throw new FileAlreadyExistsException(error);
869          }
870          if (dstInode.isDirectory()) {
871            final ReadOnlyList<INode> children = dstInode.asDirectory()
872                .getChildrenList(Snapshot.CURRENT_STATE_ID);
873            if (!children.isEmpty()) {
874              error = "rename destination directory is not empty: " + dst;
875              NameNode.stateChangeLog.warn(
876                  "DIR* FSDirectory.unprotectedRenameTo: " + error);
877              throw new IOException(error);
878            }
879          }
880          checkSnapshot(dstInode, snapshottableDirs);
881        }
882    
883        INode dstParent = dstIIP.getINode(-2);
884        if (dstParent == null) {
885          error = "rename destination parent " + dst + " not found.";
886          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
887              + error);
888          throw new FileNotFoundException(error);
889        }
890        if (!dstParent.isDirectory()) {
891          error = "rename destination parent " + dst + " is a file.";
892          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
893              + error);
894          throw new ParentNotDirectoryException(error);
895        }
896    
897        // Ensure dst has quota to accommodate rename
898        verifyFsLimitsForRename(srcIIP, dstIIP);
899        verifyQuotaForRename(srcIIP.getINodes(), dstIIP.getINodes());
900    
901        INode srcChild = srcIIP.getLastINode();
902        final byte[] srcChildName = srcChild.getLocalNameBytes();
903        final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
904            srcIIP.getLatestSnapshotId());
905        final boolean srcChildIsReference = srcChild.isReference();
906        
907        // Record the snapshot on srcChild. After the rename, before any new 
908        // snapshot is taken on the dst tree, changes will be recorded in the latest
909        // snapshot of the src tree.
910        if (isSrcInSnapshot) {
911          srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
912          srcIIP.setLastINode(srcChild);
913        }
914        
915        // check srcChild for reference
916        final INodeReference.WithCount withCount;
917        int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
918            .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
919        Quota.Counts oldSrcCounts = Quota.Counts.newInstance();    
920        if (isSrcInSnapshot) {
921          final INodeReference.WithName withName = srcIIP.getINode(-2).asDirectory()
922              .replaceChild4ReferenceWithName(srcChild, srcIIP.getLatestSnapshotId()); 
923          withCount = (INodeReference.WithCount) withName.getReferredINode();
924          srcChild = withName;
925          srcIIP.setLastINode(srcChild);
926          // get the counts before rename
927          withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
928        } else if (srcChildIsReference) {
929          // srcChild is reference but srcChild is not in latest snapshot
930          withCount = (WithCount) srcChild.asReference().getReferredINode();
931        } else {
932          withCount = null;
933        }
934        
935        boolean undoRemoveSrc = true;
936        final long removedSrc = removeLastINode(srcIIP);
937        if (removedSrc == -1) {
938          error = "Failed to rename " + src + " to " + dst
939              + " because the source can not be removed";
940          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
941              + error);
942          throw new IOException(error);
943        }
944        
945        if (dstParent.getParent() == null) {
946          // src and dst file/dir are in the same directory, and the dstParent has
947          // been replaced when we removed the src. Refresh the dstIIP and
948          // dstParent.
949          dstIIP = rootDir.getINodesInPath4Write(dst, false);
950        }
951        
952        boolean undoRemoveDst = false;
953        INode removedDst = null;
954        try {
955          if (dstInode != null) { // dst exists remove it
956            if (removeLastINode(dstIIP) != -1) {
957              removedDst = dstIIP.getLastINode();
958              undoRemoveDst = true;
959            }
960          }
961          
962          srcChild = srcIIP.getLastINode();
963    
964          final byte[] dstChildName = dstIIP.getLastLocalName();
965          final INode toDst;
966          if (withCount == null) {
967            srcChild.setLocalName(dstChildName);
968            toDst = srcChild;
969          } else {
970            withCount.getReferredINode().setLocalName(dstChildName);
971            int dstSnapshotId = dstIIP.getLatestSnapshotId();
972            final INodeReference.DstReference ref = new INodeReference.DstReference(
973                dstIIP.getINode(-2).asDirectory(), withCount, dstSnapshotId);
974            toDst = ref;
975          }
976    
977          // add src as dst to complete rename
978          if (addLastINodeNoQuotaCheck(dstIIP, toDst)) {
979            undoRemoveSrc = false;
980            if (NameNode.stateChangeLog.isDebugEnabled()) {
981              NameNode.stateChangeLog.debug(
982                  "DIR* FSDirectory.unprotectedRenameTo: " + src
983                  + " is renamed to " + dst);
984            }
985    
986            final INode srcParent = srcIIP.getINode(-2);
987            srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
988            dstParent = dstIIP.getINode(-2);
989            dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
990            // update moved lease with new filename
991            getFSNamesystem().unprotectedChangeLease(src, dst);
992    
993            // Collect the blocks and remove the lease for previous dst
994            long filesDeleted = -1;
995            if (removedDst != null) {
996              undoRemoveDst = false;
997              BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
998              List<INode> removedINodes = new ChunkedArrayList<INode>();
999              filesDeleted = removedDst.cleanSubtree(Snapshot.CURRENT_STATE_ID,
1000                  dstIIP.getLatestSnapshotId(), collectedBlocks, removedINodes, true)
1001                  .get(Quota.NAMESPACE);
1002              getFSNamesystem().removePathAndBlocks(src, collectedBlocks,
1003                  removedINodes);
1004            }
1005    
1006            if (snapshottableDirs.size() > 0) {
1007              // There are snapshottable directories (without snapshots) to be
1008              // deleted. Need to update the SnapshotManager.
1009              namesystem.removeSnapshottableDirs(snapshottableDirs);
1010            }
1011            
1012            // update the quota usage in src tree
1013            if (isSrcInSnapshot) {
1014              // get the counts after rename
1015              Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
1016                  Quota.Counts.newInstance(), false);
1017              newSrcCounts.subtract(oldSrcCounts);
1018              srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
1019                  newSrcCounts.get(Quota.DISKSPACE), false);
1020            }
1021            
1022            return filesDeleted >= 0;
1023          }
1024        } finally {
1025          if (undoRemoveSrc) {
1026            // Rename failed - restore src
1027            final INodeDirectory srcParent = srcIIP.getINode(-2).asDirectory();
1028            final INode oldSrcChild = srcChild;
1029            // put it back
1030            if (withCount == null) {
1031              srcChild.setLocalName(srcChildName);
1032            } else if (!srcChildIsReference) { // src must be in snapshot
1033              // the withCount node will no longer be used thus no need to update
1034              // its reference number here
1035              final INode originalChild = withCount.getReferredINode();
1036              srcChild = originalChild;
1037              srcChild.setLocalName(srcChildName);
1038            } else {
1039              withCount.removeReference(oldSrcChild.asReference());
1040              final INodeReference originalRef = new INodeReference.DstReference(
1041                  srcParent, withCount, srcRefDstSnapshot);
1042              srcChild = originalRef;
1043              withCount.getReferredINode().setLocalName(srcChildName);
1044            }
1045            
1046            if (srcParent.isWithSnapshot()) {
1047              srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
1048            } else {
1049              // srcParent is not an INodeDirectoryWithSnapshot, we only need to add
1050              // the srcChild back
1051              addLastINodeNoQuotaCheck(srcIIP, srcChild);
1052            }
1053          }
1054          if (undoRemoveDst) {
1055            // Rename failed - restore dst
1056            if (dstParent.isDirectory() && dstParent.asDirectory().isWithSnapshot()) {
1057              dstParent.asDirectory().undoRename4DstParent(removedDst,
1058                  dstIIP.getLatestSnapshotId());
1059            } else {
1060              addLastINodeNoQuotaCheck(dstIIP, removedDst);
1061            }
1062            if (removedDst.isReference()) {
1063              final INodeReference removedDstRef = removedDst.asReference();
1064              final INodeReference.WithCount wc = 
1065                  (WithCount) removedDstRef.getReferredINode().asReference();
1066              wc.addReference(removedDstRef);
1067            }
1068          }
1069        }
1070        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
1071            + "failed to rename " + src + " to " + dst);
1072        throw new IOException("rename from " + src + " to " + dst + " failed.");
1073      }
1074      
1075      /**
1076       * Set file replication
1077       * 
1078       * @param src file name
1079       * @param replication new replication
1080       * @param blockRepls block replications - output parameter
1081       * @return array of file blocks
1082       * @throws QuotaExceededException
1083       * @throws SnapshotAccessControlException 
1084       */
1085      Block[] setReplication(String src, short replication, short[] blockRepls)
1086          throws QuotaExceededException, UnresolvedLinkException,
1087          SnapshotAccessControlException {
1088        waitForReady();
1089        writeLock();
1090        try {
1091          final Block[] fileBlocks = unprotectedSetReplication(
1092              src, replication, blockRepls);
1093          if (fileBlocks != null)  // log replication change
1094            fsImage.getEditLog().logSetReplication(src, replication);
1095          return fileBlocks;
1096        } finally {
1097          writeUnlock();
1098        }
1099      }
1100    
1101      Block[] unprotectedSetReplication(String src, short replication,
1102          short[] blockRepls) throws QuotaExceededException,
1103          UnresolvedLinkException, SnapshotAccessControlException {
1104        assert hasWriteLock();
1105    
1106        final INodesInPath iip = rootDir.getINodesInPath4Write(src, true);
1107        final INode inode = iip.getLastINode();
1108        if (inode == null || !inode.isFile()) {
1109          return null;
1110        }
1111        INodeFile file = inode.asFile();
1112        final short oldBR = file.getBlockReplication();
1113    
1114        // before setFileReplication, check for increasing block replication.
1115        // if replication > oldBR, then newBR == replication.
1116        // if replication < oldBR, we don't know newBR yet. 
1117        if (replication > oldBR) {
1118          long dsDelta = (replication - oldBR)*(file.diskspaceConsumed()/oldBR);
1119          updateCount(iip, 0, dsDelta, true);
1120        }
1121    
1122        file = file.setFileReplication(replication, iip.getLatestSnapshotId(),
1123            inodeMap);
1124        
1125        final short newBR = file.getBlockReplication(); 
1126        // check newBR < oldBR case. 
1127        if (newBR < oldBR) {
1128          long dsDelta = (newBR - oldBR)*(file.diskspaceConsumed()/newBR);
1129          updateCount(iip, 0, dsDelta, true);
1130        }
1131    
1132        if (blockRepls != null) {
1133          blockRepls[0] = oldBR;
1134          blockRepls[1] = newBR;
1135        }
1136        return file.getBlocks();
1137      }
1138    
1139      /**
1140       * @param path the file path
1141       * @return the block size of the file. 
1142       */
1143      long getPreferredBlockSize(String path) throws UnresolvedLinkException,
1144          FileNotFoundException, IOException {
1145        readLock();
1146        try {
1147          return INodeFile.valueOf(rootDir.getNode(path, false), path
1148              ).getPreferredBlockSize();
1149        } finally {
1150          readUnlock();
1151        }
1152      }
1153    
1154      boolean exists(String src) throws UnresolvedLinkException {
1155        src = normalizePath(src);
1156        readLock();
1157        try {
1158          INode inode = rootDir.getNode(src, false);
1159          if (inode == null) {
1160             return false;
1161          }
1162          return !inode.isFile() || inode.asFile().getBlocks() != null;
1163        } finally {
1164          readUnlock();
1165        }
1166      }
1167      
  /**
   * Set the permission of the inode at <code>src</code> and record the change
   * in the edit log.
   * <p>
   * The namespace change is made while holding the write lock; the edit log
   * entry is written after the lock has been released, and only if the
   * change did not throw.
   * 
   * @param src path of the inode to change
   * @param permission the new permission
   * @throws FileNotFoundException if <code>src</code> does not exist
   */
  void setPermission(String src, FsPermission permission)
      throws FileNotFoundException, UnresolvedLinkException,
      QuotaExceededException, SnapshotAccessControlException {
    writeLock();
    try {
      unprotectedSetPermission(src, permission);
    } finally {
      writeUnlock();
    }
    // reached only when unprotectedSetPermission completed normally
    fsImage.getEditLog().logSetPermissions(src, permission);
  }
1179      
1180      void unprotectedSetPermission(String src, FsPermission permissions)
1181          throws FileNotFoundException, UnresolvedLinkException,
1182          QuotaExceededException, SnapshotAccessControlException {
1183        assert hasWriteLock();
1184        final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(src, true);
1185        final INode inode = inodesInPath.getLastINode();
1186        if (inode == null) {
1187          throw new FileNotFoundException("File does not exist: " + src);
1188        }
1189        int snapshotId = inodesInPath.getLatestSnapshotId();
1190        inode.setPermission(permissions, snapshotId);
1191      }
1192    
  /**
   * Set the owner and/or group of the inode at <code>src</code> and record
   * the change in the edit log.
   * <p>
   * The namespace change is made while holding the write lock; the edit log
   * entry is written after the lock has been released, and only if the
   * change did not throw.
   * 
   * @param src path of the inode to change
   * @param username the new user name; null leaves the user unchanged
   * @param groupname the new group name; null leaves the group unchanged
   * @throws FileNotFoundException if <code>src</code> does not exist
   */
  void setOwner(String src, String username, String groupname)
      throws FileNotFoundException, UnresolvedLinkException,
      QuotaExceededException, SnapshotAccessControlException {
    writeLock();
    try {
      unprotectedSetOwner(src, username, groupname);
    } finally {
      writeUnlock();
    }
    // reached only when unprotectedSetOwner completed normally
    fsImage.getEditLog().logSetOwner(src, username, groupname);
  }
1204    
1205      void unprotectedSetOwner(String src, String username, String groupname)
1206          throws FileNotFoundException, UnresolvedLinkException,
1207          QuotaExceededException, SnapshotAccessControlException {
1208        assert hasWriteLock();
1209        final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(src, true);
1210        INode inode = inodesInPath.getLastINode();
1211        if (inode == null) {
1212          throw new FileNotFoundException("File does not exist: " + src);
1213        }
1214        if (username != null) {
1215          inode = inode.setUser(username, inodesInPath.getLatestSnapshotId());
1216        }
1217        if (groupname != null) {
1218          inode.setGroup(groupname, inodesInPath.getLatestSnapshotId());
1219        }
1220      }
1221    
1222      /**
1223       * Concat all the blocks from srcs to trg and delete the srcs files
1224       */
1225      void concat(String target, String [] srcs, boolean supportRetryCache) 
1226          throws UnresolvedLinkException, QuotaExceededException,
1227          SnapshotAccessControlException, SnapshotException {
1228        writeLock();
1229        try {
1230          // actual move
1231          waitForReady();
1232          long timestamp = now();
1233          unprotectedConcat(target, srcs, timestamp);
1234          // do the commit
1235          fsImage.getEditLog().logConcat(target, srcs, timestamp, 
1236              supportRetryCache);
1237        } finally {
1238          writeUnlock();
1239        }
1240      }
1241    
1242      /**
1243       * Concat all the blocks from srcs to trg and delete the srcs files
1244       * @param target target file to move the blocks to
1245       * @param srcs list of file to move the blocks from
1246       */
1247      void unprotectedConcat(String target, String [] srcs, long timestamp) 
1248          throws UnresolvedLinkException, QuotaExceededException,
1249          SnapshotAccessControlException, SnapshotException {
1250        assert hasWriteLock();
1251        if (NameNode.stateChangeLog.isDebugEnabled()) {
1252          NameNode.stateChangeLog.debug("DIR* FSNamesystem.concat to "+target);
1253        }
1254        // do the move
1255        
1256        final INodesInPath trgIIP = rootDir.getINodesInPath4Write(target, true);
1257        final INode[] trgINodes = trgIIP.getINodes();
1258        final INodeFile trgInode = trgIIP.getLastINode().asFile();
1259        INodeDirectory trgParent = trgINodes[trgINodes.length-2].asDirectory();
1260        final int trgLatestSnapshot = trgIIP.getLatestSnapshotId();
1261        
1262        final INodeFile [] allSrcInodes = new INodeFile[srcs.length];
1263        for(int i = 0; i < srcs.length; i++) {
1264          final INodesInPath iip = getINodesInPath4Write(srcs[i]);
1265          final int latest = iip.getLatestSnapshotId();
1266          final INode inode = iip.getLastINode();
1267    
1268          // check if the file in the latest snapshot
1269          if (inode.isInLatestSnapshot(latest)) {
1270            throw new SnapshotException("Concat: the source file " + srcs[i]
1271                + " is in snapshot " + latest);
1272          }
1273    
1274          // check if the file has other references.
1275          if (inode.isReference() && ((INodeReference.WithCount)
1276              inode.asReference().getReferredINode()).getReferenceCount() > 1) {
1277            throw new SnapshotException("Concat: the source file " + srcs[i]
1278                + " is referred by some other reference in some snapshot.");
1279          }
1280    
1281          allSrcInodes[i] = inode.asFile();
1282        }
1283        trgInode.concatBlocks(allSrcInodes);
1284        
1285        // since we are in the same dir - we can use same parent to remove files
1286        int count = 0;
1287        for(INodeFile nodeToRemove: allSrcInodes) {
1288          if(nodeToRemove == null) continue;
1289          
1290          nodeToRemove.setBlocks(null);
1291          trgParent.removeChild(nodeToRemove, trgLatestSnapshot);
1292          inodeMap.remove(nodeToRemove);
1293          count++;
1294        }
1295        
1296        // update inodeMap
1297        removeFromInodeMap(Arrays.asList(allSrcInodes));
1298        
1299        trgInode.setModificationTime(timestamp, trgLatestSnapshot);
1300        trgParent.updateModificationTime(timestamp, trgLatestSnapshot);
1301        // update quota on the parent directory ('count' files removed, 0 space)
1302        unprotectedUpdateCount(trgIIP, trgINodes.length-1, -count, 0);
1303      }
1304    
1305      /**
1306       * Delete the target directory and collect the blocks under it
1307       * 
1308       * @param src Path of a directory to delete
1309       * @param collectedBlocks Blocks under the deleted directory
1310       * @param removedINodes INodes that should be removed from {@link #inodeMap}
1311       * @param logRetryCache Whether to record RPC IDs in editlog to support retry
1312       *                      cache rebuilding.
1313       * @return true on successful deletion; else false
1314       */
1315      boolean delete(String src, BlocksMapUpdateInfo collectedBlocks,
1316          List<INode> removedINodes, boolean logRetryCache) throws IOException {
1317        if (NameNode.stateChangeLog.isDebugEnabled()) {
1318          NameNode.stateChangeLog.debug("DIR* FSDirectory.delete: " + src);
1319        }
1320        waitForReady();
1321        long now = now();
1322        final long filesRemoved;
1323        writeLock();
1324        try {
1325          final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(
1326              normalizePath(src), false);
1327          if (!deleteAllowed(inodesInPath, src) ) {
1328            filesRemoved = -1;
1329          } else {
1330            List<INodeDirectorySnapshottable> snapshottableDirs = 
1331                new ArrayList<INodeDirectorySnapshottable>();
1332            checkSnapshot(inodesInPath.getLastINode(), snapshottableDirs);
1333            filesRemoved = unprotectedDelete(inodesInPath, collectedBlocks,
1334                removedINodes, now);
1335            namesystem.removeSnapshottableDirs(snapshottableDirs);
1336          }
1337        } finally {
1338          writeUnlock();
1339        }
1340        if (filesRemoved < 0) {
1341          return false;
1342        }
1343        fsImage.getEditLog().logDelete(src, now, logRetryCache);
1344        incrDeletedFileCount(filesRemoved);
1345        // Blocks/INodes will be handled later by the caller of this method
1346        getFSNamesystem().removePathAndBlocks(src, null, null);
1347        return true;
1348      }
1349      
1350      private static boolean deleteAllowed(final INodesInPath iip,
1351          final String src) {
1352        final INode[] inodes = iip.getINodes(); 
1353        if (inodes == null || inodes.length == 0
1354            || inodes[inodes.length - 1] == null) {
1355          if(NameNode.stateChangeLog.isDebugEnabled()) {
1356            NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
1357                + "failed to remove " + src + " because it does not exist");
1358          }
1359          return false;
1360        } else if (inodes.length == 1) { // src is the root
1361          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedDelete: "
1362              + "failed to remove " + src
1363              + " because the root is not allowed to be deleted");
1364          return false;
1365        }
1366        return true;
1367      }
1368      
1369      /**
1370       * @return true if the path is a non-empty directory; otherwise, return false.
1371       */
1372      boolean isNonEmptyDirectory(String path) throws UnresolvedLinkException {
1373        readLock();
1374        try {
1375          final INodesInPath inodesInPath = rootDir.getLastINodeInPath(path, false);
1376          final INode inode = inodesInPath.getINode(0);
1377          if (inode == null || !inode.isDirectory()) {
1378            //not found or not a directory
1379            return false;
1380          }
1381          final int s = inodesInPath.getPathSnapshotId();
1382          return !inode.asDirectory().getChildrenList(s).isEmpty();
1383        } finally {
1384          readUnlock();
1385        }
1386      }
1387    
1388      /**
1389       * Delete a path from the name space
1390       * Update the count at each ancestor directory with quota
1391       * <br>
1392       * Note: This is to be used by {@link FSEditLog} only.
1393       * <br>
1394       * @param src a string representation of a path to an inode
1395       * @param mtime the time the inode is removed
1396       * @throws SnapshotAccessControlException if path is in RO snapshot
1397       */
1398      void unprotectedDelete(String src, long mtime) throws UnresolvedLinkException,
1399          QuotaExceededException, SnapshotAccessControlException, IOException {
1400        assert hasWriteLock();
1401        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
1402        List<INode> removedINodes = new ChunkedArrayList<INode>();
1403    
1404        final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(
1405            normalizePath(src), false);
1406        long filesRemoved = -1;
1407        if (deleteAllowed(inodesInPath, src)) {
1408          List<INodeDirectorySnapshottable> snapshottableDirs = 
1409              new ArrayList<INodeDirectorySnapshottable>();
1410          checkSnapshot(inodesInPath.getLastINode(), snapshottableDirs);
1411          filesRemoved = unprotectedDelete(inodesInPath, collectedBlocks,
1412              removedINodes, mtime);
1413          namesystem.removeSnapshottableDirs(snapshottableDirs); 
1414        }
1415    
1416        if (filesRemoved >= 0) {
1417          getFSNamesystem().removePathAndBlocks(src, collectedBlocks, 
1418              removedINodes);
1419        }
1420      }
1421      
1422      /**
1423       * Delete a path from the name space
1424       * Update the count at each ancestor directory with quota
1425       * @param iip the inodes resolved from the path
1426       * @param collectedBlocks blocks collected from the deleted path
1427       * @param removedINodes inodes that should be removed from {@link #inodeMap}
1428       * @param mtime the time the inode is removed
1429       * @return the number of inodes deleted; 0 if no inodes are deleted.
1430       */ 
1431      long unprotectedDelete(INodesInPath iip, BlocksMapUpdateInfo collectedBlocks,
1432          List<INode> removedINodes, long mtime) throws QuotaExceededException {
1433        assert hasWriteLock();
1434    
1435        // check if target node exists
1436        INode targetNode = iip.getLastINode();
1437        if (targetNode == null) {
1438          return -1;
1439        }
1440    
1441        // record modification
1442        final int latestSnapshot = iip.getLatestSnapshotId();
1443        targetNode = targetNode.recordModification(latestSnapshot);
1444        iip.setLastINode(targetNode);
1445    
1446        // Remove the node from the namespace
1447        long removed = removeLastINode(iip);
1448        if (removed == -1) {
1449          return -1;
1450        }
1451    
1452        // set the parent's modification time
1453        final INodeDirectory parent = targetNode.getParent();
1454        parent.updateModificationTime(mtime, latestSnapshot);
1455        if (removed == 0) {
1456          return 0;
1457        }
1458        
1459        // collect block
1460        if (!targetNode.isInLatestSnapshot(latestSnapshot)) {
1461          targetNode.destroyAndCollectBlocks(collectedBlocks, removedINodes);
1462        } else {
1463          Quota.Counts counts = targetNode.cleanSubtree(Snapshot.CURRENT_STATE_ID,
1464              latestSnapshot, collectedBlocks, removedINodes, true);
1465          parent.addSpaceConsumed(-counts.get(Quota.NAMESPACE),
1466              -counts.get(Quota.DISKSPACE), true);
1467          removed = counts.get(Quota.NAMESPACE);
1468        }
1469        if (NameNode.stateChangeLog.isDebugEnabled()) {
1470          NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
1471              + targetNode.getFullPathName() + " is removed");
1472        }
1473        return removed;
1474      }
1475      
1476      /**
1477       * Check if the given INode (or one of its descendants) is snapshottable and
1478       * already has snapshots.
1479       * 
1480       * @param target The given INode
1481       * @param snapshottableDirs The list of directories that are snapshottable 
1482       *                          but do not have snapshots yet
1483       */
1484      private static void checkSnapshot(INode target,
1485          List<INodeDirectorySnapshottable> snapshottableDirs) throws IOException {
1486        if (target.isDirectory()) {
1487          INodeDirectory targetDir = target.asDirectory();
1488          if (targetDir.isSnapshottable()) {
1489            INodeDirectorySnapshottable ssTargetDir = 
1490                (INodeDirectorySnapshottable) targetDir;
1491            if (ssTargetDir.getNumSnapshots() > 0) {
1492              throw new IOException("The directory " + ssTargetDir.getFullPathName()
1493                  + " cannot be deleted since " + ssTargetDir.getFullPathName()
1494                  + " is snapshottable and already has snapshots");
1495            } else {
1496              if (snapshottableDirs != null) {
1497                snapshottableDirs.add(ssTargetDir);
1498              }
1499            }
1500          } 
1501          for (INode child : targetDir.getChildrenList(Snapshot.CURRENT_STATE_ID)) {
1502            checkSnapshot(child, snapshottableDirs);
1503          }
1504        }
1505      }
1506    
1507      /**
1508       * Get a partial listing of the indicated directory
1509       *
1510       * We will stop when any of the following conditions is met:
1511       * 1) this.lsLimit files have been added
1512       * 2) needLocation is true AND enough files have been added such
1513       * that at least this.lsLimit block locations are in the response
1514       *
1515       * @param src the directory name
1516       * @param startAfter the name to start listing after
1517       * @param needLocation if block locations are returned
1518       * @return a partial listing starting after startAfter
1519       */
1520      DirectoryListing getListing(String src, byte[] startAfter,
1521          boolean needLocation) throws UnresolvedLinkException, IOException {
1522        String srcs = normalizePath(src);
1523    
1524        readLock();
1525        try {
1526          if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) {
1527            return getSnapshotsListing(srcs, startAfter);
1528          }
1529          final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, true);
1530          final int snapshot = inodesInPath.getPathSnapshotId();
1531          final INode targetNode = inodesInPath.getINode(0);
1532          if (targetNode == null)
1533            return null;
1534          
1535          if (!targetNode.isDirectory()) {
1536            return new DirectoryListing(
1537                new HdfsFileStatus[]{createFileStatus(HdfsFileStatus.EMPTY_NAME,
1538                    targetNode, needLocation, snapshot)}, 0);
1539          }
1540    
1541          final INodeDirectory dirInode = targetNode.asDirectory();
1542          final ReadOnlyList<INode> contents = dirInode.getChildrenList(snapshot);
1543          int startChild = INodeDirectory.nextChild(contents, startAfter);
1544          int totalNumChildren = contents.size();
1545          int numOfListing = Math.min(totalNumChildren-startChild, this.lsLimit);
1546          int locationBudget = this.lsLimit;
1547          int listingCnt = 0;
1548          HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
1549          for (int i=0; i<numOfListing && locationBudget>0; i++) {
1550            INode cur = contents.get(startChild+i);
1551            listing[i] = createFileStatus(cur.getLocalNameBytes(), cur,
1552                needLocation, snapshot);
1553            listingCnt++;
1554            if (needLocation) {
1555                // Once we  hit lsLimit locations, stop.
1556                // This helps to prevent excessively large response payloads.
1557                // Approximate #locations with locatedBlockCount() * repl_factor
1558                LocatedBlocks blks = 
1559                    ((HdfsLocatedFileStatus)listing[i]).getBlockLocations();
1560                locationBudget -= (blks == null) ? 0 :
1561                   blks.locatedBlockCount() * listing[i].getReplication();
1562            }
1563          }
1564          // truncate return array if necessary
1565          if (listingCnt < numOfListing) {
1566              listing = Arrays.copyOf(listing, listingCnt);
1567          }
1568          return new DirectoryListing(
1569              listing, totalNumChildren-startChild-listingCnt);
1570        } finally {
1571          readUnlock();
1572        }
1573      }
1574      
1575      /**
1576       * Get a listing of all the snapshots of a snapshottable directory
1577       */
1578      private DirectoryListing getSnapshotsListing(String src, byte[] startAfter)
1579          throws UnresolvedLinkException, IOException {
1580        Preconditions.checkState(hasReadLock());
1581        Preconditions.checkArgument(
1582            src.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR), 
1583            "%s does not end with %s", src, HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);
1584        
1585        final String dirPath = normalizePath(src.substring(0,
1586            src.length() - HdfsConstants.DOT_SNAPSHOT_DIR.length()));
1587        
1588        final INode node = this.getINode(dirPath);
1589        final INodeDirectorySnapshottable dirNode = INodeDirectorySnapshottable
1590            .valueOf(node, dirPath);
1591        final ReadOnlyList<Snapshot> snapshots = dirNode.getSnapshotList();
1592        int skipSize = ReadOnlyList.Util.binarySearch(snapshots, startAfter);
1593        skipSize = skipSize < 0 ? -skipSize - 1 : skipSize + 1;
1594        int numOfListing = Math.min(snapshots.size() - skipSize, this.lsLimit);
1595        final HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
1596        for (int i = 0; i < numOfListing; i++) {
1597          Root sRoot = snapshots.get(i + skipSize).getRoot();
1598          listing[i] = createFileStatus(sRoot.getLocalNameBytes(), sRoot,
1599              Snapshot.CURRENT_STATE_ID);
1600        }
1601        return new DirectoryListing(
1602            listing, snapshots.size() - skipSize - numOfListing);
1603      }
1604    
1605      /** Get the file info for a specific file.
1606       * @param src The string representation of the path to the file
1607       * @param resolveLink whether to throw UnresolvedLinkException 
1608       * @return object containing information regarding the file
1609       *         or null if file not found
1610       */
1611      HdfsFileStatus getFileInfo(String src, boolean resolveLink) 
1612          throws UnresolvedLinkException {
1613        String srcs = normalizePath(src);
1614        readLock();
1615        try {
1616          if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) {
1617            return getFileInfo4DotSnapshot(srcs);
1618          }
1619          final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, resolveLink);
1620          final INode i = inodesInPath.getINode(0);
1621          return i == null? null: createFileStatus(HdfsFileStatus.EMPTY_NAME, i,
1622              inodesInPath.getPathSnapshotId());
1623        } finally {
1624          readUnlock();
1625        }
1626      }
1627      
1628      /**
1629       * Currently we only support "ls /xxx/.snapshot" which will return all the
1630       * snapshots of a directory. The FSCommand Ls will first call getFileInfo to
1631       * make sure the file/directory exists (before the real getListing call).
1632       * Since we do not have a real INode for ".snapshot", we return an empty
1633       * non-null HdfsFileStatus here.
1634       */
1635      private HdfsFileStatus getFileInfo4DotSnapshot(String src)
1636          throws UnresolvedLinkException {
1637        if (getINode4DotSnapshot(src) != null) {
1638          return new HdfsFileStatus(0, true, 0, 0, 0, 0, null, null, null, null,
1639              HdfsFileStatus.EMPTY_NAME, -1L, 0);
1640        }
1641        return null;
1642      }
1643    
1644      private INode getINode4DotSnapshot(String src) throws UnresolvedLinkException {
1645        Preconditions.checkArgument(
1646            src.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR), 
1647            "%s does not end with %s", src, HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);
1648        
1649        final String dirPath = normalizePath(src.substring(0,
1650            src.length() - HdfsConstants.DOT_SNAPSHOT_DIR.length()));
1651        
1652        final INode node = this.getINode(dirPath);
1653        if (node != null
1654            && node.isDirectory()
1655            && node.asDirectory() instanceof INodeDirectorySnapshottable) {
1656          return node;
1657        }
1658        return null;
1659      }
1660    
1661      /**
1662       * Get the blocks associated with the file.
1663       */
1664      Block[] getFileBlocks(String src) throws UnresolvedLinkException {
1665        waitForReady();
1666        readLock();
1667        try {
1668          final INode i = rootDir.getNode(src, false);
1669          return i != null && i.isFile()? i.asFile().getBlocks(): null;
1670        } finally {
1671          readUnlock();
1672        }
1673      }
1674    
1675    
1676      INodesInPath getExistingPathINodes(byte[][] components)
1677          throws UnresolvedLinkException {
1678        return INodesInPath.resolve(rootDir, components);
1679      }
1680    
1681      /**
1682       * Get {@link INode} associated with the file / directory.
1683       */
1684      public INode getINode(String src) throws UnresolvedLinkException {
1685        return getLastINodeInPath(src).getINode(0);
1686      }
1687    
1688      /**
1689       * Get {@link INode} associated with the file / directory.
1690       */
1691      public INodesInPath getLastINodeInPath(String src)
1692           throws UnresolvedLinkException {
1693        readLock();
1694        try {
1695          return rootDir.getLastINodeInPath(src, true);
1696        } finally {
1697          readUnlock();
1698        }
1699      }
1700      
1701      /**
1702       * Get {@link INode} associated with the file / directory.
1703       */
1704      public INodesInPath getINodesInPath4Write(String src
1705          ) throws UnresolvedLinkException, SnapshotAccessControlException {
1706        readLock();
1707        try {
1708          return rootDir.getINodesInPath4Write(src, true);
1709        } finally {
1710          readUnlock();
1711        }
1712      }
1713    
1714      /**
1715       * Get {@link INode} associated with the file / directory.
1716       * @throws SnapshotAccessControlException if path is in RO snapshot
1717       */
1718      public INode getINode4Write(String src) throws UnresolvedLinkException,
1719          SnapshotAccessControlException {
1720        readLock();
1721        try {
1722          return rootDir.getINode4Write(src, true);
1723        } finally {
1724          readUnlock();
1725        }
1726      }
1727    
1728      /** 
1729       * Check whether the filepath could be created
1730       * @throws SnapshotAccessControlException if path is in RO snapshot
1731       */
1732      boolean isValidToCreate(String src) throws UnresolvedLinkException,
1733          SnapshotAccessControlException {
1734        String srcs = normalizePath(src);
1735        readLock();
1736        try {
1737          if (srcs.startsWith("/") && !srcs.endsWith("/")
1738              && rootDir.getINode4Write(srcs, false) == null) {
1739            return true;
1740          } else {
1741            return false;
1742          }
1743        } finally {
1744          readUnlock();
1745        }
1746      }
1747    
1748      /**
1749       * Check whether the path specifies a directory
1750       */
1751      boolean isDir(String src) throws UnresolvedLinkException {
1752        src = normalizePath(src);
1753        readLock();
1754        try {
1755          INode node = rootDir.getNode(src, false);
1756          return node != null && node.isDirectory();
1757        } finally {
1758          readUnlock();
1759        }
1760      }
1761      
1762      /**
1763       * Check whether the path specifies a directory
1764       * @throws SnapshotAccessControlException if path is in RO snapshot
1765       */
1766      boolean isDirMutable(String src) throws UnresolvedLinkException,
1767          SnapshotAccessControlException {
1768        src = normalizePath(src);
1769        readLock();
1770        try {
1771          INode node = rootDir.getINode4Write(src, false);
1772          return node != null && node.isDirectory();
1773        } finally {
1774          readUnlock();
1775        }
1776      }
1777    
1778      /** Updates namespace and diskspace consumed for all
1779       * directories until the parent directory of file represented by path.
1780       * 
1781       * @param path path for the file.
1782       * @param nsDelta the delta change of namespace
1783       * @param dsDelta the delta change of diskspace
1784       * @throws QuotaExceededException if the new count violates any quota limit
1785       * @throws FileNotFoundException if path does not exist.
1786       */
1787      void updateSpaceConsumed(String path, long nsDelta, long dsDelta)
1788          throws QuotaExceededException, FileNotFoundException,
1789              UnresolvedLinkException, SnapshotAccessControlException {
1790        writeLock();
1791        try {
1792          final INodesInPath iip = rootDir.getINodesInPath4Write(path, false);
1793          if (iip.getLastINode() == null) {
1794            throw new FileNotFoundException("Path not found: " + path);
1795          }
1796          updateCount(iip, nsDelta, dsDelta, true);
1797        } finally {
1798          writeUnlock();
1799        }
1800      }
1801      
1802      private void updateCount(INodesInPath iip, long nsDelta, long dsDelta,
1803          boolean checkQuota) throws QuotaExceededException {
1804        updateCount(iip, iip.getINodes().length - 1, nsDelta, dsDelta, checkQuota);
1805      }
1806    
1807      /** update count of each inode with quota
1808       * 
1809       * @param iip inodes in a path
1810       * @param numOfINodes the number of inodes to update starting from index 0
1811       * @param nsDelta the delta change of namespace
1812       * @param dsDelta the delta change of diskspace
1813       * @param checkQuota if true then check if quota is exceeded
1814       * @throws QuotaExceededException if the new count violates any quota limit
1815       */
1816      private void updateCount(INodesInPath iip, int numOfINodes, 
1817                               long nsDelta, long dsDelta, boolean checkQuota)
1818                               throws QuotaExceededException {
1819        assert hasWriteLock();
1820        if (!ready) {
1821          //still initializing. do not check or update quotas.
1822          return;
1823        }
1824        final INode[] inodes = iip.getINodes();
1825        if (numOfINodes > inodes.length) {
1826          numOfINodes = inodes.length;
1827        }
1828        if (checkQuota) {
1829          verifyQuota(inodes, numOfINodes, nsDelta, dsDelta, null);
1830        }
1831        unprotectedUpdateCount(iip, numOfINodes, nsDelta, dsDelta);
1832      }
1833      
1834      /** 
1835       * update quota of each inode and check to see if quota is exceeded. 
1836       * See {@link #updateCount(INode[], int, long, long, boolean)}
1837       */ 
1838      private void updateCountNoQuotaCheck(INodesInPath inodesInPath,
1839          int numOfINodes, long nsDelta, long dsDelta) {
1840        assert hasWriteLock();
1841        try {
1842          updateCount(inodesInPath, numOfINodes, nsDelta, dsDelta, false);
1843        } catch (QuotaExceededException e) {
1844          NameNode.LOG.error("BUG: unexpected exception ", e);
1845        }
1846      }
1847      
1848      /**
1849       * updates quota without verification
1850       * callers responsibility is to make sure quota is not exceeded
1851       */
1852      private static void unprotectedUpdateCount(INodesInPath inodesInPath,
1853          int numOfINodes, long nsDelta, long dsDelta) {
1854        final INode[] inodes = inodesInPath.getINodes();
1855        for(int i=0; i < numOfINodes; i++) {
1856          if (inodes[i].isQuotaSet()) { // a directory with quota
1857            inodes[i].asDirectory().getDirectoryWithQuotaFeature()
1858                .addSpaceConsumed2Cache(nsDelta, dsDelta);
1859          }
1860        }
1861      }
1862      
1863      /** Return the name of the path represented by inodes at [0, pos] */
1864      static String getFullPathName(INode[] inodes, int pos) {
1865        StringBuilder fullPathName = new StringBuilder();
1866        if (inodes[0].isRoot()) {
1867          if (pos == 0) return Path.SEPARATOR;
1868        } else {
1869          fullPathName.append(inodes[0].getLocalName());
1870        }
1871        
1872        for (int i=1; i<=pos; i++) {
1873          fullPathName.append(Path.SEPARATOR_CHAR).append(inodes[i].getLocalName());
1874        }
1875        return fullPathName.toString();
1876      }
1877    
1878      /**
1879       * @return the relative path of an inode from one of its ancestors,
1880       *         represented by an array of inodes.
1881       */
1882      private static INode[] getRelativePathINodes(INode inode, INode ancestor) {
1883        // calculate the depth of this inode from the ancestor
1884        int depth = 0;
1885        for (INode i = inode; i != null && !i.equals(ancestor); i = i.getParent()) {
1886          depth++;
1887        }
1888        INode[] inodes = new INode[depth];
1889    
1890        // fill up the inodes in the path from this inode to root
1891        for (int i = 0; i < depth; i++) {
1892          if (inode == null) {
1893            NameNode.stateChangeLog.warn("Could not get full path."
1894                + " Corresponding file might have deleted already.");
1895            return null;
1896          }
1897          inodes[depth-i-1] = inode;
1898          inode = inode.getParent();
1899        }
1900        return inodes;
1901      }
1902      
1903      private static INode[] getFullPathINodes(INode inode) {
1904        return getRelativePathINodes(inode, null);
1905      }
1906      
1907      /** Return the full path name of the specified inode */
1908      static String getFullPathName(INode inode) {
1909        INode[] inodes = getFullPathINodes(inode);
1910        // inodes can be null only when its called without holding lock
1911        return inodes == null ? "" : getFullPathName(inodes, inodes.length - 1);
1912      }
1913      
1914      /**
1915       * Create a directory 
1916       * If ancestor directories do not exist, automatically create them.
1917    
1918       * @param src string representation of the path to the directory
1919       * @param permissions the permission of the directory
1920       * @param isAutocreate if the permission of the directory should inherit
1921       *                          from its parent or not. u+wx is implicitly added to
1922       *                          the automatically created directories, and to the
1923       *                          given directory if inheritPermission is true
1924       * @param now creation time
1925       * @return true if the operation succeeds false otherwise
1926       * @throws FileNotFoundException if an ancestor or itself is a file
1927       * @throws QuotaExceededException if directory creation violates 
1928       *                                any quota limit
1929       * @throws UnresolvedLinkException if a symlink is encountered in src.                      
1930       * @throws SnapshotAccessControlException if path is in RO snapshot
1931       */
1932      boolean mkdirs(String src, PermissionStatus permissions,
1933          boolean inheritPermission, long now)
1934          throws FileAlreadyExistsException, QuotaExceededException, 
1935                 UnresolvedLinkException, SnapshotAccessControlException,
1936                 AclException {
1937        src = normalizePath(src);
1938        String[] names = INode.getPathNames(src);
1939        byte[][] components = INode.getPathComponents(names);
1940        final int lastInodeIndex = components.length - 1;
1941    
1942        writeLock();
1943        try {
1944          INodesInPath iip = getExistingPathINodes(components);
1945          if (iip.isSnapshot()) {
1946            throw new SnapshotAccessControlException(
1947                "Modification on RO snapshot is disallowed");
1948          }
1949          INode[] inodes = iip.getINodes();
1950    
1951          // find the index of the first null in inodes[]
1952          StringBuilder pathbuilder = new StringBuilder();
1953          int i = 1;
1954          for(; i < inodes.length && inodes[i] != null; i++) {
1955            pathbuilder.append(Path.SEPARATOR).append(names[i]);
1956            if (!inodes[i].isDirectory()) {
1957              throw new FileAlreadyExistsException("Parent path is not a directory: "
1958                  + pathbuilder+ " "+inodes[i].getLocalName());
1959            }
1960          }
1961    
1962          // default to creating parent dirs with the given perms
1963          PermissionStatus parentPermissions = permissions;
1964    
1965          // if not inheriting and it's the last inode, there's no use in
1966          // computing perms that won't be used
1967          if (inheritPermission || (i < lastInodeIndex)) {
1968            // if inheriting (ie. creating a file or symlink), use the parent dir,
1969            // else the supplied permissions
1970            // NOTE: the permissions of the auto-created directories violate posix
1971            FsPermission parentFsPerm = inheritPermission
1972                ? inodes[i-1].getFsPermission() : permissions.getPermission();
1973            
1974            // ensure that the permissions allow user write+execute
1975            if (!parentFsPerm.getUserAction().implies(FsAction.WRITE_EXECUTE)) {
1976              parentFsPerm = new FsPermission(
1977                  parentFsPerm.getUserAction().or(FsAction.WRITE_EXECUTE),
1978                  parentFsPerm.getGroupAction(),
1979                  parentFsPerm.getOtherAction()
1980              );
1981            }
1982            
1983            if (!parentPermissions.getPermission().equals(parentFsPerm)) {
1984              parentPermissions = new PermissionStatus(
1985                  parentPermissions.getUserName(),
1986                  parentPermissions.getGroupName(),
1987                  parentFsPerm
1988              );
1989              // when inheriting, use same perms for entire path
1990              if (inheritPermission) permissions = parentPermissions;
1991            }
1992          }
1993          
1994          // create directories beginning from the first null index
1995          for(; i < inodes.length; i++) {
1996            pathbuilder.append(Path.SEPARATOR + names[i]);
1997            unprotectedMkdir(namesystem.allocateNewInodeId(), iip, i,
1998                components[i], (i < lastInodeIndex) ? parentPermissions
1999                    : permissions, null, now);
2000            if (inodes[i] == null) {
2001              return false;
2002            }
2003            // Directory creation also count towards FilesCreated
2004            // to match count of FilesDeleted metric.
2005            if (getFSNamesystem() != null)
2006              NameNode.getNameNodeMetrics().incrFilesCreated();
2007    
2008            final String cur = pathbuilder.toString();
2009            fsImage.getEditLog().logMkDir(cur, inodes[i]);
2010            if(NameNode.stateChangeLog.isDebugEnabled()) {
2011              NameNode.stateChangeLog.debug(
2012                  "DIR* FSDirectory.mkdirs: created directory " + cur);
2013            }
2014          }
2015        } finally {
2016          writeUnlock();
2017        }
2018        return true;
2019      }
2020    
2021      INode unprotectedMkdir(long inodeId, String src, PermissionStatus permissions,
2022                              List<AclEntry> aclEntries, long timestamp)
2023          throws QuotaExceededException, UnresolvedLinkException, AclException {
2024        assert hasWriteLock();
2025        byte[][] components = INode.getPathComponents(src);
2026        INodesInPath iip = getExistingPathINodes(components);
2027        INode[] inodes = iip.getINodes();
2028        final int pos = inodes.length - 1;
2029        unprotectedMkdir(inodeId, iip, pos, components[pos], permissions, aclEntries,
2030            timestamp);
2031        return inodes[pos];
2032      }
2033    
2034      /** create a directory at index pos.
2035       * The parent path to the directory is at [0, pos-1].
2036       * All ancestors exist. Newly created one stored at index pos.
2037       */
2038      private void unprotectedMkdir(long inodeId, INodesInPath inodesInPath,
2039          int pos, byte[] name, PermissionStatus permission,
2040          List<AclEntry> aclEntries, long timestamp)
2041          throws QuotaExceededException, AclException {
2042        assert hasWriteLock();
2043        final INodeDirectory dir = new INodeDirectory(inodeId, name, permission,
2044            timestamp);
2045        if (addChild(inodesInPath, pos, dir, true)) {
2046          if (aclEntries != null) {
2047            AclStorage.updateINodeAcl(dir, aclEntries, Snapshot.CURRENT_STATE_ID);
2048          }
2049          inodesInPath.setINode(pos, dir);
2050        }
2051      }
2052      
2053      /**
2054       * Add the given child to the namespace.
2055       * @param src The full path name of the child node.
2056       * @throw QuotaExceededException is thrown if it violates quota limit
2057       */
2058      private boolean addINode(String src, INode child
2059          ) throws QuotaExceededException, UnresolvedLinkException {
2060        byte[][] components = INode.getPathComponents(src);
2061        child.setLocalName(components[components.length-1]);
2062        cacheName(child);
2063        writeLock();
2064        try {
2065          return addLastINode(getExistingPathINodes(components), child, true);
2066        } finally {
2067          writeUnlock();
2068        }
2069      }
2070    
2071      /**
2072       * Verify quota for adding or moving a new INode with required 
2073       * namespace and diskspace to a given position.
2074       *  
2075       * @param inodes INodes corresponding to a path
2076       * @param pos position where a new INode will be added
2077       * @param nsDelta needed namespace
2078       * @param dsDelta needed diskspace
2079       * @param commonAncestor Last node in inodes array that is a common ancestor
2080       *          for a INode that is being moved from one location to the other.
2081       *          Pass null if a node is not being moved.
2082       * @throws QuotaExceededException if quota limit is exceeded.
2083       */
2084      private static void verifyQuota(INode[] inodes, int pos, long nsDelta,
2085          long dsDelta, INode commonAncestor) throws QuotaExceededException {
2086        if (nsDelta <= 0 && dsDelta <= 0) {
2087          // if quota is being freed or not being consumed
2088          return;
2089        }
2090    
2091        // check existing components in the path
2092        for(int i = (pos > inodes.length? inodes.length: pos) - 1; i >= 0; i--) {
2093          if (commonAncestor == inodes[i]) {
2094            // Stop checking for quota when common ancestor is reached
2095            return;
2096          }
2097          final DirectoryWithQuotaFeature q
2098              = inodes[i].asDirectory().getDirectoryWithQuotaFeature();
2099          if (q != null) { // a directory with quota
2100            try {
2101              q.verifyQuota(nsDelta, dsDelta);
2102            } catch (QuotaExceededException e) {
2103              e.setPathName(getFullPathName(inodes, i));
2104              throw e;
2105            }
2106          }
2107        }
2108      }
2109      
2110      /**
2111       * Verify quota for rename operation where srcInodes[srcInodes.length-1] moves
2112       * dstInodes[dstInodes.length-1]
2113       * 
2114       * @param src directory from where node is being moved.
2115       * @param dst directory to where node is moved to.
2116       * @throws QuotaExceededException if quota limit is exceeded.
2117       */
2118      private void verifyQuotaForRename(INode[] src, INode[] dst)
2119          throws QuotaExceededException {
2120        if (!ready) {
2121          // Do not check quota if edits log is still being processed
2122          return;
2123        }
2124        int i = 0;
2125        for(; src[i] == dst[i]; i++);
2126        // src[i - 1] is the last common ancestor.
2127    
2128        final Quota.Counts delta = src[src.length - 1].computeQuotaUsage();
2129        
2130        // Reduce the required quota by dst that is being removed
2131        final int dstIndex = dst.length - 1;
2132        if (dst[dstIndex] != null) {
2133          delta.subtract(dst[dstIndex].computeQuotaUsage());
2134        }
2135        verifyQuota(dst, dstIndex, delta.get(Quota.NAMESPACE),
2136            delta.get(Quota.DISKSPACE), src[i - 1]);
2137      }
2138    
2139      /**
2140       * Checks file system limits (max component length and max directory items)
2141       * during a rename operation.
2142       *
2143       * @param srcIIP INodesInPath containing every inode in the rename source
2144       * @param dstIIP INodesInPath containing every inode in the rename destination
2145       * @throws PathComponentTooLongException child's name is too long.
2146       * @throws MaxDirectoryItemsExceededException too many children.
2147       */
2148      private void verifyFsLimitsForRename(INodesInPath srcIIP, INodesInPath dstIIP)
2149          throws PathComponentTooLongException, MaxDirectoryItemsExceededException {
2150        byte[] dstChildName = dstIIP.getLastLocalName();
2151        INode[] dstInodes = dstIIP.getINodes();
2152        int pos = dstInodes.length - 1;
2153        verifyMaxComponentLength(dstChildName, dstInodes, pos);
2154        // Do not enforce max directory items if renaming within same directory.
2155        if (srcIIP.getINode(-2) != dstIIP.getINode(-2)) {
2156          verifyMaxDirItems(dstInodes, pos);
2157        }
2158      }
2159    
2160      /** Verify if the snapshot name is legal. */
2161      void verifySnapshotName(String snapshotName, String path)
2162          throws PathComponentTooLongException {
2163        if (snapshotName.contains(Path.SEPARATOR)) {
2164          throw new HadoopIllegalArgumentException(
2165              "Snapshot name cannot contain \"" + Path.SEPARATOR + "\"");
2166        }
2167        final byte[] bytes = DFSUtil.string2Bytes(snapshotName);
2168        verifyINodeName(bytes);
2169        verifyMaxComponentLength(bytes, path, 0);
2170      }
2171      
2172      /** Verify if the inode name is legal. */
2173      void verifyINodeName(byte[] childName) throws HadoopIllegalArgumentException {
2174        if (Arrays.equals(HdfsConstants.DOT_SNAPSHOT_DIR_BYTES, childName)) {
2175          String s = "\"" + HdfsConstants.DOT_SNAPSHOT_DIR + "\" is a reserved name.";
2176          if (!ready) {
2177            s += "  Please rename it before upgrade.";
2178          }
2179          throw new HadoopIllegalArgumentException(s);
2180        }
2181      }
2182    
2183      /**
2184       * Verify child's name for fs limit.
2185       *
2186       * @param childName byte[] containing new child name
2187       * @param parentPath Object either INode[] or String containing parent path
2188       * @param pos int position of new child in path
2189       * @throws PathComponentTooLongException child's name is too long.
2190       */
2191      private void verifyMaxComponentLength(byte[] childName, Object parentPath,
2192          int pos) throws PathComponentTooLongException {
2193        if (maxComponentLength == 0) {
2194          return;
2195        }
2196    
2197        final int length = childName.length;
2198        if (length > maxComponentLength) {
2199          final String p = parentPath instanceof INode[]?
2200              getFullPathName((INode[])parentPath, pos - 1): (String)parentPath;
2201          final PathComponentTooLongException e = new PathComponentTooLongException(
2202              maxComponentLength, length, p, DFSUtil.bytes2String(childName));
2203          if (ready) {
2204            throw e;
2205          } else {
2206            // Do not throw if edits log is still being processed
2207            NameNode.LOG.error("ERROR in FSDirectory.verifyINodeName", e);
2208          }
2209        }
2210      }
2211    
2212      /**
2213       * Verify children size for fs limit.
2214       *
2215       * @param pathComponents INode[] containing full path of inodes to new child
2216       * @param pos int position of new child in pathComponents
2217       * @throws MaxDirectoryItemsExceededException too many children.
2218       */
2219      private void verifyMaxDirItems(INode[] pathComponents, int pos)
2220          throws MaxDirectoryItemsExceededException {
2221    
2222        final INodeDirectory parent = pathComponents[pos-1].asDirectory();
2223        final int count = parent.getChildrenList(Snapshot.CURRENT_STATE_ID).size();
2224        if (count >= maxDirItems) {
2225          final MaxDirectoryItemsExceededException e
2226              = new MaxDirectoryItemsExceededException(maxDirItems, count);
2227          if (ready) {
2228            e.setPathName(getFullPathName(pathComponents, pos - 1));
2229            throw e;
2230          } else {
2231            // Do not throw if edits log is still being processed
2232            NameNode.LOG.error("FSDirectory.verifyMaxDirItems: "
2233                + e.getLocalizedMessage());
2234          }
2235        }
2236      }
2237      
2238      /**
2239       * The same as {@link #addChild(INodesInPath, int, INode, boolean)}
2240       * with pos = length - 1.
2241       */
2242      private boolean addLastINode(INodesInPath inodesInPath,
2243          INode inode, boolean checkQuota) throws QuotaExceededException {
2244        final int pos = inodesInPath.getINodes().length - 1;
2245        return addChild(inodesInPath, pos, inode, checkQuota);
2246      }
2247    
2248      /** Add a node child to the inodes at index pos. 
2249       * Its ancestors are stored at [0, pos-1].
2250       * @return false if the child with this name already exists; 
2251       *         otherwise return true;
2252       * @throw QuotaExceededException is thrown if it violates quota limit
2253       */
2254      private boolean addChild(INodesInPath iip, int pos,
2255          INode child, boolean checkQuota) throws QuotaExceededException {
2256        final INode[] inodes = iip.getINodes();
2257        // Disallow creation of /.reserved. This may be created when loading
2258        // editlog/fsimage during upgrade since /.reserved was a valid name in older
2259        // release. This may also be called when a user tries to create a file
2260        // or directory /.reserved.
2261        if (pos == 1 && inodes[0] == rootDir && isReservedName(child)) {
2262          throw new HadoopIllegalArgumentException(
2263              "File name \"" + child.getLocalName() + "\" is reserved and cannot "
2264                  + "be created. If this is during upgrade change the name of the "
2265                  + "existing file or directory to another name before upgrading "
2266                  + "to the new release.");
2267        }
2268        // The filesystem limits are not really quotas, so this check may appear
2269        // odd. It's because a rename operation deletes the src, tries to add
2270        // to the dest, if that fails, re-adds the src from whence it came.
2271        // The rename code disables the quota when it's restoring to the
2272        // original location becase a quota violation would cause the the item
2273        // to go "poof".  The fs limits must be bypassed for the same reason.
2274        if (checkQuota) {
2275          verifyMaxComponentLength(child.getLocalNameBytes(), inodes, pos);
2276          verifyMaxDirItems(inodes, pos);
2277        }
2278        // always verify inode name
2279        verifyINodeName(child.getLocalNameBytes());
2280        
2281        final Quota.Counts counts = child.computeQuotaUsage();
2282        updateCount(iip, pos,
2283            counts.get(Quota.NAMESPACE), counts.get(Quota.DISKSPACE), checkQuota);
2284        boolean isRename = (child.getParent() != null);
2285        final INodeDirectory parent = inodes[pos-1].asDirectory();
2286        boolean added = false;
2287        try {
2288          added = parent.addChild(child, true, iip.getLatestSnapshotId());
2289        } catch (QuotaExceededException e) {
2290          updateCountNoQuotaCheck(iip, pos,
2291              -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
2292          throw e;
2293        }
2294        if (!added) {
2295          updateCountNoQuotaCheck(iip, pos,
2296              -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
2297        } else {
2298          iip.setINode(pos - 1, child.getParent());
2299          if (!isRename) {
2300            AclStorage.copyINodeDefaultAcl(child);
2301          }
2302          addToInodeMap(child);
2303        }
2304        return added;
2305      }
2306      
2307      private boolean addLastINodeNoQuotaCheck(INodesInPath inodesInPath, INode i) {
2308        try {
2309          return addLastINode(inodesInPath, i, false);
2310        } catch (QuotaExceededException e) {
2311          NameNode.LOG.warn("FSDirectory.addChildNoQuotaCheck - unexpected", e);
2312        }
2313        return false;
2314      }
2315      
2316      /**
2317       * Remove the last inode in the path from the namespace.
2318       * Count of each ancestor with quota is also updated.
2319       * @return -1 for failing to remove;
2320       *          0 for removing a reference whose referred inode has other 
2321       *            reference nodes;
2322       *         >0 otherwise. 
2323       */
2324      private long removeLastINode(final INodesInPath iip)
2325          throws QuotaExceededException {
2326        final int latestSnapshot = iip.getLatestSnapshotId();
2327        final INode last = iip.getLastINode();
2328        final INodeDirectory parent = iip.getINode(-2).asDirectory();
2329        if (!parent.removeChild(last, latestSnapshot)) {
2330          return -1;
2331        }
2332        INodeDirectory newParent = last.getParent();
2333        if (parent != newParent) {
2334          iip.setINode(-2, newParent);
2335        }
2336        
2337        if (!last.isInLatestSnapshot(latestSnapshot)) {
2338          final Quota.Counts counts = last.computeQuotaUsage();
2339          updateCountNoQuotaCheck(iip, iip.getINodes().length - 1,
2340              -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
2341    
2342          if (INodeReference.tryRemoveReference(last) > 0) {
2343            return 0;
2344          } else {
2345            return counts.get(Quota.NAMESPACE);
2346          }
2347        }
2348        return 1;
2349      }
2350      
2351      /**
2352       */
2353      String normalizePath(String src) {
2354        if (src.length() > 1 && src.endsWith("/")) {
2355          src = src.substring(0, src.length() - 1);
2356        }
2357        return src;
2358      }
2359    
2360      ContentSummary getContentSummary(String src) 
2361        throws FileNotFoundException, UnresolvedLinkException {
2362        String srcs = normalizePath(src);
2363        readLock();
2364        try {
2365          INode targetNode = rootDir.getNode(srcs, false);
2366          if (targetNode == null) {
2367            throw new FileNotFoundException("File does not exist: " + srcs);
2368          }
2369          else {
2370            // Make it relinquish locks everytime contentCountLimit entries are
2371            // processed. 0 means disabled. I.e. blocking for the entire duration.
2372            ContentSummaryComputationContext cscc =
2373    
2374                new ContentSummaryComputationContext(this, getFSNamesystem(),
2375                contentCountLimit);
2376            ContentSummary cs = targetNode.computeAndConvertContentSummary(cscc);
2377            yieldCount += cscc.getYieldCount();
2378            return cs;
2379          }
2380        } finally {
2381          readUnlock();
2382        }
2383      }
2384    
2385      @VisibleForTesting
2386      public long getYieldCount() {
2387        return yieldCount;
2388      }
2389    
2390      public INodeMap getINodeMap() {
2391        return inodeMap;
2392      }
2393      
2394      /**
2395       * This method is always called with writeLock of FSDirectory held.
2396       */
2397      public final void addToInodeMap(INode inode) {
2398        if (inode instanceof INodeWithAdditionalFields) {
2399          inodeMap.put((INodeWithAdditionalFields)inode);
2400        }
2401      }
2402    
2403      
2404      /**
2405       * This method is always called with writeLock of FSDirectory held.
2406       */
2407      public final void removeFromInodeMap(List<? extends INode> inodes) {
2408        if (inodes != null) {
2409          for (INode inode : inodes) {
2410            if (inode != null && inode instanceof INodeWithAdditionalFields) {
2411              inodeMap.remove(inode);
2412            }
2413          }
2414        }
2415      }
2416      
2417      /**
2418       * Get the inode from inodeMap based on its inode id.
2419       * @param id The given id
2420       * @return The inode associated with the given id
2421       */
2422      public INode getInode(long id) {
2423        readLock();
2424        try {
2425          return inodeMap.get(id);
2426        } finally {
2427          readUnlock();
2428        }
2429      }
2430      
2431      @VisibleForTesting
2432      int getInodeMapSize() {
2433        return inodeMap.size();
2434      }
2435      
2436      /**
2437       * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
2438       * Sets quota for for a directory.
2439       * @returns INodeDirectory if any of the quotas have changed. null other wise.
2440       * @throws FileNotFoundException if the path does not exist.
2441       * @throws PathIsNotDirectoryException if the path is not a directory.
2442       * @throws QuotaExceededException if the directory tree size is 
2443       *                                greater than the given quota
2444       * @throws UnresolvedLinkException if a symlink is encountered in src.
2445       * @throws SnapshotAccessControlException if path is in RO snapshot
2446       */
2447      INodeDirectory unprotectedSetQuota(String src, long nsQuota, long dsQuota)
2448          throws FileNotFoundException, PathIsNotDirectoryException,
2449          QuotaExceededException, UnresolvedLinkException,
2450          SnapshotAccessControlException {
2451        assert hasWriteLock();
2452        // sanity check
2453        if ((nsQuota < 0 && nsQuota != HdfsConstants.QUOTA_DONT_SET && 
2454             nsQuota < HdfsConstants.QUOTA_RESET) || 
2455            (dsQuota < 0 && dsQuota != HdfsConstants.QUOTA_DONT_SET && 
2456              dsQuota < HdfsConstants.QUOTA_RESET)) {
2457          throw new IllegalArgumentException("Illegal value for nsQuota or " +
2458                                             "dsQuota : " + nsQuota + " and " +
2459                                             dsQuota);
2460        }
2461        
2462        String srcs = normalizePath(src);
2463        final INodesInPath iip = rootDir.getINodesInPath4Write(srcs, true);
2464        INodeDirectory dirNode = INodeDirectory.valueOf(iip.getLastINode(), srcs);
2465        if (dirNode.isRoot() && nsQuota == HdfsConstants.QUOTA_RESET) {
2466          throw new IllegalArgumentException("Cannot clear namespace quota on root.");
2467        } else { // a directory inode
2468          final Quota.Counts oldQuota = dirNode.getQuotaCounts();
2469          final long oldNsQuota = oldQuota.get(Quota.NAMESPACE);
2470          final long oldDsQuota = oldQuota.get(Quota.DISKSPACE);
2471          if (nsQuota == HdfsConstants.QUOTA_DONT_SET) {
2472            nsQuota = oldNsQuota;
2473          }
2474          if (dsQuota == HdfsConstants.QUOTA_DONT_SET) {
2475            dsQuota = oldDsQuota;
2476          }        
2477          if (oldNsQuota == nsQuota && oldDsQuota == dsQuota) {
2478            return null;
2479          }
2480    
2481          final int latest = iip.getLatestSnapshotId();
2482          dirNode = dirNode.recordModification(latest);
2483          dirNode.setQuota(nsQuota, dsQuota);
2484          return dirNode;
2485        }
2486      }
2487      
2488      /**
2489       * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
2490       * @throws SnapshotAccessControlException if path is in RO snapshot
2491       * @see #unprotectedSetQuota(String, long, long)
2492       */
2493      void setQuota(String src, long nsQuota, long dsQuota) 
2494          throws FileNotFoundException, PathIsNotDirectoryException,
2495          QuotaExceededException, UnresolvedLinkException,
2496          SnapshotAccessControlException {
2497        writeLock();
2498        try {
2499          INodeDirectory dir = unprotectedSetQuota(src, nsQuota, dsQuota);
2500          if (dir != null) {
2501            final Quota.Counts q = dir.getQuotaCounts();
2502            fsImage.getEditLog().logSetQuota(src,
2503                q.get(Quota.NAMESPACE), q.get(Quota.DISKSPACE));
2504          }
2505        } finally {
2506          writeUnlock();
2507        }
2508      }
2509      
2510      long totalInodes() {
2511        readLock();
2512        try {
2513          return rootDir.getDirectoryWithQuotaFeature().getSpaceConsumed()
2514              .get(Quota.NAMESPACE);
2515        } finally {
2516          readUnlock();
2517        }
2518      }
2519    
2520      /**
2521       * Sets the access time on the file/directory. Logs it in the transaction log.
2522       */
2523      void setTimes(String src, INode inode, long mtime, long atime, boolean force,
2524          int latestSnapshotId) throws QuotaExceededException {
2525        boolean status = false;
2526        writeLock();
2527        try {
2528          status = unprotectedSetTimes(inode, mtime, atime, force, latestSnapshotId);
2529        } finally {
2530          writeUnlock();
2531        }
2532        if (status) {
2533          fsImage.getEditLog().logTimes(src, mtime, atime);
2534        }
2535      }
2536    
2537      boolean unprotectedSetTimes(String src, long mtime, long atime, boolean force) 
2538          throws UnresolvedLinkException, QuotaExceededException {
2539        assert hasWriteLock();
2540        final INodesInPath i = getLastINodeInPath(src); 
2541        return unprotectedSetTimes(i.getLastINode(), mtime, atime, force,
2542            i.getLatestSnapshotId());
2543      }
2544    
2545      private boolean unprotectedSetTimes(INode inode, long mtime,
2546          long atime, boolean force, int latest) throws QuotaExceededException {
2547        assert hasWriteLock();
2548        boolean status = false;
2549        if (mtime != -1) {
2550          inode = inode.setModificationTime(mtime, latest);
2551          status = true;
2552        }
2553        if (atime != -1) {
2554          long inodeTime = inode.getAccessTime();
2555    
2556          // if the last access time update was within the last precision interval, then
2557          // no need to store access time
2558          if (atime <= inodeTime + getFSNamesystem().getAccessTimePrecision() && !force) {
2559            status =  false;
2560          } else {
2561            inode.setAccessTime(atime, latest);
2562            status = true;
2563          }
2564        } 
2565        return status;
2566      }
2567    
2568      /**
2569       * Reset the entire namespace tree.
2570       */
2571      void reset() {
2572        writeLock();
2573        try {
2574          setReady(false);
2575          rootDir = createRoot(getFSNamesystem());
2576          inodeMap.clear();
2577          addToInodeMap(rootDir);
2578          nameCache.reset();
2579        } finally {
2580          writeUnlock();
2581        }
2582      }
2583    
2584      /**
2585       * create an hdfs file status from an inode
2586       * 
2587       * @param path the local name
2588       * @param node inode
2589       * @param needLocation if block locations need to be included or not
2590       * @return a file status
2591       * @throws IOException if any error occurs
2592       */
2593      private HdfsFileStatus createFileStatus(byte[] path, INode node,
2594          boolean needLocation, int snapshot) throws IOException {
2595        if (needLocation) {
2596          return createLocatedFileStatus(path, node, snapshot);
2597        } else {
2598          return createFileStatus(path, node, snapshot);
2599        }
2600      }
2601      /**
2602       * Create FileStatus by file INode 
2603       */
2604       HdfsFileStatus createFileStatus(byte[] path, INode node,
2605           int snapshot) {
2606         long size = 0;     // length is zero for directories
2607         short replication = 0;
2608         long blocksize = 0;
2609         if (node.isFile()) {
2610           final INodeFile fileNode = node.asFile();
2611           size = fileNode.computeFileSize(snapshot);
2612           replication = fileNode.getFileReplication(snapshot);
2613           blocksize = fileNode.getPreferredBlockSize();
2614         }
2615         int childrenNum = node.isDirectory() ? 
2616             node.asDirectory().getChildrenNum(snapshot) : 0;
2617             
2618         return new HdfsFileStatus(
2619            size, 
2620            node.isDirectory(), 
2621            replication, 
2622            blocksize,
2623            node.getModificationTime(snapshot),
2624            node.getAccessTime(snapshot),
2625            node.getFsPermission(snapshot),
2626            node.getUserName(snapshot),
2627            node.getGroupName(snapshot),
2628            node.isSymlink() ? node.asSymlink().getSymlink() : null,
2629            path,
2630            node.getId(),
2631            childrenNum);
2632      }
2633    
2634      /**
2635       * Create FileStatus with location info by file INode
2636       */
2637      private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path,
2638          INode node, int snapshot) throws IOException {
2639        assert hasReadLock();
2640        long size = 0; // length is zero for directories
2641        short replication = 0;
2642        long blocksize = 0;
2643        LocatedBlocks loc = null;
2644        if (node.isFile()) {
2645          final INodeFile fileNode = node.asFile();
2646          size = fileNode.computeFileSize(snapshot);
2647          replication = fileNode.getFileReplication(snapshot);
2648          blocksize = fileNode.getPreferredBlockSize();
2649    
2650          final boolean inSnapshot = snapshot != Snapshot.CURRENT_STATE_ID; 
2651          final boolean isUc = inSnapshot ? false : fileNode.isUnderConstruction();
2652          final long fileSize = !inSnapshot && isUc ? 
2653              fileNode.computeFileSizeNotIncludingLastUcBlock() : size;
2654          loc = getFSNamesystem().getBlockManager().createLocatedBlocks(
2655              fileNode.getBlocks(), fileSize, isUc, 0L, size, false,
2656              inSnapshot);
2657          if (loc == null) {
2658            loc = new LocatedBlocks();
2659          }
2660        }
2661        int childrenNum = node.isDirectory() ? 
2662            node.asDirectory().getChildrenNum(snapshot) : 0;
2663            
2664        HdfsLocatedFileStatus status =
2665            new HdfsLocatedFileStatus(size, node.isDirectory(), replication,
2666              blocksize, node.getModificationTime(snapshot),
2667              node.getAccessTime(snapshot), node.getFsPermission(snapshot),
2668              node.getUserName(snapshot), node.getGroupName(snapshot),
2669              node.isSymlink() ? node.asSymlink().getSymlink() : null, path,
2670              node.getId(), loc, childrenNum);
2671            // Set caching information for the located blocks.
2672        if (loc != null) {
2673          CacheManager cacheManager = namesystem.getCacheManager();
2674          for (LocatedBlock lb: loc.getLocatedBlocks()) {
2675            cacheManager.setCachedLocations(lb);
2676          }
2677        }
2678        return status;
2679      }
2680    
2681        
2682      /**
2683       * Add the given symbolic link to the fs. Record it in the edits log.
2684       */
2685      INodeSymlink addSymlink(String path, String target,
2686          PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
2687          throws UnresolvedLinkException, FileAlreadyExistsException,
2688          QuotaExceededException, SnapshotAccessControlException, AclException {
2689        waitForReady();
2690    
2691        final long modTime = now();
2692        if (createParent) {
2693          final String parent = new Path(path).getParent().toString();
2694          if (!mkdirs(parent, dirPerms, true, modTime)) {
2695            return null;
2696          }
2697        }
2698        final String userName = dirPerms.getUserName();
2699        INodeSymlink newNode  = null;
2700        long id = namesystem.allocateNewInodeId();
2701        writeLock();
2702        try {
2703          newNode = unprotectedAddSymlink(id, path, target, modTime, modTime,
2704              new PermissionStatus(userName, null, FsPermission.getDefault()));
2705        } finally {
2706          writeUnlock();
2707        }
2708        if (newNode == null) {
2709          NameNode.stateChangeLog.info("DIR* addSymlink: failed to add " + path);
2710          return null;
2711        }
2712        fsImage.getEditLog().logSymlink(path, target, modTime, modTime, newNode,
2713            logRetryCache);
2714        
2715        if(NameNode.stateChangeLog.isDebugEnabled()) {
2716          NameNode.stateChangeLog.debug("DIR* addSymlink: " + path + " is added");
2717        }
2718        return newNode;
2719      }
2720    
2721      /**
2722       * Add the specified path into the namespace. Invoked from edit log processing.
2723       */
2724      INodeSymlink unprotectedAddSymlink(long id, String path, String target,
2725          long mtime, long atime, PermissionStatus perm)
2726          throws UnresolvedLinkException, QuotaExceededException {
2727        assert hasWriteLock();
2728        final INodeSymlink symlink = new INodeSymlink(id, null, perm, mtime, atime,
2729            target);
2730        return addINode(path, symlink) ? symlink : null;
2731      }
2732    
2733      void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
2734        writeLock();
2735        try {
2736          List<AclEntry> newAcl = unprotectedModifyAclEntries(src, aclSpec);
2737          fsImage.getEditLog().logSetAcl(src, newAcl);
2738        } finally {
2739          writeUnlock();
2740        }
2741      }
2742    
2743      private List<AclEntry> unprotectedModifyAclEntries(String src,
2744          List<AclEntry> aclSpec) throws IOException {
2745        assert hasWriteLock();
2746        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2747        INode inode = resolveLastINode(src, iip);
2748        int snapshotId = iip.getLatestSnapshotId();
2749        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2750        List<AclEntry> newAcl = AclTransformation.mergeAclEntries(existingAcl,
2751          aclSpec);
2752        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2753        return newAcl;
2754      }
2755    
2756      void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
2757        writeLock();
2758        try {
2759          List<AclEntry> newAcl = unprotectedRemoveAclEntries(src, aclSpec);
2760          fsImage.getEditLog().logSetAcl(src, newAcl);
2761        } finally {
2762          writeUnlock();
2763        }
2764      }
2765    
2766      private List<AclEntry> unprotectedRemoveAclEntries(String src,
2767          List<AclEntry> aclSpec) throws IOException {
2768        assert hasWriteLock();
2769        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2770        INode inode = resolveLastINode(src, iip);
2771        int snapshotId = iip.getLatestSnapshotId();
2772        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2773        List<AclEntry> newAcl = AclTransformation.filterAclEntriesByAclSpec(
2774          existingAcl, aclSpec);
2775        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2776        return newAcl;
2777      }
2778    
2779      void removeDefaultAcl(String src) throws IOException {
2780        writeLock();
2781        try {
2782          List<AclEntry> newAcl = unprotectedRemoveDefaultAcl(src);
2783          fsImage.getEditLog().logSetAcl(src, newAcl);
2784        } finally {
2785          writeUnlock();
2786        }
2787      }
2788    
2789      private List<AclEntry> unprotectedRemoveDefaultAcl(String src)
2790          throws IOException {
2791        assert hasWriteLock();
2792        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2793        INode inode = resolveLastINode(src, iip);
2794        int snapshotId = iip.getLatestSnapshotId();
2795        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2796        List<AclEntry> newAcl = AclTransformation.filterDefaultAclEntries(
2797          existingAcl);
2798        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2799        return newAcl;
2800      }
2801    
2802      void removeAcl(String src) throws IOException {
2803        writeLock();
2804        try {
2805          unprotectedRemoveAcl(src);
2806          fsImage.getEditLog().logSetAcl(src, AclFeature.EMPTY_ENTRY_LIST);
2807        } finally {
2808          writeUnlock();
2809        }
2810      }
2811    
2812      private void unprotectedRemoveAcl(String src) throws IOException {
2813        assert hasWriteLock();
2814        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2815        INode inode = resolveLastINode(src, iip);
2816        int snapshotId = iip.getLatestSnapshotId();
2817        AclStorage.removeINodeAcl(inode, snapshotId);
2818      }
2819    
2820      void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
2821        writeLock();
2822        try {
2823          List<AclEntry> newAcl = unprotectedSetAcl(src, aclSpec);
2824          fsImage.getEditLog().logSetAcl(src, newAcl);
2825        } finally {
2826          writeUnlock();
2827        }
2828      }
2829    
2830      List<AclEntry> unprotectedSetAcl(String src, List<AclEntry> aclSpec)
2831          throws IOException {
2832        // ACL removal is logged to edits as OP_SET_ACL with an empty list.
2833        if (aclSpec.isEmpty()) {
2834          unprotectedRemoveAcl(src);
2835          return AclFeature.EMPTY_ENTRY_LIST;
2836        }
2837    
2838        assert hasWriteLock();
2839        INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2840        INode inode = resolveLastINode(src, iip);
2841        int snapshotId = iip.getLatestSnapshotId();
2842        List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2843        List<AclEntry> newAcl = AclTransformation.replaceAclEntries(existingAcl,
2844          aclSpec);
2845        AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2846        return newAcl;
2847      }
2848    
2849      AclStatus getAclStatus(String src) throws IOException {
2850        String srcs = normalizePath(src);
2851        readLock();
2852        try {
2853          // There is no real inode for the path ending in ".snapshot", so return a
2854          // non-null, unpopulated AclStatus.  This is similar to getFileInfo.
2855          if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR) &&
2856              getINode4DotSnapshot(srcs) != null) {
2857            return new AclStatus.Builder().owner("").group("").build();
2858          }
2859          INodesInPath iip = rootDir.getLastINodeInPath(srcs, true);
2860          INode inode = resolveLastINode(src, iip);
2861          int snapshotId = iip.getPathSnapshotId();
2862          List<AclEntry> acl = AclStorage.readINodeAcl(inode, snapshotId);
2863          return new AclStatus.Builder()
2864              .owner(inode.getUserName()).group(inode.getGroupName())
2865              .stickyBit(inode.getFsPermission(snapshotId).getStickyBit())
2866              .addEntries(acl).build();
2867        } finally {
2868          readUnlock();
2869        }
2870      }
2871    
2872      private static INode resolveLastINode(String src, INodesInPath iip)
2873          throws FileNotFoundException {
2874        INode inode = iip.getLastINode();
2875        if (inode == null)
2876          throw new FileNotFoundException("cannot find " + src);
2877        return inode;
2878      }
2879    
2880      /**
2881       * Caches frequently used file names to reuse file name objects and
2882       * reduce heap size.
2883       */
2884      void cacheName(INode inode) {
2885        // Name is cached only for files
2886        if (!inode.isFile()) {
2887          return;
2888        }
2889        ByteArray name = new ByteArray(inode.getLocalNameBytes());
2890        name = nameCache.put(name);
2891        if (name != null) {
2892          inode.setLocalName(name.getBytes());
2893        }
2894      }
2895      
2896      void shutdown() {
2897        nameCache.reset();
2898        inodeMap.clear();
2899      }
2900      
2901      /**
2902       * Given an INode get all the path complents leading to it from the root.
2903       * If an Inode corresponding to C is given in /A/B/C, the returned
2904       * patch components will be {root, A, B, C}
2905       */
2906      static byte[][] getPathComponents(INode inode) {
2907        List<byte[]> components = new ArrayList<byte[]>();
2908        components.add(0, inode.getLocalNameBytes());
2909        while(inode.getParent() != null) {
2910          components.add(0, inode.getParent().getLocalNameBytes());
2911          inode = inode.getParent();
2912        }
2913        return components.toArray(new byte[components.size()][]);
2914      }
2915      
2916      /**
2917       * @return path components for reserved path, else null.
2918       */
2919      static byte[][] getPathComponentsForReservedPath(String src) {
2920        return !isReservedName(src) ? null : INode.getPathComponents(src);
2921      }
2922      
2923      /**
2924       * Resolve the path of /.reserved/.inodes/<inodeid>/... to a regular path
2925       * 
2926       * @param src path that is being processed
2927       * @param pathComponents path components corresponding to the path
2928       * @param fsd FSDirectory
   * @return if the path indicates an inode, return path after replacing up to
   *         <inodeid> with the corresponding path of the inode, else the path
   *         in {@code src} as is.
2932       * @throws FileNotFoundException if inodeid is invalid
2933       */
2934      static String resolvePath(String src, byte[][] pathComponents, FSDirectory fsd)
2935          throws FileNotFoundException {
2936        if (pathComponents == null || pathComponents.length <= 3) {
2937          return src;
2938        }
2939        // Not /.reserved/.inodes
2940        if (!Arrays.equals(DOT_RESERVED, pathComponents[1])
2941            || !Arrays.equals(DOT_INODES, pathComponents[2])) { // Not .inodes path
2942          return src;
2943        }
2944        final String inodeId = DFSUtil.bytes2String(pathComponents[3]);
2945        long id = 0;
2946        try {
2947          id = Long.valueOf(inodeId);
2948        } catch (NumberFormatException e) {
2949          throw new FileNotFoundException("Invalid inode path: " + src);
2950        }
2951        if (id == INodeId.ROOT_INODE_ID && pathComponents.length == 4) {
2952          return Path.SEPARATOR;
2953        }
2954        INode inode = fsd.getInode(id);
2955        if (inode == null) {
2956          throw new FileNotFoundException(
2957              "File for given inode path does not exist: " + src);
2958        }
2959        
2960        // Handle single ".." for NFS lookup support.
2961        if ((pathComponents.length > 4)
2962            && DFSUtil.bytes2String(pathComponents[4]).equals("..")) {
2963          INode parent = inode.getParent();
2964          if (parent == null || parent.getId() == INodeId.ROOT_INODE_ID) {
2965            // inode is root, or its parent is root.
2966            return Path.SEPARATOR;
2967          } else {
2968            return parent.getFullPathName();
2969          }
2970        }
2971    
2972        StringBuilder path = id == INodeId.ROOT_INODE_ID ? new StringBuilder()
2973            : new StringBuilder(inode.getFullPathName());
2974        for (int i = 4; i < pathComponents.length; i++) {
2975          path.append(Path.SEPARATOR).append(DFSUtil.bytes2String(pathComponents[i]));
2976        }
2977        if (NameNode.LOG.isDebugEnabled()) {
2978          NameNode.LOG.debug("Resolved path is " + path);
2979        }
2980        return path.toString();
2981      }
2982      
2983      /** Check if a given inode name is reserved */
2984      public static boolean isReservedName(INode inode) {
2985        return CHECK_RESERVED_FILE_NAMES
2986            && Arrays.equals(inode.getLocalNameBytes(), DOT_RESERVED);
2987      }
2988      
2989      /** Check if a given path is reserved */
2990      public static boolean isReservedName(String src) {
2991        return src.startsWith(DOT_RESERVED_PATH_PREFIX);
2992      }
2993    }