001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
021    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
022    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
023    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
024    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
025    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
026    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
027    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
028    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
029    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
030    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
031    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
032    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
033    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
034    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
035    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
036    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
037    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
038    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
039    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
040    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
041    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
042    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
043    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
044    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
045    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
046    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
047    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
048    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
049    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
050    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
051    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
052    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
053    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
054    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
055    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
056    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
057    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
058    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
059    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
060    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
061    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
062    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
063    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
064    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
065    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
066    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
067    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
068    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
069    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
070    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
071    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
072    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
073    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
074    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
075    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
076    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
077    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
078    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
079    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
080    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
081    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
082    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
083    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
084    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
085    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
086    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
087    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
088    import static org.apache.hadoop.util.Time.now;
089    
090    import java.io.BufferedWriter;
091    import java.io.ByteArrayInputStream;
092    import java.io.DataInput;
093    import java.io.DataInputStream;
094    import java.io.File;
095    import java.io.FileNotFoundException;
096    import java.io.FileOutputStream;
097    import java.io.IOException;
098    import java.io.OutputStreamWriter;
099    import java.io.PrintWriter;
100    import java.io.StringWriter;
101    import java.lang.management.ManagementFactory;
102    import java.net.InetAddress;
103    import java.net.URI;
104    import java.util.ArrayList;
105    import java.util.Arrays;
106    import java.util.Collection;
107    import java.util.Collections;
108    import java.util.Date;
109    import java.util.EnumSet;
110    import java.util.HashMap;
111    import java.util.HashSet;
112    import java.util.Iterator;
113    import java.util.LinkedHashSet;
114    import java.util.List;
115    import java.util.Map;
116    import java.util.Set;
117    import java.util.concurrent.TimeUnit;
118    import java.util.concurrent.locks.ReentrantLock;
119    import java.util.concurrent.locks.ReentrantReadWriteLock;
120    
121    import javax.management.NotCompliantMBeanException;
122    import javax.management.ObjectName;
123    import javax.management.StandardMBean;
124    
125    import org.apache.commons.logging.Log;
126    import org.apache.commons.logging.LogFactory;
127    import org.apache.commons.logging.impl.Log4JLogger;
128    import org.apache.hadoop.HadoopIllegalArgumentException;
129    import org.apache.hadoop.classification.InterfaceAudience;
130    import org.apache.hadoop.conf.Configuration;
131    import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
132    import org.apache.hadoop.fs.CacheFlag;
133    import org.apache.hadoop.fs.ContentSummary;
134    import org.apache.hadoop.fs.CreateFlag;
135    import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
136    import org.apache.hadoop.fs.FileAlreadyExistsException;
137    import org.apache.hadoop.fs.FileStatus;
138    import org.apache.hadoop.fs.FileSystem;
139    import org.apache.hadoop.fs.FsServerDefaults;
140    import org.apache.hadoop.fs.InvalidPathException;
141    import org.apache.hadoop.fs.Options;
142    import org.apache.hadoop.fs.Options.Rename;
143    import org.apache.hadoop.fs.ParentNotDirectoryException;
144    import org.apache.hadoop.fs.Path;
145    import org.apache.hadoop.fs.UnresolvedLinkException;
146    import org.apache.hadoop.fs.permission.AclEntry;
147    import org.apache.hadoop.fs.permission.AclStatus;
148    import org.apache.hadoop.fs.permission.FsAction;
149    import org.apache.hadoop.fs.permission.FsPermission;
150    import org.apache.hadoop.fs.permission.PermissionStatus;
151    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
152    import org.apache.hadoop.ha.ServiceFailedException;
153    import org.apache.hadoop.hdfs.DFSConfigKeys;
154    import org.apache.hadoop.hdfs.DFSUtil;
155    import org.apache.hadoop.hdfs.HAUtil;
156    import org.apache.hadoop.hdfs.HdfsConfiguration;
157    import org.apache.hadoop.hdfs.StorageType;
158    import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
159    import org.apache.hadoop.hdfs.protocol.Block;
160    import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
161    import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
162    import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
163    import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
164    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
165    import org.apache.hadoop.hdfs.protocol.DatanodeID;
166    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
167    import org.apache.hadoop.hdfs.protocol.DirectoryListing;
168    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
169    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
170    import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
171    import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
172    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
173    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
174    import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
175    import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
176    import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
177    import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
178    import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
179    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
180    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
181    import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
182    import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
183    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
184    import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
185    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
186    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
187    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState;
188    import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
189    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
190    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
191    import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
192    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
193    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
194    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
195    import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
196    import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
197    import org.apache.hadoop.hdfs.server.common.GenerationStamp;
198    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
199    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
200    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
201    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
202    import org.apache.hadoop.hdfs.server.common.Storage;
203    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
204    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
205    import org.apache.hadoop.hdfs.server.common.Util;
206    import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
207    import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
208    import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
209    import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
210    import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
211    import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
212    import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
213    import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
214    import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
215    import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
216    import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
217    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
218    import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo;
219    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
220    import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
221    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
222    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
223    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
224    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
225    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
226    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
227    import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
228    import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
229    import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
230    import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
231    import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
232    import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
233    import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
234    import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
235    import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
236    import org.apache.hadoop.hdfs.server.protocol.StorageReport;
237    import org.apache.hadoop.hdfs.util.ChunkedArrayList;
238    import org.apache.hadoop.io.IOUtils;
239    import org.apache.hadoop.io.Text;
240    import org.apache.hadoop.ipc.RetriableException;
241    import org.apache.hadoop.ipc.RetryCache;
242    import org.apache.hadoop.ipc.RetryCache.CacheEntry;
243    import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
244    import org.apache.hadoop.ipc.Server;
245    import org.apache.hadoop.ipc.StandbyException;
246    import org.apache.hadoop.metrics2.annotation.Metric;
247    import org.apache.hadoop.metrics2.annotation.Metrics;
248    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
249    import org.apache.hadoop.metrics2.util.MBeans;
250    import org.apache.hadoop.net.NetworkTopology;
251    import org.apache.hadoop.net.Node;
252    import org.apache.hadoop.security.AccessControlException;
253    import org.apache.hadoop.security.UserGroupInformation;
254    import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
255    import org.apache.hadoop.security.token.SecretManager.InvalidToken;
256    import org.apache.hadoop.security.token.Token;
257    import org.apache.hadoop.security.token.TokenIdentifier;
258    import org.apache.hadoop.security.token.delegation.DelegationKey;
259    import org.apache.hadoop.util.Daemon;
260    import org.apache.hadoop.util.DataChecksum;
261    import org.apache.hadoop.util.StringUtils;
262    import org.apache.hadoop.util.Time;
263    import org.apache.hadoop.util.VersionInfo;
264    import org.apache.log4j.Appender;
265    import org.apache.log4j.AsyncAppender;
266    import org.apache.log4j.Logger;
267    import org.mortbay.util.ajax.JSON;
268    
269    import com.google.common.annotations.VisibleForTesting;
270    import com.google.common.base.Charsets;
271    import com.google.common.base.Preconditions;
272    import com.google.common.collect.ImmutableMap;
273    import com.google.common.collect.Lists;
274    
275    /***************************************************
 * FSNamesystem does the actual bookkeeping work for the
 * NameNode.
278     *
279     * It tracks several important tables.
280     *
281     * 1)  valid fsname --> blocklist  (kept on disk, logged)
282     * 2)  Set of all valid blocks (inverted #1)
283     * 3)  block --> machinelist (kept in memory, rebuilt dynamically from reports)
284     * 4)  machine --> blocklist (inverted #2)
285     * 5)  LRU cache of updated-heartbeat machines
286     ***************************************************/
287    @InterfaceAudience.Private
288    @Metrics(context="dfs")
289    public class FSNamesystem implements Namesystem, FSClusterStats,
290        FSNamesystemMBean, NameNodeMXBean {
  public static final Log LOG = LogFactory.getLog(FSNamesystem.class);

  // Per-thread scratch StringBuilder, presumably reused when formatting audit
  // log entries to avoid per-event allocation -- usage is not visible in this
  // chunk, confirm against the audit logger implementations.
  private static final ThreadLocal<StringBuilder> auditBuffer =
    new ThreadLocal<StringBuilder>() {
      @Override
      protected StringBuilder initialValue() {
        return new StringBuilder();
      }
  };
300    
301      @VisibleForTesting
302      public boolean isAuditEnabled() {
303        return !isDefaultAuditLogger || auditLog.isInfoEnabled();
304      }
305    
306      private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
307          throws IOException {
308        return (isAuditEnabled() && isExternalInvocation())
309            ? dir.getFileInfo(path, resolveSymlink) : null;
310      }
311      
  /**
   * Convenience overload: log an audit event with no destination path and no
   * file status.
   */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }
316      
317      private void logAuditEvent(boolean succeeded, String cmd, String src,
318          String dst, HdfsFileStatus stat) throws IOException {
319        if (isAuditEnabled() && isExternalInvocation()) {
320          logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
321                        cmd, src, dst, stat);
322        }
323      }
324    
325      private void logAuditEvent(boolean succeeded,
326          UserGroupInformation ugi, InetAddress addr, String cmd, String src,
327          String dst, HdfsFileStatus stat) {
328        FileStatus status = null;
329        if (stat != null) {
330          Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
331          Path path = dst != null ? new Path(dst) : new Path(src);
332          status = new FileStatus(stat.getLen(), stat.isDir(),
333              stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
334              stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
335              stat.getGroup(), symlink, path);
336        }
337        for (AuditLogger logger : auditLoggers) {
338          if (logger instanceof HdfsAuditLogger) {
339            HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
340            hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
341                status, ugi, dtSecretManager);
342          } else {
343            logger.logAuditEvent(succeeded, ugi.toString(), addr,
344                cmd, src, dst, status);
345          }
346        }
347      }
348    
349      /**
350       * Logger for audit events, noting successful FSNamesystem operations. Emits
351       * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
352       * <code>key=value</code> pairs to be written for the following properties:
353       * <code>
354       * ugi=&lt;ugi in RPC&gt;
355       * ip=&lt;remote IP&gt;
356       * cmd=&lt;command&gt;
357       * src=&lt;src path&gt;
358       * dst=&lt;dst path (optional)&gt;
359       * perm=&lt;permissions (optional)&gt;
360       * </code>
361       */
362      public static final Log auditLog = LogFactory.getLog(
363          FSNamesystem.class.getName() + ".audit");
364    
365      static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
366      static int BLOCK_DELETION_INCREMENT = 1000;
367      private final boolean isPermissionEnabled;
368      private final UserGroupInformation fsOwner;
369      private final String fsOwnerShortUserName;
370      private final String supergroup;
371      private final boolean standbyShouldCheckpoint;
372      
373      // Scan interval is not configurable.
374      private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
375        TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
376      final DelegationTokenSecretManager dtSecretManager;
377      private final boolean alwaysUseDelegationTokensForTests;
378    
379      private static final Step STEP_AWAITING_REPORTED_BLOCKS =
380        new Step(StepType.AWAITING_REPORTED_BLOCKS);
381    
382      // Tracks whether the default audit logger is the only configured audit
383      // logger; this allows isAuditEnabled() to return false in case the
384      // underlying logger is disabled, and avoid some unnecessary work.
385      private final boolean isDefaultAuditLogger;
386      private final List<AuditLogger> auditLoggers;
387    
388      /** The namespace tree. */
389      FSDirectory dir;
390      private final BlockManager blockManager;
391      private final SnapshotManager snapshotManager;
392      private final CacheManager cacheManager;
393      private final DatanodeStatistics datanodeStatistics;
394    
395      private RollingUpgradeInfo rollingUpgradeInfo = null;
396      /**
397       * A flag that indicates whether the checkpointer should checkpoint a rollback
398       * fsimage. The edit log tailer sets this flag. The checkpoint will create a
399       * rollback fsimage if the flag is true, and then change the flag to false.
400       */
401      private volatile boolean needRollbackFsImage;
402    
403      // Block pool ID used by this namenode
404      private String blockPoolId;
405    
406      final LeaseManager leaseManager = new LeaseManager(this); 
407    
408      volatile Daemon smmthread = null;  // SafeModeMonitor thread
409      
410      Daemon nnrmthread = null; // NamenodeResourceMonitor thread
411    
412      Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
413      /**
414       * When an active namenode will roll its own edit log, in # edits
415       */
416      private final long editLogRollerThreshold;
417      /**
418       * Check interval of an active namenode's edit log roller thread 
419       */
420      private final int editLogRollerInterval;
421    
422      private volatile boolean hasResourcesAvailable = false;
423      private volatile boolean fsRunning = true;
424      
425      /** The start time of the namesystem. */
426      private final long startTime = now();
427    
428      /** The interval of namenode checking for the disk space availability */
429      private final long resourceRecheckInterval;
430    
431      // The actual resource checker instance.
432      NameNodeResourceChecker nnResourceChecker;
433    
434      private final FsServerDefaults serverDefaults;
435      private final boolean supportAppends;
436      private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
437    
438      private volatile SafeModeInfo safeMode;  // safe mode information
439    
440      private final long maxFsObjects;          // maximum number of fs objects
441    
442      private final long minBlockSize;         // minimum block size
443      private final long maxBlocksPerFile;     // maximum # of blocks per file
444    
445      /**
446       * The global generation stamp for legacy blocks with randomly
447       * generated block IDs.
448       */
449      private final GenerationStamp generationStampV1 = new GenerationStamp();
450    
451      /**
452       * The global generation stamp for this file system.
453       */
454      private final GenerationStamp generationStampV2 = new GenerationStamp();
455    
456      /**
457       * The value of the generation stamp when the first switch to sequential
458       * block IDs was made. Blocks with generation stamps below this value
459       * have randomly allocated block IDs. Blocks with generation stamps above
460       * this value had sequentially allocated block IDs. Read from the fsImage
461       * (or initialized as an offset from the V1 (legacy) generation stamp on
462       * upgrade).
463       */
464      private long generationStampV1Limit =
465          GenerationStamp.GRANDFATHER_GENERATION_STAMP;
466    
467      /**
468       * The global block ID space for this file system.
469       */
470      @VisibleForTesting
471      private final SequentialBlockIdGenerator blockIdGenerator;
472    
473      // precision of access times.
474      private final long accessTimePrecision;
475    
476      /** Lock to protect FSNamesystem. */
477      private final FSNamesystemLock fsLock;
478    
479      /**
480       * Used when this NN is in standby state to read from the shared edit log.
481       */
482      private EditLogTailer editLogTailer = null;
483    
484      /**
485       * Used when this NN is in standby state to perform checkpoints.
486       */
487      private StandbyCheckpointer standbyCheckpointer;
488    
489      /**
490       * Reference to the NN's HAContext object. This is only set once
491       * {@link #startCommonServices(Configuration, HAContext)} is called. 
492       */
493      private HAContext haContext;
494    
495      private final boolean haEnabled;
496    
497      /** flag indicating whether replication queues have been initialized */
498      boolean initializedReplQueues = false;
499    
500      /**
501       * Whether the namenode is in the middle of starting the active service
502       */
503      private volatile boolean startingActiveService = false;
504        
505      private INodeId inodeId;
506      
507      private final RetryCache retryCache;
508    
509      private final AclConfigFlag aclConfigFlag;
510    
511      /**
512       * Set the last allocated inode id when fsimage or editlog is loaded. 
513       */
514      public void resetLastInodeId(long newValue) throws IOException {
515        try {
516          inodeId.skipTo(newValue);
517        } catch(IllegalStateException ise) {
518          throw new IOException(ise);
519        }
520      }
521    
  /**
   * Should only be used for tests to reset to any value: unlike
   * {@link #resetLastInodeId(long)}, this performs no validation.
   */
  void resetLastInodeIdWithoutChecking(long newValue) {
    inodeId.setCurrentValue(newValue);
  }
526      
  /** @return the last inode ID handed out by this namesystem. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }
531    
  /** Allocate a new inode ID by advancing the sequential inode id counter. */
  public long allocateNewInodeId() {
    return inodeId.nextValue();
  }
536      
537      /**
538       * Clear all loaded data
539       */
540      void clear() {
541        dir.reset();
542        dtSecretManager.reset();
543        generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
544        generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
545        blockIdGenerator.setCurrentValue(
546            SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
547        generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
548        leaseManager.removeAllLeases();
549        inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
550        snapshotManager.clearSnapshottableDirs();
551        cacheManager.clear();
552      }
553    
  /** @return the lease manager (exposed for tests). */
  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }
558      
  /** @return true if this namesystem was configured for HA operation. */
  boolean isHaEnabled() {
    return haEnabled;
  }
562      
563      /**
564       * Check the supplied configuration for correctness.
565       * @param conf Supplies the configuration to validate.
566       * @throws IOException if the configuration could not be queried.
567       * @throws IllegalArgumentException if the configuration is invalid.
568       */
569      private static void checkConfiguration(Configuration conf)
570          throws IOException {
571    
572        final Collection<URI> namespaceDirs =
573            FSNamesystem.getNamespaceDirs(conf);
574        final Collection<URI> editsDirs =
575            FSNamesystem.getNamespaceEditsDirs(conf);
576        final Collection<URI> requiredEditsDirs =
577            FSNamesystem.getRequiredNamespaceEditsDirs(conf);
578        final Collection<URI> sharedEditsDirs =
579            FSNamesystem.getSharedEditsDirs(conf);
580    
581        for (URI u : requiredEditsDirs) {
582          if (u.toString().compareTo(
583                  DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
584            continue;
585          }
586    
587          // Each required directory must also be in editsDirs or in
588          // sharedEditsDirs.
589          if (!editsDirs.contains(u) &&
590              !sharedEditsDirs.contains(u)) {
591            throw new IllegalArgumentException(
592                "Required edits directory " + u.toString() + " not present in " +
593                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
594                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
595                editsDirs.toString() + "; " +
596                DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
597                requiredEditsDirs.toString() + ". " +
598                DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
599                sharedEditsDirs.toString() + ".");
600          }
601        }
602    
603        if (namespaceDirs.size() == 1) {
604          LOG.warn("Only one image storage directory ("
605              + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of dataloss"
606              + " due to lack of redundant storage directories!");
607        }
608        if (editsDirs.size() == 1) {
609          LOG.warn("Only one namespace edits storage directory ("
610              + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of dataloss"
611              + " due to lack of redundant storage directories!");
612        }
613      }
614    
615      /**
616       * Instantiates an FSNamesystem loaded from the image and edits
617       * directories specified in the passed Configuration.
618       *
619       * @param conf the Configuration which specifies the storage directories
620       *             from which to load
621       * @return an FSNamesystem which contains the loaded namespace
622       * @throws IOException if loading fails
623       */
624      static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
625    
626        checkConfiguration(conf);
627        FSImage fsImage = new FSImage(conf,
628            FSNamesystem.getNamespaceDirs(conf),
629            FSNamesystem.getNamespaceEditsDirs(conf));
630        FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
631        StartupOption startOpt = NameNode.getStartupOption(conf);
632        if (startOpt == StartupOption.RECOVER) {
633          namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
634        }
635    
636        long loadStart = now();
637        try {
638          namesystem.loadFSImage(startOpt);
639        } catch (IOException ioe) {
640          LOG.warn("Encountered exception loading fsimage", ioe);
641          fsImage.close();
642          throw ioe;
643        }
644        long timeTakenToLoadFSImage = now() - loadStart;
645        LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
646        NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
647        if (nnMetrics != null) {
648          nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
649        }
650        return namesystem;
651      }
652      
  /**
   * Convenience constructor; equivalent to
   * {@code FSNamesystem(conf, fsImage, false)}, i.e. the retry cache
   * setup step is NOT skipped.
   */
  FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
    this(conf, fsImage, false);
  }
656      
657      /**
658       * Create an FSNamesystem associated with the specified image.
659       * 
660       * Note that this does not load any data off of disk -- if you would
661       * like that behavior, use {@link #loadFromDisk(Configuration)}
662       *
663       * @param conf configuration
664       * @param fsImage The FSImage to associate with
665       * @param ignoreRetryCache Whether or not should ignore the retry cache setup
666       *                         step. For Secondary NN this should be set to true.
667       * @throws IOException on bad configuration
668       */
669      FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
670          throws IOException {
671        if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
672                            DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
673          LOG.info("Enabling async auditlog");
674          enableAsyncAuditLog();
675        }
676        boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
677        LOG.info("fsLock is fair:" + fair);
678        fsLock = new FSNamesystemLock(fair);
679        try {
680          resourceRecheckInterval = conf.getLong(
681              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
682              DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);
683    
684          this.blockManager = new BlockManager(this, this, conf);
685          this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
686          this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);
687    
688          this.fsOwner = UserGroupInformation.getCurrentUser();
689          this.fsOwnerShortUserName = fsOwner.getShortUserName();
690          this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, 
691                                     DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
692          this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
693                                                     DFS_PERMISSIONS_ENABLED_DEFAULT);
694          LOG.info("fsOwner             = " + fsOwner);
695          LOG.info("supergroup          = " + supergroup);
696          LOG.info("isPermissionEnabled = " + isPermissionEnabled);
697    
698          // block allocation has to be persisted in HA using a shared edits directory
699          // so that the standby has up-to-date namespace information
700          String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
701          this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);  
702          
703          // Sanity check the HA-related config.
704          if (nameserviceId != null) {
705            LOG.info("Determined nameservice ID: " + nameserviceId);
706          }
707          LOG.info("HA Enabled: " + haEnabled);
708          if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
709            LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
710            throw new IOException("Invalid configuration: a shared edits dir " +
711                "must not be specified if HA is not enabled.");
712          }
713    
714          // Get the checksum type from config
715          String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
716          DataChecksum.Type checksumType;
717          try {
718             checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
719          } catch (IllegalArgumentException iae) {
720             throw new IOException("Invalid checksum type in "
721                + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
722          }
723    
724          this.serverDefaults = new FsServerDefaults(
725              conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
726              conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
727              conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
728              (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
729              conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
730              conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
731              conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
732              checksumType);
733          
734          this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, 
735                                           DFS_NAMENODE_MAX_OBJECTS_DEFAULT);
736    
737          this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
738              DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
739          this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
740              DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
741          this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
742              DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
743          this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
744          LOG.info("Append Enabled: " + supportAppends);
745    
746          this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);
747          
748          this.standbyShouldCheckpoint = conf.getBoolean(
749              DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
750          // # edit autoroll threshold is a multiple of the checkpoint threshold 
751          this.editLogRollerThreshold = (long)
752              (conf.getFloat(
753                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
754                  DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
755              conf.getLong(
756                  DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
757                  DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
758          this.editLogRollerInterval = conf.getInt(
759              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
760              DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
761          this.inodeId = new INodeId();
762          
763          // For testing purposes, allow the DT secret manager to be started regardless
764          // of whether security is enabled.
765          alwaysUseDelegationTokensForTests = conf.getBoolean(
766              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
767              DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
768    
769          this.dtSecretManager = createDelegationTokenSecretManager(conf);
770          this.dir = new FSDirectory(fsImage, this, conf);
771          this.snapshotManager = new SnapshotManager(dir);
772          this.cacheManager = new CacheManager(this, conf, blockManager);
773          this.safeMode = new SafeModeInfo(conf);
774          this.auditLoggers = initAuditLoggers(conf);
775          this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
776            auditLoggers.get(0) instanceof DefaultAuditLogger;
777          this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
778          this.aclConfigFlag = new AclConfigFlag(conf);
779        } catch(IOException e) {
780          LOG.error(getClass().getSimpleName() + " initialization failed.", e);
781          close();
782          throw e;
783        } catch (RuntimeException re) {
784          LOG.error(getClass().getSimpleName() + " initialization failed.", re);
785          close();
786          throw re;
787        }
788      }
789      
790      @VisibleForTesting
791      public RetryCache getRetryCache() {
792        return retryCache;
793      }
794      
795      /** Whether or not retry cache is enabled */
796      boolean hasRetryCache() {
797        return retryCache != null;
798      }
799      
800      void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
801        if (retryCache != null) {
802          retryCache.addCacheEntryWithPayload(clientId, callId, payload);
803        }
804      }
805      
806      void addCacheEntry(byte[] clientId, int callId) {
807        if (retryCache != null) {
808          retryCache.addCacheEntry(clientId, callId);
809        }
810      }
811    
812      @VisibleForTesting
813      static RetryCache initRetryCache(Configuration conf) {
814        boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
815            DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
816        LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
817        if (enable) {
818          float heapPercent = conf.getFloat(
819              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
820              DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
821          long entryExpiryMillis = conf.getLong(
822              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
823              DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
824          LOG.info("Retry cache will use " + heapPercent
825              + " of total heap and retry cache entry expiry time is "
826              + entryExpiryMillis + " millis");
827          long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
828          return new RetryCache("NameNodeRetryCache", heapPercent,
829              entryExpiryNanos);
830        }
831        return null;
832      }
833    
834      private List<AuditLogger> initAuditLoggers(Configuration conf) {
835        // Initialize the custom access loggers if configured.
836        Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
837        List<AuditLogger> auditLoggers = Lists.newArrayList();
838        if (alClasses != null && !alClasses.isEmpty()) {
839          for (String className : alClasses) {
840            try {
841              AuditLogger logger;
842              if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
843                logger = new DefaultAuditLogger();
844              } else {
845                logger = (AuditLogger) Class.forName(className).newInstance();
846              }
847              logger.initialize(conf);
848              auditLoggers.add(logger);
849            } catch (RuntimeException re) {
850              throw re;
851            } catch (Exception e) {
852              throw new RuntimeException(e);
853            }
854          }
855        }
856    
857        // Make sure there is at least one logger installed.
858        if (auditLoggers.isEmpty()) {
859          auditLoggers.add(new DefaultAuditLogger());
860        }
861        return Collections.unmodifiableList(auditLoggers);
862      }
863    
  /**
   * Load the namespace from the associated FSImage under the write lock,
   * formatting first if FORMAT was requested, saving a fresh image when the
   * loaded one is stale, and opening the edit log for write unless coming
   * up as an HA standby.
   *
   * @param startOpt how the NameNode was started (FORMAT/REGULAR/RECOVER/...)
   * @throws IOException if the image or edits cannot be read or written
   */
  private void loadFSImage(StartupOption startOpt) throws IOException {
    final FSImage fsImage = getFSImage();

    // format before starting up if requested
    if (startOpt == StartupOption.FORMAT) {
      
      fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

      // After formatting, continue startup as a normal (REGULAR) start.
      startOpt = StartupOption.REGULAR;
    }
    boolean success = false;
    writeLock();
    try {
      // We shouldn't be calling saveNamespace if we've come up in standby state.
      MetaRecoveryContext recovery = startOpt.createRecoveryContext();
      final boolean staleImage
          = fsImage.recoverTransitionRead(startOpt, this, recovery);
      if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
        // A rollback discards any in-progress rolling upgrade state.
        rollingUpgradeInfo = null;
      }
      final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); 
      LOG.info("Need to save fs image? " + needToSave
          + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
          + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
      if (needToSave) {
        fsImage.saveNamespace(this);
      } else {
        // No need to save, so mark the phase done.
        StartupProgress prog = NameNode.getStartupProgress();
        prog.beginPhase(Phase.SAVING_CHECKPOINT);
        prog.endPhase(Phase.SAVING_CHECKPOINT);
      }
      // This will start a new log segment and write to the seen_txid file, so
      // we shouldn't do it when coming up in standby state
      if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)) {
        fsImage.openEditLogForWrite();
      }
      success = true;
    } finally {
      if (!success) {
        // Loading failed part-way: release image resources before unlocking.
        fsImage.close();
      }
      writeUnlock();
    }
    dir.imageLoadComplete();
  }
910    
911      private void startSecretManager() {
912        if (dtSecretManager != null) {
913          try {
914            dtSecretManager.startThreads();
915          } catch (IOException e) {
916            // Inability to start secret manager
917            // can't be recovered from.
918            throw new RuntimeException(e);
919          }
920        }
921      }
922      
923      private void startSecretManagerIfNecessary() {
924        boolean shouldRun = shouldUseDelegationTokens() &&
925          !isInSafeMode() && getEditLog().isOpenForWrite();
926        boolean running = dtSecretManager.isRunning();
927        if (shouldRun && !running) {
928          startSecretManager();
929        }
930      }
931    
932      private void stopSecretManager() {
933        if (dtSecretManager != null) {
934          dtSecretManager.stopThreads();
935        }
936      }
937      
938      /** 
939       * Start services common to both active and standby states
940       * @param haContext 
941       * @throws IOException
942       */
943      void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
944        this.registerMBean(); // register the MBean for the FSNamesystemState
945        writeLock();
946        this.haContext = haContext;
947        try {
948          nnResourceChecker = new NameNodeResourceChecker(conf);
949          checkAvailableResources();
950          assert safeMode != null && !isPopulatingReplQueues();
951          StartupProgress prog = NameNode.getStartupProgress();
952          prog.beginPhase(Phase.SAFEMODE);
953          prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
954            getCompleteBlocksTotal());
955          setBlockTotal();
956          blockManager.activate(conf);
957        } finally {
958          writeUnlock();
959        }
960        
961        registerMXBean();
962        DefaultMetricsSystem.instance().register(this);
963      }
964      
965      /** 
966       * Stop services common to both active and standby states
967       * @throws IOException
968       */
969      void stopCommonServices() {
970        writeLock();
971        try {
972          if (blockManager != null) blockManager.close();
973        } finally {
974          writeUnlock();
975        }
976        RetryCache.clear(retryCache);
977      }
978      
979      /**
980       * Start services required in active state
981       * @throws IOException
982       */
983      void startActiveServices() throws IOException {
984        startingActiveService = true;
985        LOG.info("Starting services required for active state");
986        writeLock();
987        try {
988          FSEditLog editLog = dir.fsImage.getEditLog();
989          
990          if (!editLog.isOpenForWrite()) {
991            // During startup, we're already open for write during initialization.
992            editLog.initJournalsForWrite();
993            // May need to recover
994            editLog.recoverUnclosedStreams();
995            
996            LOG.info("Catching up to latest edits from old active before " +
997                "taking over writer role in edits logs");
998            editLogTailer.catchupDuringFailover();
999            
1000            blockManager.setPostponeBlocksFromFuture(false);
1001            blockManager.getDatanodeManager().markAllDatanodesStale();
1002            blockManager.clearQueues();
1003            blockManager.processAllPendingDNMessages();
1004    
1005            // Only need to re-process the queue, If not in SafeMode.
1006            if (!isInSafeMode()) {
1007              LOG.info("Reprocessing replication and invalidation queues");
1008              initializeReplQueues();
1009            }
1010    
1011            if (LOG.isDebugEnabled()) {
1012              LOG.debug("NameNode metadata after re-processing " +
1013                  "replication and invalidation queues during failover:\n" +
1014                  metaSaveAsString());
1015            }
1016            
1017            long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
1018            LOG.info("Will take over writing edit logs at txnid " + 
1019                nextTxId);
1020            editLog.setNextTxId(nextTxId);
1021    
1022            dir.fsImage.editLog.openForWrite();
1023          }
1024          
1025          if (haEnabled) {
1026            // Renew all of the leases before becoming active.
1027            // This is because, while we were in standby mode,
1028            // the leases weren't getting renewed on this NN.
1029            // Give them all a fresh start here.
1030            leaseManager.renewAllLeases();
1031          }
1032          leaseManager.startMonitor();
1033          startSecretManagerIfNecessary();
1034    
1035          //ResourceMonitor required only at ActiveNN. See HDFS-2914
1036          this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
1037          nnrmthread.start();
1038    
1039          nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
1040              editLogRollerThreshold, editLogRollerInterval));
1041          nnEditLogRoller.start();
1042    
1043          cacheManager.startMonitorThread();
1044          blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
1045        } finally {
1046          writeUnlock();
1047          startingActiveService = false;
1048        }
1049      }
1050    
1051      /**
1052       * Initialize replication queues.
1053       */
1054      private void initializeReplQueues() {
1055        LOG.info("initializing replication queues");
1056        blockManager.processMisReplicatedBlocks();
1057        initializedReplQueues = true;
1058      }
1059    
1060      private boolean inActiveState() {
1061        return haContext != null &&
1062            haContext.getState().getServiceState() == HAServiceState.ACTIVE;
1063      }
1064    
1065      /**
1066       * @return Whether the namenode is transitioning to active state and is in the
1067       *         middle of the {@link #startActiveServices()}
1068       */
1069      public boolean inTransitionToActive() {
1070        return haEnabled && inActiveState() && startingActiveService;
1071      }
1072    
1073      private boolean shouldUseDelegationTokens() {
1074        return UserGroupInformation.isSecurityEnabled() ||
1075          alwaysUseDelegationTokensForTests;
1076      }
1077    
1078      /** 
1079       * Stop services required in active state
1080       * @throws InterruptedException
1081       */
1082      void stopActiveServices() {
1083        LOG.info("Stopping services started for active state");
1084        writeLock();
1085        try {
1086          stopSecretManager();
1087          if (leaseManager != null) {
1088            leaseManager.stopMonitor();
1089          }
1090          if (nnrmthread != null) {
1091            ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
1092            nnrmthread.interrupt();
1093          }
1094          if (nnEditLogRoller != null) {
1095            ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
1096            nnEditLogRoller.interrupt();
1097          }
1098          if (dir != null && dir.fsImage != null) {
1099            if (dir.fsImage.editLog != null) {
1100              dir.fsImage.editLog.close();
1101            }
1102            // Update the fsimage with the last txid that we wrote
1103            // so that the tailer starts from the right spot.
1104            dir.fsImage.updateLastAppliedTxIdFromWritten();
1105          }
1106          cacheManager.stopMonitorThread();
1107          cacheManager.clearDirectiveStats();
1108          blockManager.getDatanodeManager().clearPendingCachingCommands();
1109          blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
1110          // Don't want to keep replication queues when not in Active.
1111          blockManager.clearQueues();
1112          initializedReplQueues = false;
1113        } finally {
1114          writeUnlock();
1115        }
1116      }
1117      
1118      /**
1119       * Start services required in standby state 
1120       * 
1121       * @throws IOException
1122       */
1123      void startStandbyServices(final Configuration conf) throws IOException {
1124        LOG.info("Starting services required for standby state");
1125        if (!dir.fsImage.editLog.isOpenForRead()) {
1126          // During startup, we're already open for read.
1127          dir.fsImage.editLog.initSharedJournalsForRead();
1128        }
1129        
1130        blockManager.setPostponeBlocksFromFuture(true);
1131    
1132        editLogTailer = new EditLogTailer(this, conf);
1133        editLogTailer.start();
1134        if (standbyShouldCheckpoint) {
1135          standbyCheckpointer = new StandbyCheckpointer(conf, this);
1136          standbyCheckpointer.start();
1137        }
1138      }
1139    
1140      /**
1141       * Called when the NN is in Standby state and the editlog tailer tails the
1142       * OP_ROLLING_UPGRADE_START.
1143       */
1144      void triggerRollbackCheckpoint() {
1145        setNeedRollbackFsImage(true);
1146        if (standbyCheckpointer != null) {
1147          standbyCheckpointer.triggerRollbackCheckpoint();
1148        }
1149      }
1150    
1151      /**
1152       * Called while the NN is in Standby state, but just about to be
1153       * asked to enter Active state. This cancels any checkpoints
1154       * currently being taken.
1155       */
1156      void prepareToStopStandbyServices() throws ServiceFailedException {
1157        if (standbyCheckpointer != null) {
1158          standbyCheckpointer.cancelAndPreventCheckpoints(
1159              "About to leave standby state");
1160        }
1161      }
1162    
1163      /** Stop services required in standby state */
1164      void stopStandbyServices() throws IOException {
1165        LOG.info("Stopping services started for standby state");
1166        if (standbyCheckpointer != null) {
1167          standbyCheckpointer.stop();
1168        }
1169        if (editLogTailer != null) {
1170          editLogTailer.stop();
1171        }
1172        if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) {
1173          dir.fsImage.editLog.close();
1174        }
1175      }
1176      
1177      @Override
1178      public void checkOperation(OperationCategory op) throws StandbyException {
1179        if (haContext != null) {
1180          // null in some unit tests
1181          haContext.checkOperation(op);
1182        }
1183      }
1184      
1185      /**
1186       * @throws RetriableException
1187       *           If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
1188       *           NameNode is in active state
1189       * @throws SafeModeException
1190       *           Otherwise if NameNode is in SafeMode.
1191       */
1192      private void checkNameNodeSafeMode(String errorMsg)
1193          throws RetriableException, SafeModeException {
1194        if (isInSafeMode()) {
1195          SafeModeException se = new SafeModeException(errorMsg, safeMode);
1196          if (haEnabled && haContext != null
1197              && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1198              && shouldRetrySafeMode(this.safeMode)) {
1199            throw new RetriableException(se);
1200          } else {
1201            throw se;
1202          }
1203        }
1204      }
1205      
1206      /**
1207       * We already know that the safemode is on. We will throw a RetriableException
1208       * if the safemode is not manual or caused by low resource.
1209       */
1210      private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
1211        if (safeMode == null) {
1212          return false;
1213        } else {
1214          return !safeMode.isManual() && !safeMode.areResourcesLow();
1215        }
1216      }
1217      
  /**
   * @return the image storage directories configured under
   *         {@code DFS_NAMENODE_NAME_DIR_KEY}, as URIs.
   */
  public static Collection<URI> getNamespaceDirs(Configuration conf) {
    return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
  }
1221    
1222      /**
1223       * Get all edits dirs which are required. If any shared edits dirs are
1224       * configured, these are also included in the set of required dirs.
1225       * 
1226       * @param conf the HDFS configuration.
1227       * @return all required dirs.
1228       */
1229      public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
1230        Set<URI> ret = new HashSet<URI>();
1231        ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
1232        ret.addAll(getSharedEditsDirs(conf));
1233        return ret;
1234      }
1235    
1236      private static Collection<URI> getStorageDirs(Configuration conf,
1237                                                    String propertyName) {
1238        Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
1239        StartupOption startOpt = NameNode.getStartupOption(conf);
1240        if(startOpt == StartupOption.IMPORT) {
1241          // In case of IMPORT this will get rid of default directories 
1242          // but will retain directories specified in hdfs-site.xml
1243          // When importing image from a checkpoint, the name-node can
1244          // start with empty set of storage directories.
1245          Configuration cE = new HdfsConfiguration(false);
1246          cE.addResource("core-default.xml");
1247          cE.addResource("core-site.xml");
1248          cE.addResource("hdfs-default.xml");
1249          Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
1250          dirNames.removeAll(dirNames2);
1251          if(dirNames.isEmpty())
1252            LOG.warn("!!! WARNING !!!" +
1253              "\n\tThe NameNode currently runs without persistent storage." +
1254              "\n\tAny changes to the file system meta-data may be lost." +
1255              "\n\tRecommended actions:" +
1256              "\n\t\t- shutdown and restart NameNode with configured \"" 
1257              + propertyName + "\" in hdfs-site.xml;" +
1258              "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
1259              "of the file system meta-data.");
1260        } else if (dirNames.isEmpty()) {
1261          dirNames = Collections.singletonList(
1262              DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
1263        }
1264        return Util.stringCollectionAsURIs(dirNames);
1265      }
1266    
1267      /**
1268       * Return an ordered list of edits directories to write to.
1269       * The list is ordered such that all shared edits directories
1270       * are ordered before non-shared directories, and any duplicates
1271       * are removed. The order they are specified in the configuration
1272       * is retained.
1273       * @return Collection of shared edits directories.
1274       * @throws IOException if multiple shared edits directories are configured
1275       */
1276      public static List<URI> getNamespaceEditsDirs(Configuration conf)
1277          throws IOException {
1278        return getNamespaceEditsDirs(conf, true);
1279      }
1280      
1281      public static List<URI> getNamespaceEditsDirs(Configuration conf,
1282          boolean includeShared)
1283          throws IOException {
1284        // Use a LinkedHashSet so that order is maintained while we de-dup
1285        // the entries.
1286        LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();
1287        
1288        if (includeShared) {
1289          List<URI> sharedDirs = getSharedEditsDirs(conf);
1290      
1291          // Fail until multiple shared edits directories are supported (HDFS-2782)
1292          if (sharedDirs.size() > 1) {
1293            throw new IOException(
1294                "Multiple shared edits directories are not yet supported");
1295          }
1296      
1297          // First add the shared edits dirs. It's critical that the shared dirs
1298          // are added first, since JournalSet syncs them in the order they are listed,
1299          // and we need to make sure all edits are in place in the shared storage
1300          // before they are replicated locally. See HDFS-2874.
1301          for (URI dir : sharedDirs) {
1302            if (!editsDirs.add(dir)) {
1303              LOG.warn("Edits URI " + dir + " listed multiple times in " + 
1304                  DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
1305            }
1306          }
1307        }    
1308        // Now add the non-shared dirs.
1309        for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
1310          if (!editsDirs.add(dir)) {
1311            LOG.warn("Edits URI " + dir + " listed multiple times in " + 
1312                DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
1313                DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
1314          }
1315        }
1316    
1317        if (editsDirs.isEmpty()) {
1318          // If this is the case, no edit dirs have been explicitly configured.
1319          // Image dirs are to be used for edits too.
1320          return Lists.newArrayList(getNamespaceDirs(conf));
1321        } else {
1322          return Lists.newArrayList(editsDirs);
1323        }
1324      }
1325      
1326      /**
1327       * Returns edit directories that are shared between primary and secondary.
1328       * @param conf
1329       * @return Collection of edit directories.
1330       */
1331      public static List<URI> getSharedEditsDirs(Configuration conf) {
1332        // don't use getStorageDirs here, because we want an empty default
1333        // rather than the dir in /tmp
1334        Collection<String> dirNames = conf.getTrimmedStringCollection(
1335            DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
1336        return Util.stringCollectionAsURIs(dirNames);
1337      }
1338    
  /** Acquire the namesystem read lock. */
  @Override
  public void readLock() {
    this.fsLock.readLock().lock();
  }
  /**
   * Interruptibly acquire the long read lock followed by the regular read
   * lock, for read operations expected to hold the lock for a long time.
   * If interrupted after the long read lock is held but before the regular
   * read lock is obtained, the long read lock is released so no lock leaks.
   */
  @Override
  public void longReadLockInterruptibly() throws InterruptedException {
    this.fsLock.longReadLock().lockInterruptibly();
    try {
      this.fsLock.readLock().lockInterruptibly();
    } catch (InterruptedException ie) {
      // In the event we're interrupted while getting the normal FSNS read lock,
      // release the long read lock.
      this.fsLock.longReadLock().unlock();
      throw ie;
    }
  }
  /**
   * Release both locks taken by {@link #longReadLockInterruptibly()}, in
   * the reverse of their acquisition order.
   */
  @Override
  public void longReadUnlock() {
    this.fsLock.readLock().unlock();
    this.fsLock.longReadLock().unlock();
  }
  /** Release the namesystem read lock. */
  @Override
  public void readUnlock() {
    this.fsLock.readLock().unlock();
  }
  /**
   * Acquire the namesystem write lock. The long read lock is taken first so
   * that writers are mutually excluded from long-running read operations.
   */
  @Override
  public void writeLock() {
    this.fsLock.longReadLock().lock();
    this.fsLock.writeLock().lock();
  }
  /**
   * Interruptibly acquire the namesystem write lock (long read lock first,
   * mirroring {@link #writeLock()}). If interrupted after the long read lock
   * is held but before the write lock is obtained, the long read lock is
   * released so no lock leaks.
   */
  @Override
  public void writeLockInterruptibly() throws InterruptedException {
    this.fsLock.longReadLock().lockInterruptibly();
    try {
      this.fsLock.writeLock().lockInterruptibly();
    } catch (InterruptedException ie) {
      // In the event we're interrupted while getting the normal FSNS write
      // lock, release the long read lock.
      this.fsLock.longReadLock().unlock();
      throw ie;
    }
  }
  /**
   * Release both locks taken by {@link #writeLock()}, in the reverse of
   * their acquisition order.
   */
  @Override
  public void writeUnlock() {
    this.fsLock.writeLock().unlock();
    this.fsLock.longReadLock().unlock();
  }
  /** @return true iff the current thread holds the namesystem write lock. */
  @Override
  public boolean hasWriteLock() {
    return this.fsLock.isWriteLockedByCurrentThread();
  }
  /**
   * @return true iff the current thread holds the read lock, or the write
   *         lock (which subsumes read access).
   */
  @Override
  public boolean hasReadLock() {
    return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
  }
1394    
  /** @return the current thread's reentrant read-lock hold count. */
  public int getReadHoldCount() {
    return this.fsLock.getReadHoldCount();
  }
1398    
  /** @return the current thread's reentrant write-lock hold count. */
  public int getWriteHoldCount() {
    return this.fsLock.getWriteHoldCount();
  }
1402    
  /** @return a snapshot of the namespace information, under the read lock. */
  NamespaceInfo getNamespaceInfo() {
    readLock();
    try {
      return unprotectedGetNamespaceInfo();
    } finally {
      readUnlock();
    }
  }
1411    
1412      /**
1413       * Version of @see #getNamespaceInfo() that is not protected by a lock.
1414       */
1415      NamespaceInfo unprotectedGetNamespaceInfo() {
1416        return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(),
1417            getClusterId(), getBlockPoolId(),
1418            dir.fsImage.getStorage().getCTime());
1419      }
1420    
1421      /**
1422       * Close down this file system manager.
1423       * Causes heartbeat and lease daemons to stop; waits briefly for
1424       * them to finish, but a short timeout returns control back to caller.
1425       */
1426      void close() {
1427        fsRunning = false;
1428        try {
1429          stopCommonServices();
1430          if (smmthread != null) smmthread.interrupt();
1431        } finally {
1432          // using finally to ensure we also wait for lease daemon
1433          try {
1434            stopActiveServices();
1435            stopStandbyServices();
1436            if (dir != null) {
1437              dir.close();
1438            }
1439          } catch (IOException ie) {
1440            LOG.error("Error closing FSDirectory", ie);
1441            IOUtils.cleanup(LOG, dir);
1442          }
1443        }
1444      }
1445    
  /** @return true while the namesystem has not been closed. */
  @Override
  public boolean isRunning() {
    return fsRunning;
  }
1450      
  /**
   * @return true if this namenode is (or, during startup, will be) in the
   *         standby HA state.
   */
  @Override
  public boolean isInStandbyState() {
    if (haContext == null || haContext.getState() == null) {
      // We're still starting up. In this case, if HA is
      // on for the cluster, we always start in standby. Otherwise
      // start in active.
      return haEnabled;
    }

    return HAServiceState.STANDBY == haContext.getState().getServiceState();
  }
1462    
1463      /**
1464       * Dump all metadata into specified file
1465       */
1466      void metaSave(String filename) throws IOException {
1467        checkSuperuserPrivilege();
1468        checkOperation(OperationCategory.UNCHECKED);
1469        writeLock();
1470        try {
1471          checkOperation(OperationCategory.UNCHECKED);
1472          File file = new File(System.getProperty("hadoop.log.dir"), filename);
1473          PrintWriter out = new PrintWriter(new BufferedWriter(
1474              new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
1475          metaSave(out);
1476          out.flush();
1477          out.close();
1478        } finally {
1479          writeUnlock();
1480        }
1481      }
1482    
  /**
   * Write the metadata dump (inode/block totals plus the block manager's
   * detailed report) to the given writer. Caller must hold the write lock.
   */
  private void metaSave(PrintWriter out) {
    assert hasWriteLock();
    long totalInodes = this.dir.totalInodes();
    long totalBlocks = this.getBlocksTotal();
    out.println(totalInodes + " files and directories, " + totalBlocks
        + " blocks = " + (totalInodes + totalBlocks) + " total");

    blockManager.metaSave(out);
  }
1492    
1493      private String metaSaveAsString() {
1494        StringWriter sw = new StringWriter();
1495        PrintWriter pw = new PrintWriter(sw);
1496        metaSave(pw);
1497        pw.flush();
1498        return sw.toString();
1499      }
1500      
1501    
  /** @return the server-default block size in bytes. */
  long getDefaultBlockSize() {
    return serverDefaults.getBlockSize();
  }
1505    
  /**
   * @return the server defaults advertised to clients.
   * @throws StandbyException if this namenode cannot serve read operations
   */
  FsServerDefaults getServerDefaults() throws StandbyException {
    checkOperation(OperationCategory.READ);
    return serverDefaults;
  }
1510    
  /** @return the configured access-time precision in milliseconds. */
  long getAccessTimePrecision() {
    return accessTimePrecision;
  }
1514    
  /** @return true if access-time tracking is enabled (precision > 0). */
  private boolean isAccessTimeSupported() {
    return accessTimePrecision > 0;
  }
1518    
1519      /////////////////////////////////////////////////////////
1520      //
1521      // These methods are called by HadoopFS clients
1522      //
1523      /////////////////////////////////////////////////////////
1524      /**
1525       * Set permissions for an existing file.
1526       * @throws IOException
1527       */
1528      void setPermission(String src, FsPermission permission)
1529          throws AccessControlException, FileNotFoundException, SafeModeException,
1530          UnresolvedLinkException, IOException {
1531        try {
1532          setPermissionInt(src, permission);
1533        } catch (AccessControlException e) {
1534          logAuditEvent(false, "setPermission", src);
1535          throw e;
1536        }
1537      }
1538    
  /**
   * Internal implementation of {@link #setPermission}: verifies ownership,
   * applies the permission change under the write lock, syncs the edit log
   * and audit-logs the successful operation.
   */
  private void setPermissionInt(String src, FsPermission permission)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after taking the lock; HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set permission for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.setPermission(src, permission);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "setPermission", src, null, resultingStat);
  }
1560    
1561      /**
1562       * Set owner for an existing file.
1563       * @throws IOException
1564       */
1565      void setOwner(String src, String username, String group)
1566          throws AccessControlException, FileNotFoundException, SafeModeException,
1567          UnresolvedLinkException, IOException {
1568        try {
1569          setOwnerInt(src, username, group);
1570        } catch (AccessControlException e) {
1571          logAuditEvent(false, "setOwner", src);
1572          throw e;
1573        } 
1574      }
1575    
  /**
   * Internal implementation of {@link #setOwner}: verifies ownership and,
   * for non-superusers, that the new owner is the caller and the new group
   * is one the caller belongs to; then applies the change under the write
   * lock, syncs the edit log and audit-logs success.
   */
  private void setOwnerInt(String src, String username, String group)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after taking the lock; HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set owner for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      if (!pc.isSuperUser()) {
        // Non-superusers may only "chown" to themselves and to a group
        // they are a member of.
        if (username != null && !pc.getUser().equals(username)) {
          throw new AccessControlException("Non-super user cannot change owner");
        }
        if (group != null && !pc.containsGroup(group)) {
          throw new AccessControlException("User does not belong to " + group);
        }
      }
      dir.setOwner(src, username, group);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "setOwner", src, null, resultingStat);
  }
1605    
1606      /**
1607       * Get block locations within the specified range.
1608       * @see ClientProtocol#getBlockLocations(String, long, long)
1609       */
1610      LocatedBlocks getBlockLocations(String clientMachine, String src,
1611          long offset, long length) throws AccessControlException,
1612          FileNotFoundException, UnresolvedLinkException, IOException {
1613        LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
1614            true);
1615        if (blocks != null) {
1616          blockManager.getDatanodeManager().sortLocatedBlocks(
1617              clientMachine, blocks.getLocatedBlocks());
1618          
1619          LocatedBlock lastBlock = blocks.getLastLocatedBlock();
1620          if (lastBlock != null) {
1621            ArrayList<LocatedBlock> lastBlockList = new ArrayList<LocatedBlock>();
1622            lastBlockList.add(lastBlock);
1623            blockManager.getDatanodeManager().sortLocatedBlocks(
1624                                  clientMachine, lastBlockList);
1625          }
1626        }
1627        return blocks;
1628      }
1629    
1630      /**
1631       * Get block locations within the specified range.
1632       * @see ClientProtocol#getBlockLocations(String, long, long)
1633       * @throws FileNotFoundException, UnresolvedLinkException, IOException
1634       */
1635      LocatedBlocks getBlockLocations(String src, long offset, long length,
1636          boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
1637          throws FileNotFoundException, UnresolvedLinkException, IOException {
1638        try {
1639          return getBlockLocationsInt(src, offset, length, doAccessTime,
1640                                      needBlockToken, checkSafeMode);
1641        } catch (AccessControlException e) {
1642          logAuditEvent(false, "open", src);
1643          throw e;
1644        }
1645      }
1646    
  /**
   * Internal implementation of {@link #getBlockLocations}: validates the
   * range arguments, fetches the locations (updating access time if
   * needed), and optionally fails blocks with no locations while in
   * safe mode.
   */
  private LocatedBlocks getBlockLocationsInt(String src, long offset,
      long length, boolean doAccessTime, boolean needBlockToken,
      boolean checkSafeMode)
      throws FileNotFoundException, UnresolvedLinkException, IOException {
    if (offset < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative offset is not supported. File: " + src);
    }
    if (length < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative length is not supported. File: " + src);
    }
    final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
        offset, length, doAccessTime, needBlockToken);
    logAuditEvent(true, "open", src);
    if (checkSafeMode && isInSafeMode()) {
      for (LocatedBlock b : ret.getLocatedBlocks()) {
        // if safemode & no block locations yet then throw safemodeException
        if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
          SafeModeException se = new SafeModeException(
              "Zero blocklocations for " + src, safeMode);
          if (haEnabled && haContext != null &&
              haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
            // On an active HA namenode the client should retry: block
            // reports may still be arriving after a failover.
            throw new RetriableException(se);
          } else {
            throw se;
          }
        }
      }
    }
    return ret;
  }
1679    
1680      /*
1681       * Get block locations within the specified range, updating the
1682       * access times if necessary. 
1683       */
1684      private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset,
1685          long length, boolean doAccessTime, boolean needBlockToken)
1686          throws FileNotFoundException,
1687          UnresolvedLinkException, IOException {
1688        FSPermissionChecker pc = getPermissionChecker();
1689        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1690        for (int attempt = 0; attempt < 2; attempt++) {
1691          boolean isReadOp = (attempt == 0);
1692          if (isReadOp) { // first attempt is with readlock
1693            checkOperation(OperationCategory.READ);
1694            readLock();
1695          }  else { // second attempt is with  write lock
1696            checkOperation(OperationCategory.WRITE);
1697            writeLock(); // writelock is needed to set accesstime
1698          }
1699          src = FSDirectory.resolvePath(src, pathComponents, dir);
1700          try {
1701            if (isReadOp) {
1702              checkOperation(OperationCategory.READ);
1703            } else {
1704              checkOperation(OperationCategory.WRITE);
1705            }
1706            if (isPermissionEnabled) {
1707              checkPathAccess(pc, src, FsAction.READ);
1708            }
1709    
1710            // if the namenode is in safemode, then do not update access time
1711            if (isInSafeMode()) {
1712              doAccessTime = false;
1713            }
1714    
1715            final INodesInPath iip = dir.getLastINodeInPath(src);
1716            final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
1717            if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
1718                && doAccessTime && isAccessTimeSupported()) {
1719              final long now = now();
1720              if (now > inode.getAccessTime() + getAccessTimePrecision()) {
1721                // if we have to set access time but we only have the readlock, then
1722                // restart this entire operation with the writeLock.
1723                if (isReadOp) {
1724                  continue;
1725                }
1726                dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshotId());
1727              }
1728            }
1729            final long fileSize = iip.isSnapshot() ?
1730                inode.computeFileSize(iip.getPathSnapshotId())
1731                : inode.computeFileSizeNotIncludingLastUcBlock();
1732            boolean isUc = inode.isUnderConstruction();
1733            if (iip.isSnapshot()) {
1734              // if src indicates a snapshot file, we need to make sure the returned
1735              // blocks do not exceed the size of the snapshot file.
1736              length = Math.min(length, fileSize - offset);
1737              isUc = false;
1738            }
1739            LocatedBlocks blocks =
1740              blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
1741                isUc, offset, length, needBlockToken, iip.isSnapshot());
1742            // Set caching information for the located blocks.
1743            for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1744              cacheManager.setCachedLocations(lb);
1745            }
1746            return blocks;
1747          } finally {
1748            if (isReadOp) {
1749              readUnlock();
1750            } else {
1751              writeUnlock();
1752            }
1753          }
1754        }
1755        return null; // can never reach here
1756      }
1757    
1758      /**
1759       * Moves all the blocks from srcs and appends them to trg
1760       * To avoid rollbacks we will verify validitity of ALL of the args
1761       * before we start actual move.
1762       * 
1763       * This does not support ".inodes" relative path
1764       * @param target
1765       * @param srcs
1766       * @throws IOException
1767       */
1768      void concat(String target, String [] srcs) 
1769          throws IOException, UnresolvedLinkException {
1770        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1771        if (cacheEntry != null && cacheEntry.isSuccess()) {
1772          return; // Return previous response
1773        }
1774        
1775        // Either there is no previous request in progres or it has failed
1776        if(FSNamesystem.LOG.isDebugEnabled()) {
1777          FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
1778              " to " + target);
1779        }
1780        
1781        boolean success = false;
1782        try {
1783          concatInt(target, srcs, cacheEntry != null);
1784          success = true;
1785        } catch (AccessControlException e) {
1786          logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
1787          throw e;
1788        } finally {
1789          RetryCache.setState(cacheEntry, success);
1790        }
1791      }
1792    
  /**
   * Internal implementation of {@link #concat}: validates the arguments
   * (non-empty target, at least one source, all in the same parent
   * directory), performs the move under the write lock, syncs the edit log
   * and audit-logs success.
   */
  private void concatInt(String target, String [] srcs,
      boolean logRetryCache) throws IOException, UnresolvedLinkException {
    // verify args
    if(target.isEmpty()) {
      throw new IllegalArgumentException("Target file name is empty");
    }
    if(srcs == null || srcs.length == 0) {
      throw new IllegalArgumentException("No sources given");
    }

    // We require all files be in the same directory
    String trgParent =
      target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
    for (String s : srcs) {
      String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
      if (!srcParent.equals(trgParent)) {
        throw new IllegalArgumentException(
           "Sources and target are not in the same directory");
      }
    }

    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check after taking the lock; HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot concat " + target);
      concatInternal(pc, target, srcs, logRetryCache);
      resultingStat = getAuditFileInfo(target, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
  }
1829    
  /**
   * See {@link #concat(String, String[])}.
   * Validates every precondition (permissions; target not empty, not under
   * construction, not in a snapshot, last block full; sources non-empty with
   * matching replication and block sizes; no duplicate files) before
   * delegating the actual move to FSDirectory. Caller must hold the write
   * lock.
   */
  private void concatInternal(FSPermissionChecker pc, String target,
      String[] srcs, boolean logRetryCache) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();

    // write permission for the target
    if (isPermissionEnabled) {
      checkPathAccess(pc, target, FsAction.WRITE);

      // and srcs
      for(String aSrc: srcs) {
        checkPathAccess(pc, aSrc, FsAction.READ); // read the file
        checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete
      }
    }

    // to make sure no two files are the same
    Set<INode> si = new HashSet<INode>();

    // we put the following prerequisite for the operation
    // replication and blocks sizes should be the same for ALL the blocks

    // check the target
    final INodeFile trgInode = INodeFile.valueOf(dir.getINode4Write(target),
        target);
    if(trgInode.isUnderConstruction()) {
      throw new HadoopIllegalArgumentException("concat: target file "
          + target + " is under construction");
    }
    // per design target shouldn't be empty and all the blocks same size
    if(trgInode.numBlocks() == 0) {
      throw new HadoopIllegalArgumentException("concat: target file "
          + target + " is empty");
    }
    if (trgInode.isWithSnapshot()) {
      throw new HadoopIllegalArgumentException("concat: target file "
          + target + " is in a snapshot");
    }

    long blockSize = trgInode.getPreferredBlockSize();

    // check the end block to be full
    final BlockInfo last = trgInode.getLastBlock();
    if(blockSize != last.getNumBytes()) {
      throw new HadoopIllegalArgumentException("The last block in " + target
          + " is not full; last block size = " + last.getNumBytes()
          + " but file block size = " + blockSize);
    }

    si.add(trgInode);
    final short repl = trgInode.getFileReplication();

    // now check the srcs
    boolean endSrc = false; // final src file doesn't have to have full end block
    for(int i=0; i<srcs.length; i++) {
      String src = srcs[i];
      if(i==srcs.length-1)
        endSrc=true;

      final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
      if(src.isEmpty()
          || srcInode.isUnderConstruction()
          || srcInode.numBlocks() == 0) {
        throw new HadoopIllegalArgumentException("concat: source file " + src
            + " is invalid or empty or underConstruction");
      }

      // check replication and blocks size
      if(repl != srcInode.getBlockReplication()) {
        throw new HadoopIllegalArgumentException("concat: the soruce file "
            + src + " and the target file " + target
            + " should have the same replication: source replication is "
            + srcInode.getBlockReplication()
            + " but target replication is " + repl);
      }

      //boolean endBlock=false;
      // verify that all the blocks are of the same length as target
      // should be enough to check the end blocks
      final BlockInfo[] srcBlocks = srcInode.getBlocks();
      int idx = srcBlocks.length-1;
      if(endSrc)
        idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
      if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
        throw new HadoopIllegalArgumentException("concat: the soruce file "
            + src + " and the target file " + target
            + " should have the same blocks sizes: target block size is "
            + blockSize + " but the size of source block " + idx + " is "
            + srcBlocks[idx].getNumBytes());
      }

      si.add(srcInode);
    }

    // make sure no two files are the same
    if(si.size() < srcs.length+1) { // trg + srcs
      // it means at least two files are the same
      throw new HadoopIllegalArgumentException(
          "concat: at least two of the source files are the same");
    }

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " +
          Arrays.toString(srcs) + " to " + target);
    }

    dir.concat(target,srcs, logRetryCache);
  }
1939      
1940      /**
1941       * stores the modification and access time for this inode. 
1942       * The access time is precise upto an hour. The transaction, if needed, is
1943       * written to the edits log but is not flushed.
1944       */
1945      void setTimes(String src, long mtime, long atime) 
1946          throws IOException, UnresolvedLinkException {
1947        if (!isAccessTimeSupported() && atime != -1) {
1948          throw new IOException("Access time for hdfs is not configured. " +
1949                                " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
1950        }
1951        try {
1952          setTimesInt(src, mtime, atime);
1953        } catch (AccessControlException e) {
1954          logAuditEvent(false, "setTimes", src);
1955          throw e;
1956        }
1957      }
1958    
  /**
   * Internal implementation of {@link #setTimes}: applies the time change
   * under the write lock (the edit log transaction is written but not
   * flushed) and audit-logs success.
   */
  private void setTimesInt(String src, long mtime, long atime)
    throws IOException, UnresolvedLinkException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after taking the lock; HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set times " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);

      // Write access is required to set access and modification times
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }
      final INodesInPath iip = dir.getINodesInPath4Write(src);
      final INode inode = iip.getLastINode();
      if (inode != null) {
        dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshotId());
        resultingStat = getAuditFileInfo(src, false);
      } else {
        throw new FileNotFoundException("File/Directory " + src + " does not exist.");
      }
    } finally {
      writeUnlock();
    }
    logAuditEvent(true, "setTimes", src, null, resultingStat);
  }
1988    
1989      /**
1990       * Create a symbolic link.
1991       */
1992      @SuppressWarnings("deprecation")
1993      void createSymlink(String target, String link,
1994          PermissionStatus dirPerms, boolean createParent) 
1995          throws IOException, UnresolvedLinkException {
1996        if (!FileSystem.areSymlinksEnabled()) {
1997          throw new UnsupportedOperationException("Symlinks not supported");
1998        }
1999        if (!DFSUtil.isValidName(link)) {
2000          throw new InvalidPathException("Invalid link name: " + link);
2001        }
2002        if (FSDirectory.isReservedName(target)) {
2003          throw new InvalidPathException("Invalid target name: " + target);
2004        }
2005        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
2006        if (cacheEntry != null && cacheEntry.isSuccess()) {
2007          return; // Return previous response
2008        }
2009        boolean success = false;
2010        try {
2011          createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
2012          success = true;
2013        } catch (AccessControlException e) {
2014          logAuditEvent(false, "createSymlink", link, target, null);
2015          throw e;
2016        } finally {
2017          RetryCache.setState(cacheEntry, success);
2018        }
2019      }
2020    
  /**
   * Internal implementation of {@link #createSymlink}: checks parent
   * existence (when createParent is false), creatability, ancestor write
   * access and namespace quota, then adds the symlink under the write lock,
   * syncs the edit log and audit-logs success.
   */
  private void createSymlinkInt(String target, String link,
      PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
      throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
          + target + " link=" + link);
    }
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
    writeLock();
    try {
      // Re-check after taking the lock; HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create symlink " + link);
      link = FSDirectory.resolvePath(link, pathComponents, dir);
      if (!createParent) {
        verifyParentDir(link);
      }
      if (!dir.isValidToCreate(link)) {
        throw new IOException("failed to create link " + link
            +" either because the filename is invalid or the file exists");
      }
      if (isPermissionEnabled) {
        checkAncestorAccess(pc, link, FsAction.WRITE);
      }
      // validate that we have enough inodes.
      checkFsObjectLimit();

      // add symbolic link to namespace
      dir.addSymlink(link, target, dirPerms, createParent, logRetryCache);
      resultingStat = getAuditFileInfo(link, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "createSymlink", link, target, resultingStat);
  }
2059    
2060      /**
2061       * Set replication for an existing file.
2062       * 
2063       * The NameNode sets new replication and schedules either replication of 
2064       * under-replicated data blocks or removal of the excessive block copies 
2065       * if the blocks are over-replicated.
2066       * 
2067       * @see ClientProtocol#setReplication(String, short)
2068       * @param src file name
2069       * @param replication new replication
2070       * @return true if successful; 
2071       *         false if file does not exist or is a directory
2072       */
2073      boolean setReplication(final String src, final short replication)
2074          throws IOException {
2075        try {
2076          return setReplicationInt(src, replication);
2077        } catch (AccessControlException e) {
2078          logAuditEvent(false, "setReplication", src);
2079          throw e;
2080        }
2081      }
2082    
  /**
   * Worker for setReplication: updates the replication factor of src under
   * the write lock, then syncs the edit log and audits on success.
   *
   * @param src file name (may be a reserved path; resolved under the lock)
   * @param replication new replication factor
   * @return true if src is an existing file; false otherwise
   */
  private boolean setReplicationInt(String src, final short replication)
      throws IOException {
    // Validate the requested replication range before taking any lock.
    blockManager.verifyReplication(src, replication, null);
    final boolean isFile;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock; NN state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set replication for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      final short[] blockRepls = new short[2]; // 0: old, 1: new
      final Block[] blocks = dir.setReplication(src, replication, blockRepls);
      // dir.setReplication returns null when src is absent or a directory.
      isFile = blocks != null;
      if (isFile) {
        // Schedule replication work or excess-replica removal per block.
        blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
      }
    } finally {
      writeUnlock();
    }

    // Sync outside the lock to shorten lock hold time.
    getEditLog().logSync();
    if (isFile) {
      logAuditEvent(true, "setReplication", src);
    }
    return isFile;
  }
2115    
2116      long getPreferredBlockSize(String filename) 
2117          throws IOException, UnresolvedLinkException {
2118        FSPermissionChecker pc = getPermissionChecker();
2119        checkOperation(OperationCategory.READ);
2120        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
2121        readLock();
2122        try {
2123          checkOperation(OperationCategory.READ);
2124          filename = FSDirectory.resolvePath(filename, pathComponents, dir);
2125          if (isPermissionEnabled) {
2126            checkTraverse(pc, filename);
2127          }
2128          return dir.getPreferredBlockSize(filename);
2129        } finally {
2130          readUnlock();
2131        }
2132      }
2133    
2134      /**
2135       * Verify that parent directory of src exists.
2136       */
2137      private void verifyParentDir(String src) throws FileNotFoundException,
2138          ParentNotDirectoryException, UnresolvedLinkException {
2139        assert hasReadLock();
2140        Path parent = new Path(src).getParent();
2141        if (parent != null) {
2142          final INode parentNode = dir.getINode(parent.toString());
2143          if (parentNode == null) {
2144            throw new FileNotFoundException("Parent directory doesn't exist: "
2145                + parent);
2146          } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
2147            throw new ParentNotDirectoryException("Parent path is not a directory: "
2148                + parent);
2149          }
2150        }
2151      }
2152      
2153      /**
2154       * Create a new file entry in the namespace.
2155       * 
2156       * For description of parameters and exceptions thrown see
2157       * {@link ClientProtocol#create()}, except it returns valid file status upon
2158       * success
2159       * 
2160       * For retryCache handling details see -
2161       * {@link #getFileStatus(boolean, CacheEntryWithPayload)}
2162       * 
2163       */
2164      HdfsFileStatus startFile(String src, PermissionStatus permissions,
2165          String holder, String clientMachine, EnumSet<CreateFlag> flag,
2166          boolean createParent, short replication, long blockSize)
2167          throws AccessControlException, SafeModeException,
2168          FileAlreadyExistsException, UnresolvedLinkException,
2169          FileNotFoundException, ParentNotDirectoryException, IOException {
2170        HdfsFileStatus status = null;
2171        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2172            null);
2173        if (cacheEntry != null && cacheEntry.isSuccess()) {
2174          return (HdfsFileStatus) cacheEntry.getPayload();
2175        }
2176        
2177        try {
2178          status = startFileInt(src, permissions, holder, clientMachine, flag,
2179              createParent, replication, blockSize, cacheEntry != null);
2180        } catch (AccessControlException e) {
2181          logAuditEvent(false, "create", src);
2182          throw e;
2183        } finally {
2184          RetryCache.setState(cacheEntry, status != null, status);
2185        }
2186        return status;
2187      }
2188    
2189      private HdfsFileStatus startFileInt(String src, PermissionStatus permissions,
2190          String holder, String clientMachine, EnumSet<CreateFlag> flag,
2191          boolean createParent, short replication, long blockSize,
2192          boolean logRetryCache) throws AccessControlException, SafeModeException,
2193          FileAlreadyExistsException, UnresolvedLinkException,
2194          FileNotFoundException, ParentNotDirectoryException, IOException {
2195        if (NameNode.stateChangeLog.isDebugEnabled()) {
2196          NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src
2197              + ", holder=" + holder
2198              + ", clientMachine=" + clientMachine
2199              + ", createParent=" + createParent
2200              + ", replication=" + replication
2201              + ", createFlag=" + flag.toString());
2202        }
2203        if (!DFSUtil.isValidName(src)) {
2204          throw new InvalidPathException(src);
2205        }
2206        blockManager.verifyReplication(src, replication, clientMachine);
2207    
2208        boolean skipSync = false;
2209        HdfsFileStatus stat = null;
2210        FSPermissionChecker pc = getPermissionChecker();
2211        checkOperation(OperationCategory.WRITE);
2212        if (blockSize < minBlockSize) {
2213          throw new IOException("Specified block size is less than configured" +
2214              " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
2215              + "): " + blockSize + " < " + minBlockSize);
2216        }
2217        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2218        boolean create = flag.contains(CreateFlag.CREATE);
2219        boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
2220        writeLock();
2221        try {
2222          checkOperation(OperationCategory.WRITE);
2223          checkNameNodeSafeMode("Cannot create file" + src);
2224          src = FSDirectory.resolvePath(src, pathComponents, dir);
2225          startFileInternal(pc, src, permissions, holder, clientMachine, create,
2226              overwrite, createParent, replication, blockSize, logRetryCache);
2227          stat = dir.getFileInfo(src, false);
2228        } catch (StandbyException se) {
2229          skipSync = true;
2230          throw se;
2231        } finally {
2232          writeUnlock();
2233          // There might be transactions logged while trying to recover the lease.
2234          // They need to be sync'ed even when an exception was thrown.
2235          if (!skipSync) {
2236            getEditLog().logSync();
2237          }
2238        } 
2239        logAuditEvent(true, "create", src, null, stat);
2240        return stat;
2241      }
2242    
2243      /**
2244       * Create a new file or overwrite an existing file<br>
2245       * 
2246       * Once the file is create the client then allocates a new block with the next
2247       * call using {@link NameNode#addBlock()}.
2248       * <p>
2249       * For description of parameters and exceptions thrown see
2250       * {@link ClientProtocol#create()}
2251       */
2252      private void startFileInternal(FSPermissionChecker pc, String src,
2253          PermissionStatus permissions, String holder, String clientMachine,
2254          boolean create, boolean overwrite, boolean createParent,
2255          short replication, long blockSize, boolean logRetryEntry)
2256          throws FileAlreadyExistsException, AccessControlException,
2257          UnresolvedLinkException, FileNotFoundException,
2258          ParentNotDirectoryException, IOException {
2259        assert hasWriteLock();
2260        // Verify that the destination does not exist as a directory already.
2261        final INodesInPath iip = dir.getINodesInPath4Write(src);
2262        final INode inode = iip.getLastINode();
2263        if (inode != null && inode.isDirectory()) {
2264          throw new FileAlreadyExistsException(src +
2265              " already exists as a directory");
2266        }
2267        final INodeFile myFile = INodeFile.valueOf(inode, src, true);
2268        if (isPermissionEnabled) {
2269          if (overwrite && myFile != null) {
2270            checkPathAccess(pc, src, FsAction.WRITE);
2271          } else {
2272            checkAncestorAccess(pc, src, FsAction.WRITE);
2273          }
2274        }
2275    
2276        if (!createParent) {
2277          verifyParentDir(src);
2278        }
2279    
2280        try {
2281          if (myFile == null) {
2282            if (!create) {
2283              throw new FileNotFoundException("Can't overwrite non-existent " +
2284                  src + " for client " + clientMachine);
2285            }
2286          } else {
2287            if (overwrite) {
2288              try {
2289                deleteInt(src, true, false); // File exists - delete if overwrite
2290              } catch (AccessControlException e) {
2291                logAuditEvent(false, "delete", src);
2292                throw e;
2293              }
2294            } else {
2295              // If lease soft limit time is expired, recover the lease
2296              recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2297              throw new FileAlreadyExistsException(src + " for client " +
2298                  clientMachine + " already exists");
2299            }
2300          }
2301    
2302          checkFsObjectLimit();
2303          final DatanodeDescriptor clientNode = 
2304              blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2305    
2306          INodeFile newNode = dir.addFile(src, permissions, replication, blockSize,
2307              holder, clientMachine, clientNode);
2308          if (newNode == null) {
2309            throw new IOException("Unable to add " + src +  " to namespace");
2310          }
2311          leaseManager.addLease(newNode.getFileUnderConstructionFeature()
2312              .getClientName(), src);
2313    
2314          // record file record in log, record new generation stamp
2315          getEditLog().logOpenFile(src, newNode, logRetryEntry);
2316          if (NameNode.stateChangeLog.isDebugEnabled()) {
2317            NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
2318                src + " inode " + newNode.getId() + " " + holder);
2319          }
2320        } catch (IOException ie) {
2321          NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
2322              ie.getMessage());
2323          throw ie;
2324        }
2325      }
2326      
2327      /**
2328       * Append to an existing file for append.
2329       * <p>
2330       * 
2331       * The method returns the last block of the file if this is a partial block,
2332       * which can still be used for writing more data. The client uses the returned
2333       * block locations to form the data pipeline for this block.<br>
2334       * The method returns null if the last block is full. The client then
2335       * allocates a new block with the next call using {@link NameNode#addBlock()}.
2336       * <p>
2337       * 
2338       * For description of parameters and exceptions thrown see
2339       * {@link ClientProtocol#append(String, String)}
2340       * 
2341       * @return the last block locations if the block is partial or null otherwise
2342       */
2343      private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
2344          String holder, String clientMachine, boolean logRetryCache)
2345          throws AccessControlException, UnresolvedLinkException,
2346          FileNotFoundException, IOException {
2347        assert hasWriteLock();
2348        // Verify that the destination does not exist as a directory already.
2349        final INodesInPath iip = dir.getINodesInPath4Write(src);
2350        final INode inode = iip.getLastINode();
2351        if (inode != null && inode.isDirectory()) {
2352          throw new FileAlreadyExistsException("Cannot append to directory " + src
2353              + "; already exists as a directory.");
2354        }
2355        if (isPermissionEnabled) {
2356          checkPathAccess(pc, src, FsAction.WRITE);
2357        }
2358    
2359        try {
2360          if (inode == null) {
2361            throw new FileNotFoundException("failed to append to non-existent file "
2362              + src + " for client " + clientMachine);
2363          }
2364          INodeFile myFile = INodeFile.valueOf(inode, src, true);
2365          // Opening an existing file for write - may need to recover lease.
2366          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
2367          
2368          // recoverLeaseInternal may create a new InodeFile via 
2369          // finalizeINodeFileUnderConstruction so we need to refresh 
2370          // the referenced file.  
2371          myFile = INodeFile.valueOf(dir.getINode(src), src, true);
2372          
2373          final DatanodeDescriptor clientNode = 
2374              blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
2375          return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode,
2376              true, iip.getLatestSnapshotId(), logRetryCache);
2377        } catch (IOException ie) {
2378          NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
2379          throw ie;
2380        }
2381      }
2382      
2383      /**
2384       * Replace current node with a INodeUnderConstruction.
2385       * Recreate in-memory lease record.
2386       * 
2387       * @param src path to the file
2388       * @param file existing file object
2389       * @param leaseHolder identifier of the lease holder on this file
2390       * @param clientMachine identifier of the client machine
2391       * @param clientNode if the client is collocated with a DN, that DN's descriptor
2392       * @param writeToEditLog whether to persist this change to the edit log
2393       * @param logRetryCache whether to record RPC ids in editlog for retry cache
2394       *                      rebuilding
2395       * @return the last block locations if the block is partial or null otherwise
2396       * @throws UnresolvedLinkException
2397       * @throws IOException
2398       */
2399      LocatedBlock prepareFileForWrite(String src, INodeFile file,
2400          String leaseHolder, String clientMachine, DatanodeDescriptor clientNode,
2401          boolean writeToEditLog, int latestSnapshot, boolean logRetryCache)
2402          throws IOException {
2403        file = file.recordModification(latestSnapshot);
2404        final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine,
2405            clientNode);
2406    
2407        leaseManager.addLease(cons.getFileUnderConstructionFeature()
2408            .getClientName(), src);
2409        
2410        LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
2411        if (writeToEditLog) {
2412          getEditLog().logOpenFile(src, cons, logRetryCache);
2413        }
2414        return ret;
2415      }
2416    
2417      /**
2418       * Recover lease;
2419       * Immediately revoke the lease of the current lease holder and start lease
2420       * recovery so that the file can be forced to be closed.
2421       * 
2422       * @param src the path of the file to start lease recovery
2423       * @param holder the lease holder's name
2424       * @param clientMachine the client machine's name
2425       * @return true if the file is already closed
2426       * @throws IOException
2427       */
2428      boolean recoverLease(String src, String holder, String clientMachine)
2429          throws IOException {
2430        if (!DFSUtil.isValidName(src)) {
2431          throw new IOException("Invalid file name: " + src);
2432        }
2433      
2434        boolean skipSync = false;
2435        FSPermissionChecker pc = getPermissionChecker();
2436        checkOperation(OperationCategory.WRITE);
2437        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2438        writeLock();
2439        try {
2440          checkOperation(OperationCategory.WRITE);
2441          checkNameNodeSafeMode("Cannot recover the lease of " + src);
2442          src = FSDirectory.resolvePath(src, pathComponents, dir);
2443          final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
2444          if (!inode.isUnderConstruction()) {
2445            return true;
2446          }
2447          if (isPermissionEnabled) {
2448            checkPathAccess(pc, src, FsAction.WRITE);
2449          }
2450      
2451          recoverLeaseInternal(inode, src, holder, clientMachine, true);
2452        } catch (StandbyException se) {
2453          skipSync = true;
2454          throw se;
2455        } finally {
2456          writeUnlock();
2457          // There might be transactions logged while trying to recover the lease.
2458          // They need to be sync'ed even when an exception was thrown.
2459          if (!skipSync) {
2460            getEditLog().logSync();
2461          }
2462        }
2463        return false;
2464      }
2465    
  /**
   * Common lease-handling logic for create, append and recoverLease: examine
   * the lease on a file under construction and either recover it or reject
   * the caller's attempt to (re)open the file. No-op when fileInode is null
   * or not under construction. Must be called with the write lock held.
   *
   * @param fileInode the file whose lease is examined
   * @param src path of the file
   * @param holder the client requesting the open/recovery
   * @param clientMachine identifier of the requesting client's machine
   * @param force if true, revoke the current lease and start recovery
   *              immediately; if false, recovery only starts once the
   *              holder's soft lease limit has expired
   */
  private void recoverLeaseInternal(INodeFile fileInode, 
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    if (fileInode != null && fileInode.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);
      //
      // We found the lease for this file. And surprisingly the original
      // holder is trying to recreate this file. This should never occur.
      //
      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        if ((leaseFile != null && leaseFile.equals(lease)) ||
            lease.getHolder().equals(holder)) { 
          throw new AlreadyBeingCreatedException(
            "failed to create file " + src + " for " + holder +
            " for client " + clientMachine +
            " because current leaseholder is trying to recreate file.");
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        throw new AlreadyBeingCreatedException(
          "failed to create file " + src + " for " + holder +
          " for client " + clientMachine +
          " because pendingCreates is non-null but no leases found.");
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and 
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
          " from client " + clientName);
        internalReleaseLease(lease, src, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
          "Current lease holder " + lease.getHolder() +
          " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT 
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, null);
          if(!isClosed)
            throw new RecoveryInProgressException(
                "Failed to close file " + src +
                ". Lease recovery is in progress. Try again later.");
        } else {
          final BlockInfo lastBlock = fileInode.getLastBlock();
          // A last block already under recovery means another recovery
          // attempt is in flight; tell the caller to retry later.
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            throw new RecoveryInProgressException("Recovery in progress, file ["
                + src + "], " + "lease owner [" + lease.getHolder() + "]");
          } else {
            throw new AlreadyBeingCreatedException("Failed to create file ["
                + src + "] for [" + holder + "] for client [" + clientMachine
                + "], because this file is already being created by ["
                + clientName + "] on ["
                + uc.getClientMachine() + "]");
          }
        }
      }
    }
  }
2541    
2542      /**
2543       * Append to an existing file in the namespace.
2544       */
2545      LocatedBlock appendFile(String src, String holder, String clientMachine)
2546          throws AccessControlException, SafeModeException,
2547          FileAlreadyExistsException, FileNotFoundException,
2548          ParentNotDirectoryException, IOException {
2549        LocatedBlock lb = null;
2550        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2551            null);
2552        if (cacheEntry != null && cacheEntry.isSuccess()) {
2553          return (LocatedBlock) cacheEntry.getPayload();
2554        }
2555          
2556        boolean success = false;
2557        try {
2558          lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
2559          success = true;
2560          return lb;
2561        } catch (AccessControlException e) {
2562          logAuditEvent(false, "append", src);
2563          throw e;
2564        } finally {
2565          RetryCache.setState(cacheEntry, success, lb);
2566        }
2567      }
2568    
2569      private LocatedBlock appendFileInt(String src, String holder,
2570          String clientMachine, boolean logRetryCache)
2571          throws AccessControlException, SafeModeException,
2572          FileAlreadyExistsException, FileNotFoundException,
2573          ParentNotDirectoryException, IOException {
2574        if (NameNode.stateChangeLog.isDebugEnabled()) {
2575          NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
2576              + ", holder=" + holder
2577              + ", clientMachine=" + clientMachine);
2578        }
2579        boolean skipSync = false;
2580        if (!supportAppends) {
2581          throw new UnsupportedOperationException(
2582              "Append is not enabled on this NameNode. Use the " +
2583              DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
2584        }
2585    
2586        LocatedBlock lb = null;
2587        FSPermissionChecker pc = getPermissionChecker();
2588        checkOperation(OperationCategory.WRITE);
2589        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2590        writeLock();
2591        try {
2592          checkOperation(OperationCategory.WRITE);
2593          checkNameNodeSafeMode("Cannot append to file" + src);
2594          src = FSDirectory.resolvePath(src, pathComponents, dir);
2595          lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
2596        } catch (StandbyException se) {
2597          skipSync = true;
2598          throw se;
2599        } finally {
2600          writeUnlock();
2601          // There might be transactions logged while trying to recover the lease.
2602          // They need to be sync'ed even when an exception was thrown.
2603          if (!skipSync) {
2604            getEditLog().logSync();
2605          }
2606        }
2607        if (lb != null) {
2608          if (NameNode.stateChangeLog.isDebugEnabled()) {
2609            NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
2610                +src+" for "+holder+" at "+clientMachine
2611                +" block " + lb.getBlock()
2612                +" block size " + lb.getBlock().getNumBytes());
2613          }
2614        }
2615        logAuditEvent(true, "append", src);
2616        return lb;
2617      }
2618    
  /** Wrap a block in an ExtendedBlock carrying this namesystem's block pool id. */
  ExtendedBlock getExtendedBlock(Block blk) {
    return new ExtendedBlock(blockPoolId, blk);
  }
2622      
  /** Record the block pool id and propagate it to the block manager. */
  void setBlockPoolId(String bpid) {
    blockPoolId = bpid;
    blockManager.setBlockPoolId(blockPoolId);
  }
2627    
2628      /**
2629       * The client would like to obtain an additional block for the indicated
2630       * filename (which is being written-to).  Return an array that consists
2631       * of the block, plus a set of machines.  The first on this list should
2632       * be where the client writes data.  Subsequent items in the list must
2633       * be provided in the connection to the first datanode.
2634       *
2635       * Make sure the previous blocks have been reported by datanodes and
2636       * are replicated.  Will return an empty 2-elt array if we want the
2637       * client to "try again later".
2638       */
2639      LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
2640          ExtendedBlock previous, Set<Node> excludedNodes, 
2641          List<String> favoredNodes)
2642          throws LeaseExpiredException, NotReplicatedYetException,
2643          QuotaExceededException, SafeModeException, UnresolvedLinkException,
2644          IOException {
2645        long blockSize;
2646        int replication;
2647        DatanodeDescriptor clientNode = null;
2648    
2649        if(NameNode.stateChangeLog.isDebugEnabled()) {
2650          NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: "
2651              + src + " inodeId " +  fileId  + " for " + clientName);
2652        }
2653    
2654        // Part I. Analyze the state of the file with respect to the input data.
2655        checkOperation(OperationCategory.READ);
2656        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2657        readLock();
2658        try {
2659          checkOperation(OperationCategory.READ);
2660          src = FSDirectory.resolvePath(src, pathComponents, dir);
2661          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
2662          final INode[] inodes = analyzeFileState(
2663              src, fileId, clientName, previous, onRetryBlock).getINodes();
2664          final INodeFile pendingFile = inodes[inodes.length - 1].asFile();
2665    
2666          if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
2667            // This is a retry. Just return the last block if having locations.
2668            return onRetryBlock[0];
2669          }
2670          if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
2671            throw new IOException("File has reached the limit on maximum number of"
2672                + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
2673                + "): " + pendingFile.getBlocks().length + " >= "
2674                + maxBlocksPerFile);
2675          }
2676          blockSize = pendingFile.getPreferredBlockSize();
2677          clientNode = pendingFile.getFileUnderConstructionFeature().getClientNode();
2678          replication = pendingFile.getFileReplication();
2679        } finally {
2680          readUnlock();
2681        }
2682    
2683        // choose targets for the new block to be allocated.
2684        final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget( 
2685            src, replication, clientNode, excludedNodes, blockSize, favoredNodes);
2686    
2687        // Part II.
2688        // Allocate a new block, add it to the INode and the BlocksMap. 
2689        Block newBlock = null;
2690        long offset;
2691        checkOperation(OperationCategory.WRITE);
2692        writeLock();
2693        try {
2694          checkOperation(OperationCategory.WRITE);
2695          // Run the full analysis again, since things could have changed
2696          // while chooseTarget() was executing.
2697          LocatedBlock[] onRetryBlock = new LocatedBlock[1];
2698          INodesInPath inodesInPath =
2699              analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
2700          final INode[] inodes = inodesInPath.getINodes();
2701          final INodeFile pendingFile = inodes[inodes.length - 1].asFile();
2702    
2703          if (onRetryBlock[0] != null) {
2704            if (onRetryBlock[0].getLocations().length > 0) {
2705              // This is a retry. Just return the last block if having locations.
2706              return onRetryBlock[0];
2707            } else {
2708              // add new chosen targets to already allocated block and return
2709              BlockInfo lastBlockInFile = pendingFile.getLastBlock();
2710              ((BlockInfoUnderConstruction) lastBlockInFile)
2711                  .setExpectedLocations(targets);
2712              offset = pendingFile.computeFileSize();
2713              return makeLocatedBlock(lastBlockInFile, targets, offset);
2714            }
2715          }
2716    
2717          // commit the last block and complete it if it has minimum replicas
2718          commitOrCompleteLastBlock(pendingFile,
2719                                    ExtendedBlock.getLocalBlock(previous));
2720    
2721          // allocate new block, record block locations in INode.
2722          newBlock = createNewBlock();
2723          saveAllocatedBlock(src, inodesInPath, newBlock, targets);
2724    
2725          dir.persistNewBlock(src, pendingFile);
2726          offset = pendingFile.computeFileSize();
2727        } finally {
2728          writeUnlock();
2729        }
2730        getEditLog().logSync();
2731    
2732        // Return located block
2733        return makeLocatedBlock(newBlock, targets, offset);
2734      }
2735    
  /**
   * Verify namesystem state before allocating a new block for {@code src}:
   * checks the lease, safe mode, the fs-object limit, whether this RPC is a
   * retry of an earlier getAdditionalBlock() call, and whether the
   * penultimate block is minimally replicated.
   *
   * This is run twice per allocation ("Part I" before chooseTarget() and
   * "Part II" after it, under different locks), because the namesystem may
   * change while targets are being chosen.
   *
   * @param src path of the file being written
   * @param fileId inode id of the file as known by the client
   * @param clientName lease holder issuing the RPC
   * @param previous the block the client believes is the file's last block;
   *                 may be null (e.g. first allocation of an append pipeline)
   * @param onRetryBlock out-parameter; set to a located last block when the
   *                     call is recognized as a retry (case 2 below)
   * @return the resolved INodesInPath for {@code src}
   * @throws IOException if the lease is invalid, safe mode is on, the request
   *         is bogus (case 3), or the penultimate block is under-replicated
   */
  INodesInPath analyzeFileState(String src,
                                long fileId,
                                String clientName,
                                ExtendedBlock previous,
                                LocatedBlock[] onRetryBlock)
          throws IOException  {
    assert hasReadLock();

    checkBlock(previous);
    // Reset the out-parameter; only set on the retry path below.
    onRetryBlock[0] = null;
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot add block to " + src);

    // have we exceeded the configured limit of fs objects.
    checkFsObjectLimit();

    Block previousBlock = ExtendedBlock.getLocalBlock(previous);
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INodeFile pendingFile
        = checkLease(src, fileId, clientName, iip.getLastINode());
    BlockInfo lastBlockInFile = pendingFile.getLastBlock();
    if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
      // The block that the client claims is the current last block
      // doesn't match up with what we think is the last block. There are
      // four possibilities:
      // 1) This is the first block allocation of an append() pipeline
      //    which started appending exactly at a block boundary.
      //    In this case, the client isn't passed the previous block,
      //    so it makes the allocateBlock() call with previous=null.
      //    We can distinguish this since the last block of the file
      //    will be exactly a full block.
      // 2) This is a retry from a client that missed the response of a
      //    prior getAdditionalBlock() call, perhaps because of a network
      //    timeout, or because of an HA failover. In that case, we know
      //    by the fact that the client is re-issuing the RPC that it
      //    never began to write to the old block. Hence it is safe to
      //    to return the existing block.
      // 3) This is an entirely bogus request/bug -- we should error out
      //    rather than potentially appending a new block with an empty
      //    one in the middle, etc
      // 4) This is a retry from a client that timed out while
      //    the prior getAdditionalBlock() is still being processed,
      //    currently working on chooseTarget(). 
      //    There are no means to distinguish between the first and 
      //    the second attempts in Part I, because the first one hasn't
      //    changed the namesystem state yet.
      //    We run this analysis again in Part II where case 4 is impossible.

      BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
      if (previous == null &&
          lastBlockInFile != null &&
          lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() &&
          lastBlockInFile.isComplete()) {
        // Case 1
        if (NameNode.stateChangeLog.isDebugEnabled()) {
           NameNode.stateChangeLog.debug(
               "BLOCK* NameSystem.allocateBlock: handling block allocation" +
               " writing to a file with a complete previous block: src=" +
               src + " lastBlock=" + lastBlockInFile);
        }
      } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
        // The client's "previous" is our penultimate block, so the last
        // block must be the one allocated by the earlier attempt.
        if (lastBlockInFile.getNumBytes() != 0) {
          // Data already written to the supposedly-retried block: not a
          // retry after all.
          throw new IOException(
              "Request looked like a retry to allocate block " +
              lastBlockInFile + " but it already contains " +
              lastBlockInFile.getNumBytes() + " bytes");
        }

        // Case 2
        // Return the last block.
        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
            "caught retry for allocation of a new block in " +
            src + ". Returning previously allocated block " + lastBlockInFile);
        long offset = pendingFile.computeFileSize();
        onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
            ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
            offset);
        return iip;
      } else {
        // Case 3
        throw new IOException("Cannot allocate block in " + src + ": " +
            "passed 'previous' block " + previous + " does not match actual " +
            "last block in file " + lastBlockInFile);
      }
    }

    // Check if the penultimate block is minimally replicated
    if (!checkFileProgress(pendingFile, false)) {
      throw new NotReplicatedYetException("Not replicated yet: " + src);
    }
    return iip;
  }
2828    
2829      LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
2830                                            long offset) throws IOException {
2831        LocatedBlock lBlk = new LocatedBlock(
2832            getExtendedBlock(blk), locs, offset, false);
2833        getBlockManager().setBlockToken(
2834            lBlk, BlockTokenSecretManager.AccessMode.WRITE);
2835        return lBlk;
2836      }
2837    
  /**
   * Choose additional datanodes for an existing write pipeline after a
   * datanode failure (datanode-replacement-on-failure feature).
   *
   * The file's client node, preferred block size, and the already-chosen
   * storages are captured under the read lock; the placement policy's
   * chooseTarget() then runs outside the lock.
   *
   * @see NameNode#getAdditionalDatanode(String, ExtendedBlock, DatanodeInfo[], DatanodeInfo[], int, String)
   */
  LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk,
      final DatanodeInfo[] existings, final String[] storageIDs,
      final Set<Node> excludes,
      final int numAdditionalNodes, final String clientName
      ) throws IOException {
    //check if the feature is enabled
    dtpReplaceDatanodeOnFailure.checkEnabled();

    final DatanodeDescriptor clientnode;
    final long preferredblocksize;
    final List<DatanodeStorageInfo> chosen;
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      //check safe mode
      checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
      src = FSDirectory.resolvePath(src, pathComponents, dir);

      //check lease
      final INodeFile file = checkLease(src, clientName);
      clientnode = file.getFileUnderConstructionFeature().getClientNode();
      preferredblocksize = file.getPreferredBlockSize();

      //find datanode storages
      final DatanodeManager dm = blockManager.getDatanodeManager();
      chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
    } finally {
      readUnlock();
    }

    // choose new datanodes.
    // NOTE(review): this runs without the namesystem lock, so the state
    // captured above may be slightly stale — presumably acceptable for
    // placement decisions; confirm if tightening is ever needed.
    final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy(
        ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true,
            // TODO: get storage type from the file
        excludes, preferredblocksize, StorageType.DEFAULT);
    final LocatedBlock lb = new LocatedBlock(blk, targets);
    // COPY token: the client will copy replica data to the new nodes.
    blockManager.setBlockToken(lb, AccessMode.COPY);
    return lb;
  }
2880    
2881      /**
2882       * The client would like to let go of the given block
2883       */
2884      boolean abandonBlock(ExtendedBlock b, String src, String holder)
2885          throws LeaseExpiredException, FileNotFoundException,
2886          UnresolvedLinkException, IOException {
2887        if(NameNode.stateChangeLog.isDebugEnabled()) {
2888          NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
2889              + "of file " + src);
2890        }
2891        checkOperation(OperationCategory.WRITE);
2892        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2893        writeLock();
2894        try {
2895          checkOperation(OperationCategory.WRITE);
2896          checkNameNodeSafeMode("Cannot abandon block " + b + " for fle" + src);
2897          src = FSDirectory.resolvePath(src, pathComponents, dir);
2898    
2899          //
2900          // Remove the block from the pending creates list
2901          //
2902          INodeFile file = checkLease(src, holder);
2903          boolean removed = dir.removeBlock(src, file,
2904              ExtendedBlock.getLocalBlock(b));
2905          if (!removed) {
2906            return true;
2907          }
2908          if(NameNode.stateChangeLog.isDebugEnabled()) {
2909            NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
2910                                          + b + " is removed from pendingCreates");
2911          }
2912          dir.persistBlocks(src, file, false);
2913        } finally {
2914          writeUnlock();
2915        }
2916        getEditLog().logSync();
2917    
2918        return true;
2919      }
2920      
2921      /** make sure that we still have the lease on this file. */
2922      private INodeFile checkLease(String src, String holder)
2923          throws LeaseExpiredException, UnresolvedLinkException,
2924          FileNotFoundException {
2925        return checkLease(src, INodeId.GRANDFATHER_INODE_ID, holder,
2926            dir.getINode(src));
2927      }
2928      
2929      private INodeFile checkLease(String src, long fileId, String holder,
2930          INode inode) throws LeaseExpiredException, FileNotFoundException {
2931        assert hasReadLock();
2932        if (inode == null || !inode.isFile()) {
2933          Lease lease = leaseManager.getLease(holder);
2934          throw new LeaseExpiredException(
2935              "No lease on " + src + ": File does not exist. "
2936              + (lease != null ? lease.toString()
2937                  : "Holder " + holder + " does not have any open files."));
2938        }
2939        final INodeFile file = inode.asFile();
2940        if (!file.isUnderConstruction()) {
2941          Lease lease = leaseManager.getLease(holder);
2942          throw new LeaseExpiredException(
2943              "No lease on " + src + ": File is not open for writing. "
2944              + (lease != null ? lease.toString()
2945                  : "Holder " + holder + " does not have any open files."));
2946        }
2947        String clientName = file.getFileUnderConstructionFeature().getClientName();
2948        if (holder != null && !clientName.equals(holder)) {
2949          throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
2950              + clientName + " but is accessed by " + holder);
2951        }
2952        INodeId.checkId(fileId, file);
2953        return file;
2954      }
2955     
2956      /**
2957       * Complete in-progress write to the given file.
2958       * @return true if successful, false if the client should continue to retry
2959       *         (e.g if not all blocks have reached minimum replication yet)
2960       * @throws IOException on error (eg lease mismatch, file not open, file deleted)
2961       */
2962      boolean completeFile(String src, String holder,
2963                           ExtendedBlock last, long fileId)
2964        throws SafeModeException, UnresolvedLinkException, IOException {
2965        if (NameNode.stateChangeLog.isDebugEnabled()) {
2966          NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
2967              src + " for " + holder);
2968        }
2969        checkBlock(last);
2970        boolean success = false;
2971        checkOperation(OperationCategory.WRITE);
2972        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2973        writeLock();
2974        try {
2975          checkOperation(OperationCategory.WRITE);
2976          checkNameNodeSafeMode("Cannot complete file " + src);
2977          src = FSDirectory.resolvePath(src, pathComponents, dir);
2978          success = completeFileInternal(src, holder,
2979            ExtendedBlock.getLocalBlock(last), fileId);
2980        } finally {
2981          writeUnlock();
2982        }
2983        getEditLog().logSync();
2984        if (success) {
2985          NameNode.stateChangeLog.info("DIR* completeFile: " + src
2986              + " is closed by " + holder);
2987        }
2988        return success;
2989      }
2990    
  /**
   * Core of completeFile(): verifies the lease (treating a close-retry of an
   * already-closed file as success, see HDFS-3031), checks replication
   * progress, commits the last block, and finalizes the file.
   *
   * @param last the client's view of the file's last block
   * @param fileId inode id the client is operating on
   * @return true if the file is now closed (or was already closed by a prior
   *         attempt); false if the client should retry because not all blocks
   *         have reached minimum replication
   */
  private boolean completeFileInternal(String src, 
      String holder, Block last, long fileId) throws SafeModeException,
      UnresolvedLinkException, IOException {
    assert hasWriteLock();
    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile;
    try {
      pendingFile = checkLease(src, fileId, holder, iip.getINode(0));
    } catch (LeaseExpiredException lee) {
      final INode inode = dir.getINode(src);
      if (inode != null
          && inode.isFile()
          && !inode.asFile().isUnderConstruction()) {
        // This could be a retry RPC - i.e the client tried to close
        // the file, but missed the RPC response. Thus, it is trying
        // again to close the file. If the file still exists and
        // the client's view of the last block matches the actual
        // last block, then we'll treat it as a successful close.
        // See HDFS-3031.
        final Block realLastBlock = inode.asFile().getLastBlock();
        if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
          NameNode.stateChangeLog.info("DIR* completeFile: " +
              "request from " + holder + " to complete " + src +
              " which is already closed. But, it appears to be an RPC " +
              "retry. Returning success");
          return true;
        }
      }
      // Not a recognizable retry: surface the original lease failure.
      throw lee;
    }
    // Check the state of the penultimate block. It should be completed
    // before attempting to complete the last one.
    if (!checkFileProgress(pendingFile, false)) {
      return false;
    }

    // commit the last block and complete it if it has minimum replicas
    commitOrCompleteLastBlock(pendingFile, last);

    if (!checkFileProgress(pendingFile, true)) {
      return false;
    }

    finalizeINodeFileUnderConstruction(src, pendingFile,
        iip.getLatestSnapshotId());
    return true;
  }
3038    
3039      /**
3040       * Save allocated block at the given pending filename
3041       * 
3042       * @param src path to the file
3043       * @param inodesInPath representing each of the components of src. 
3044       *                     The last INode is the INode for the file.
3045       * @throws QuotaExceededException If addition of block exceeds space quota
3046       */
3047      BlockInfo saveAllocatedBlock(String src, INodesInPath inodes,
3048          Block newBlock, DatanodeStorageInfo[] targets)
3049              throws IOException {
3050        assert hasWriteLock();
3051        BlockInfo b = dir.addBlock(src, inodes, newBlock, targets);
3052        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". "
3053            + getBlockPoolId() + " " + b);
3054        DatanodeStorageInfo.incrementBlocksScheduled(targets);
3055        return b;
3056      }
3057    
3058      /**
3059       * Create new block with a unique block id and a new generation stamp.
3060       */
3061      Block createNewBlock() throws IOException {
3062        assert hasWriteLock();
3063        Block b = new Block(nextBlockId(), 0, 0);
3064        // Increment the generation stamp for every new block.
3065        b.setGenerationStamp(nextGenerationStamp(false));
3066        return b;
3067      }
3068    
3069      /**
3070       * Check that the indicated file's blocks are present and
3071       * replicated.  If not, return false. If checkall is true, then check
3072       * all blocks, otherwise check only penultimate block.
3073       */
3074      boolean checkFileProgress(INodeFile v, boolean checkall) {
3075        readLock();
3076        try {
3077          if (checkall) {
3078            //
3079            // check all blocks of the file.
3080            //
3081            for (BlockInfo block: v.getBlocks()) {
3082              if (!block.isComplete()) {
3083                LOG.info("BLOCK* checkFileProgress: " + block
3084                    + " has not reached minimal replication "
3085                    + blockManager.minReplication);
3086                return false;
3087              }
3088            }
3089          } else {
3090            //
3091            // check the penultimate block of this file
3092            //
3093            BlockInfo b = v.getPenultimateBlock();
3094            if (b != null && !b.isComplete()) {
3095              LOG.warn("BLOCK* checkFileProgress: " + b
3096                  + " has not reached minimal replication "
3097                  + blockManager.minReplication);
3098              return false;
3099            }
3100          }
3101          return true;
3102        } finally {
3103          readUnlock();
3104        }
3105      }
3106    
3107      ////////////////////////////////////////////////////////////////
3108      // Here's how to handle block-copy failure during client write:
3109      // -- As usual, the client's write should result in a streaming
3110      // backup write to a k-machine sequence.
3111      // -- If one of the backup machines fails, no worries.  Fail silently.
3112      // -- Before client is allowed to close and finalize file, make sure
3113      // that the blocks are backed up.  Namenode may have to issue specific backup
3114      // commands to make up for earlier datanode failures.  Once all copies
3115      // are made, edit namespace and return to client.
3116      ////////////////////////////////////////////////////////////////
3117    
3118      /** 
3119       * Change the indicated filename. 
3120       * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead.
3121       */
3122      @Deprecated
3123      boolean renameTo(String src, String dst) 
3124          throws IOException, UnresolvedLinkException {
3125        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3126        if (cacheEntry != null && cacheEntry.isSuccess()) {
3127          return true; // Return previous response
3128        }
3129        boolean ret = false;
3130        try {
3131          ret = renameToInt(src, dst, cacheEntry != null);
3132        } catch (AccessControlException e) {
3133          logAuditEvent(false, "rename", src, dst, null);
3134          throw e;
3135        } finally {
3136          RetryCache.setState(cacheEntry, ret);
3137        }
3138        return ret;
3139      }
3140    
3141      private boolean renameToInt(String src, String dst, boolean logRetryCache) 
3142        throws IOException, UnresolvedLinkException {
3143        if (NameNode.stateChangeLog.isDebugEnabled()) {
3144          NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
3145              " to " + dst);
3146        }
3147        if (!DFSUtil.isValidName(dst)) {
3148          throw new IOException("Invalid name: " + dst);
3149        }
3150        FSPermissionChecker pc = getPermissionChecker();
3151        checkOperation(OperationCategory.WRITE);
3152        byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
3153        byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
3154        boolean status = false;
3155        HdfsFileStatus resultingStat = null;
3156        writeLock();
3157        try {
3158          checkOperation(OperationCategory.WRITE);
3159          checkNameNodeSafeMode("Cannot rename " + src);
3160          src = FSDirectory.resolvePath(src, srcComponents, dir);
3161          dst = FSDirectory.resolvePath(dst, dstComponents, dir);
3162          checkOperation(OperationCategory.WRITE);
3163          status = renameToInternal(pc, src, dst, logRetryCache);
3164          if (status) {
3165            resultingStat = getAuditFileInfo(dst, false);
3166          }
3167        } finally {
3168          writeUnlock();
3169        }
3170        getEditLog().logSync();
3171        if (status) {
3172          logAuditEvent(true, "rename", src, dst, resultingStat);
3173        }
3174        return status;
3175      }
3176    
3177      /** @deprecated See {@link #renameTo(String, String)} */
3178      @Deprecated
3179      private boolean renameToInternal(FSPermissionChecker pc, String src,
3180          String dst, boolean logRetryCache) throws IOException,
3181          UnresolvedLinkException {
3182        assert hasWriteLock();
3183        if (isPermissionEnabled) {
3184          //We should not be doing this.  This is move() not renameTo().
3185          //but for now,
3186          //NOTE: yes, this is bad!  it's assuming much lower level behavior
3187          //      of rewriting the dst
3188          String actualdst = dir.isDir(dst)?
3189              dst + Path.SEPARATOR + new Path(src).getName(): dst;
3190          // Rename does not operates on link targets
3191          // Do not resolveLink when checking permissions of src and dst
3192          // Check write access to parent of src
3193          checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3194          // Check write access to ancestor of dst
3195          checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
3196              false);
3197        }
3198    
3199        if (dir.renameTo(src, dst, logRetryCache)) {
3200          return true;
3201        }
3202        return false;
3203      }
3204      
3205    
3206      /** Rename src to dst */
3207      void renameTo(String src, String dst, Options.Rename... options)
3208          throws IOException, UnresolvedLinkException {
3209        if (NameNode.stateChangeLog.isDebugEnabled()) {
3210          NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
3211              + src + " to " + dst);
3212        }
3213        if (!DFSUtil.isValidName(dst)) {
3214          throw new InvalidPathException("Invalid name: " + dst);
3215        }
3216        final FSPermissionChecker pc = getPermissionChecker();
3217        
3218        checkOperation(OperationCategory.WRITE);
3219        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3220        if (cacheEntry != null && cacheEntry.isSuccess()) {
3221          return; // Return previous response
3222        }
3223        byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
3224        byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
3225        HdfsFileStatus resultingStat = null;
3226        boolean success = false;
3227        writeLock();
3228        try {
3229          checkOperation(OperationCategory.WRITE);
3230          checkNameNodeSafeMode("Cannot rename " + src);
3231          src = FSDirectory.resolvePath(src, srcComponents, dir);
3232          dst = FSDirectory.resolvePath(dst, dstComponents, dir);
3233          renameToInternal(pc, src, dst, cacheEntry != null, options);
3234          resultingStat = getAuditFileInfo(dst, false);
3235          success = true;
3236        } finally {
3237          writeUnlock();
3238          RetryCache.setState(cacheEntry, success);
3239        }
3240        getEditLog().logSync();
3241        if (resultingStat != null) {
3242          StringBuilder cmd = new StringBuilder("rename options=");
3243          for (Rename option : options) {
3244            cmd.append(option.value()).append(" ");
3245          }
3246          logAuditEvent(true, cmd.toString(), src, dst, resultingStat);
3247        }
3248      }
3249    
  /**
   * Permission-check and delegate a rename-with-options to the directory
   * layer. Must be called with the write lock held.
   */
  private void renameToInternal(FSPermissionChecker pc, String src, String dst,
      boolean logRetryCache, Options.Rename... options) throws IOException {
    assert hasWriteLock();
    if (isPermissionEnabled) {
      // Rename does not operate on link targets, so do not resolve links
      // when checking permissions of src and dst.
      // Check write access to parent of src
      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
      // Check write access to ancestor of dst
      checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
    }

    // The directory layer performs the rename and journals the edit.
    dir.renameTo(src, dst, logRetryCache, options);
  }
3264      
3265      /**
3266       * Remove the indicated file from namespace.
3267       * 
3268       * @see ClientProtocol#delete(String, boolean) for detailed description and 
3269       * description of exceptions
3270       */
3271      boolean delete(String src, boolean recursive)
3272          throws AccessControlException, SafeModeException,
3273          UnresolvedLinkException, IOException {
3274        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3275        if (cacheEntry != null && cacheEntry.isSuccess()) {
3276          return true; // Return previous response
3277        }
3278        boolean ret = false;
3279        try {
3280          ret = deleteInt(src, recursive, cacheEntry != null);
3281        } catch (AccessControlException e) {
3282          logAuditEvent(false, "delete", src);
3283          throw e;
3284        } finally {
3285          RetryCache.setState(cacheEntry, ret);
3286        }
3287        return ret;
3288      }
3289          
3290      private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
3291          throws AccessControlException, SafeModeException,
3292          UnresolvedLinkException, IOException {
3293        if (NameNode.stateChangeLog.isDebugEnabled()) {
3294          NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
3295        }
3296        boolean status = deleteInternal(src, recursive, true, logRetryCache);
3297        if (status) {
3298          logAuditEvent(true, "delete", src);
3299        }
3300        return status;
3301      }
3302        
3303      private FSPermissionChecker getPermissionChecker()
3304          throws AccessControlException {
3305        try {
3306          return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser());
3307        } catch (IOException ioe) {
3308          throw new AccessControlException(ioe);
3309        }
3310      }
3311      
3312      /**
3313       * Remove a file/directory from the namespace.
3314       * <p>
3315       * For large directories, deletion is incremental. The blocks under
3316       * the directory are collected and deleted a small number at a time holding
3317       * the {@link FSNamesystem} lock.
3318       * <p>
3319       * For small directory or file the deletion is done in one shot.
3320       * 
3321       * @see ClientProtocol#delete(String, boolean) for description of exceptions
3322       */
3323      private boolean deleteInternal(String src, boolean recursive,
3324          boolean enforcePermission, boolean logRetryCache)
3325          throws AccessControlException, SafeModeException, UnresolvedLinkException,
3326                 IOException {
3327        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
3328        List<INode> removedINodes = new ChunkedArrayList<INode>();
3329        FSPermissionChecker pc = getPermissionChecker();
3330        checkOperation(OperationCategory.WRITE);
3331        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3332        boolean ret = false;
3333        writeLock();
3334        try {
3335          checkOperation(OperationCategory.WRITE);
3336          checkNameNodeSafeMode("Cannot delete " + src);
3337          src = FSDirectory.resolvePath(src, pathComponents, dir);
3338          if (!recursive && dir.isNonEmptyDirectory(src)) {
3339            throw new IOException(src + " is non empty");
3340          }
3341          if (enforcePermission && isPermissionEnabled) {
3342            checkPermission(pc, src, false, null, FsAction.WRITE, null,
3343                FsAction.ALL, false);
3344          }
3345          // Unlink the target directory from directory tree
3346          if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
3347            return false;
3348          }
3349          ret = true;
3350        } finally {
3351          writeUnlock();
3352        }
3353        getEditLog().logSync(); 
3354        removeBlocks(collectedBlocks); // Incremental deletion of blocks
3355        collectedBlocks.clear();
3356        dir.writeLock();
3357        try {
3358          dir.removeFromInodeMap(removedINodes);
3359        } finally {
3360          dir.writeUnlock();
3361        }
3362        removedINodes.clear();
3363        if (NameNode.stateChangeLog.isDebugEnabled()) {
3364          NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
3365            + src +" is removed");
3366        }
3367        return ret;
3368      }
3369    
3370      /**
3371       * From the given list, incrementally remove the blocks from blockManager
3372       * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
3373       * ensure that other waiters on the lock can get in. See HDFS-2938
3374       * 
3375       * @param blocks
3376       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
3377       *          of blocks that need to be removed from blocksMap
3378       */
3379      void removeBlocks(BlocksMapUpdateInfo blocks) {
3380        List<Block> toDeleteList = blocks.getToDeleteList();
3381        Iterator<Block> iter = toDeleteList.iterator();
3382        while (iter.hasNext()) {
3383          writeLock();
3384          try {
3385            for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
3386              blockManager.removeBlock(iter.next());
3387            }
3388          } finally {
3389            writeUnlock();
3390          }
3391        }
3392      }
3393      
3394      /**
3395       * Remove leases, inodes and blocks related to a given path
3396       * @param src The given path
3397       * @param blocks Containing the list of blocks to be deleted from blocksMap
3398       * @param removedINodes Containing the list of inodes to be removed from 
3399       *                      inodesMap
3400       */
3401      void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
3402          List<INode> removedINodes) {
3403        assert hasWriteLock();
3404        leaseManager.removeLeaseWithPrefixPath(src);
3405        // remove inodes from inodesMap
3406        if (removedINodes != null) {
3407          dir.removeFromInodeMap(removedINodes);
3408          removedINodes.clear();
3409        }
3410        if (blocks == null) {
3411          return;
3412        }
3413        
3414        removeBlocksAndUpdateSafemodeTotal(blocks);
3415      }
3416    
3417      /**
3418       * Removes the blocks from blocksmap and updates the safemode blocks total
3419       * 
3420       * @param blocks
3421       *          An instance of {@link BlocksMapUpdateInfo} which contains a list
3422       *          of blocks that need to be removed from blocksMap
3423       */
3424      void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
3425        assert hasWriteLock();
3426        // In the case that we are a Standby tailing edits from the
3427        // active while in safe-mode, we need to track the total number
3428        // of blocks and safe blocks in the system.
3429        boolean trackBlockCounts = isSafeModeTrackingBlocks();
3430        int numRemovedComplete = 0, numRemovedSafe = 0;
3431    
3432        for (Block b : blocks.getToDeleteList()) {
3433          if (trackBlockCounts) {
3434            BlockInfo bi = getStoredBlock(b);
3435            if (bi.isComplete()) {
3436              numRemovedComplete++;
3437              if (bi.numNodes() >= blockManager.minReplication) {
3438                numRemovedSafe++;
3439              }
3440            }
3441          }
3442          blockManager.removeBlock(b);
3443        }
3444        if (trackBlockCounts) {
3445          if (LOG.isDebugEnabled()) {
3446            LOG.debug("Adjusting safe-mode totals for deletion."
3447                + "decreasing safeBlocks by " + numRemovedSafe
3448                + ", totalBlocks by " + numRemovedComplete);
3449          }
3450          adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
3451        }
3452      }
3453    
3454      /**
3455       * @see SafeModeInfo#shouldIncrementallyTrackBlocks
3456       */
3457      private boolean isSafeModeTrackingBlocks() {
3458        if (!haEnabled) {
3459          // Never track blocks incrementally in non-HA code.
3460          return false;
3461        }
3462        SafeModeInfo sm = this.safeMode;
3463        return sm != null && sm.shouldIncrementallyTrackBlocks();
3464      }
3465    
3466      /**
3467       * Get the file info for a specific file.
3468       *
3469       * @param src The string representation of the path to the file
3470       * @param resolveLink whether to throw UnresolvedLinkException 
3471       *        if src refers to a symlink
3472       *
3473       * @throws AccessControlException if access is denied
3474       * @throws UnresolvedLinkException if a symlink is encountered.
3475       *
3476       * @return object containing information regarding the file
3477       *         or null if file not found
3478       * @throws StandbyException 
3479       */
3480      HdfsFileStatus getFileInfo(String src, boolean resolveLink) 
3481        throws AccessControlException, UnresolvedLinkException,
3482               StandbyException, IOException {
3483        if (!DFSUtil.isValidName(src)) {
3484          throw new InvalidPathException("Invalid file name: " + src);
3485        }
3486        HdfsFileStatus stat = null;
3487        FSPermissionChecker pc = getPermissionChecker();
3488        checkOperation(OperationCategory.READ);
3489        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3490        readLock();
3491        try {
3492          checkOperation(OperationCategory.READ);
3493          src = FSDirectory.resolvePath(src, pathComponents, dir);
3494          if (isPermissionEnabled) {
3495            checkPermission(pc, src, false, null, null, null, null, resolveLink);
3496          }
3497          stat = dir.getFileInfo(src, resolveLink);
3498        } catch (AccessControlException e) {
3499          logAuditEvent(false, "getfileinfo", src);
3500          throw e;
3501        } finally {
3502          readUnlock();
3503        }
3504        logAuditEvent(true, "getfileinfo", src);
3505        return stat;
3506      }
3507      
3508      /**
3509       * Returns true if the file is closed
3510       */
3511      boolean isFileClosed(String src) 
3512          throws AccessControlException, UnresolvedLinkException,
3513          StandbyException, IOException {
3514        FSPermissionChecker pc = getPermissionChecker();  
3515        checkOperation(OperationCategory.READ);
3516        readLock();
3517        try {
3518          checkOperation(OperationCategory.READ);
3519          if (isPermissionEnabled) {
3520            checkTraverse(pc, src);
3521          }
3522          return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
3523        } catch (AccessControlException e) {
3524          if (isAuditEnabled() && isExternalInvocation()) {
3525            logAuditEvent(false, "isFileClosed", src);
3526          }
3527          throw e;
3528        } finally {
3529          readUnlock();
3530        }
3531      }
3532    
3533      /**
3534       * Create all the necessary directories
3535       */
3536      boolean mkdirs(String src, PermissionStatus permissions,
3537          boolean createParent) throws IOException, UnresolvedLinkException {
3538        boolean ret = false;
3539        try {
3540          ret = mkdirsInt(src, permissions, createParent);
3541        } catch (AccessControlException e) {
3542          logAuditEvent(false, "mkdirs", src);
3543          throw e;
3544        }
3545        return ret;
3546      }
3547    
3548      private boolean mkdirsInt(String src, PermissionStatus permissions,
3549          boolean createParent) throws IOException, UnresolvedLinkException {
3550        if(NameNode.stateChangeLog.isDebugEnabled()) {
3551          NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
3552        }
3553        if (!DFSUtil.isValidName(src)) {
3554          throw new InvalidPathException(src);
3555        }
3556        FSPermissionChecker pc = getPermissionChecker();
3557        checkOperation(OperationCategory.WRITE);
3558        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3559        HdfsFileStatus resultingStat = null;
3560        boolean status = false;
3561        writeLock();
3562        try {
3563          checkOperation(OperationCategory.WRITE);   
3564          checkNameNodeSafeMode("Cannot create directory " + src);
3565          src = FSDirectory.resolvePath(src, pathComponents, dir);
3566          status = mkdirsInternal(pc, src, permissions, createParent);
3567          if (status) {
3568            resultingStat = dir.getFileInfo(src, false);
3569          }
3570        } finally {
3571          writeUnlock();
3572        }
3573        getEditLog().logSync();
3574        if (status) {
3575          logAuditEvent(true, "mkdirs", src, null, resultingStat);
3576        }
3577        return status;
3578      }
3579        
3580      /**
3581       * Create all the necessary directories
3582       */
3583      private boolean mkdirsInternal(FSPermissionChecker pc, String src,
3584          PermissionStatus permissions, boolean createParent) 
3585          throws IOException, UnresolvedLinkException {
3586        assert hasWriteLock();
3587        if (isPermissionEnabled) {
3588          checkTraverse(pc, src);
3589        }
3590        if (dir.isDirMutable(src)) {
3591          // all the users of mkdirs() are used to expect 'true' even if
3592          // a new directory is not created.
3593          return true;
3594        }
3595        if (isPermissionEnabled) {
3596          checkAncestorAccess(pc, src, FsAction.WRITE);
3597        }
3598        if (!createParent) {
3599          verifyParentDir(src);
3600        }
3601    
3602        // validate that we have enough inodes. This is, at best, a 
3603        // heuristic because the mkdirs() operation might need to 
3604        // create multiple inodes.
3605        checkFsObjectLimit();
3606    
3607        if (!dir.mkdirs(src, permissions, false, now())) {
3608          throw new IOException("Failed to create directory: " + src);
3609        }
3610        return true;
3611      }
3612    
3613      /**
3614       * Get the content summary for a specific file/dir.
3615       *
3616       * @param src The string representation of the path to the file
3617       *
3618       * @throws AccessControlException if access is denied
3619       * @throws UnresolvedLinkException if a symlink is encountered.
3620       * @throws FileNotFoundException if no file exists
3621       * @throws StandbyException
3622       * @throws IOException for issues with writing to the audit log
3623       *
3624       * @return object containing information regarding the file
3625       *         or null if file not found
3626       */
3627      ContentSummary getContentSummary(String src) throws IOException {
3628        FSPermissionChecker pc = getPermissionChecker();
3629        checkOperation(OperationCategory.READ);
3630        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3631        readLock();
3632        boolean success = true;
3633        try {
3634          checkOperation(OperationCategory.READ);
3635          src = FSDirectory.resolvePath(src, pathComponents, dir);
3636          if (isPermissionEnabled) {
3637            checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
3638          }
3639          return dir.getContentSummary(src);
3640    
3641        } catch (AccessControlException ace) {
3642          success = false;
3643          throw ace;
3644        } finally {
3645          readUnlock();
3646          logAuditEvent(success, "contentSummary", src);
3647        }
3648      }
3649    
3650      /**
3651       * Set the namespace quota and diskspace quota for a directory.
3652       * See {@link ClientProtocol#setQuota(String, long, long)} for the 
3653       * contract.
3654       * 
3655       * Note: This does not support ".inodes" relative path.
3656       */
3657      void setQuota(String path, long nsQuota, long dsQuota) 
3658          throws IOException, UnresolvedLinkException {
3659        checkSuperuserPrivilege();
3660        checkOperation(OperationCategory.WRITE);
3661        writeLock();
3662        try {
3663          checkOperation(OperationCategory.WRITE);
3664          checkNameNodeSafeMode("Cannot set quota on " + path);
3665          dir.setQuota(path, nsQuota, dsQuota);
3666        } finally {
3667          writeUnlock();
3668        }
3669        getEditLog().logSync();
3670      }
3671    
3672      /** Persist all metadata about this file.
3673       * @param src The string representation of the path
3674       * @param clientName The string representation of the client
3675       * @param lastBlockLength The length of the last block 
3676       *                        under construction reported from client.
3677       * @throws IOException if path does not exist
3678       */
3679      void fsync(String src, String clientName, long lastBlockLength) 
3680          throws IOException, UnresolvedLinkException {
3681        NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
3682        checkOperation(OperationCategory.WRITE);
3683        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
3684        writeLock();
3685        try {
3686          checkOperation(OperationCategory.WRITE);
3687          checkNameNodeSafeMode("Cannot fsync file " + src);
3688          src = FSDirectory.resolvePath(src, pathComponents, dir);
3689          INodeFile pendingFile  = checkLease(src, clientName);
3690          if (lastBlockLength > 0) {
3691            pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
3692                pendingFile, lastBlockLength);
3693          }
3694          dir.persistBlocks(src, pendingFile, false);
3695        } finally {
3696          writeUnlock();
3697        }
3698        getEditLog().logSync();
3699      }
3700    
3701      /**
3702       * Move a file that is being written to be immutable.
3703       * @param src The filename
3704       * @param lease The lease for the client creating the file
3705       * @param recoveryLeaseHolder reassign lease to this holder if the last block
3706       *        needs recovery; keep current holder if null.
3707       * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
3708       *         replication;<br>
3709       *         RecoveryInProgressException if lease recovery is in progress.<br>
3710       *         IOException in case of an error.
3711       * @return true  if file has been successfully finalized and closed or 
3712       *         false if block recovery has been initiated. Since the lease owner
3713       *         has been changed and logged, caller should call logSync().
3714       */
3715      boolean internalReleaseLease(Lease lease, String src, 
3716          String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 
3717          IOException, UnresolvedLinkException {
3718        LOG.info("Recovering " + lease + ", src=" + src);
3719        assert !isInSafeMode();
3720        assert hasWriteLock();
3721    
3722        final INodesInPath iip = dir.getLastINodeInPath(src);
3723        final INodeFile pendingFile = iip.getINode(0).asFile();
3724        int nrBlocks = pendingFile.numBlocks();
3725        BlockInfo[] blocks = pendingFile.getBlocks();
3726    
3727        int nrCompleteBlocks;
3728        BlockInfo curBlock = null;
3729        for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
3730          curBlock = blocks[nrCompleteBlocks];
3731          if(!curBlock.isComplete())
3732            break;
3733          assert blockManager.checkMinReplication(curBlock) :
3734                  "A COMPLETE block is not minimally replicated in " + src;
3735        }
3736    
3737        // If there are no incomplete blocks associated with this file,
3738        // then reap lease immediately and close the file.
3739        if(nrCompleteBlocks == nrBlocks) {
3740          finalizeINodeFileUnderConstruction(src, pendingFile,
3741              iip.getLatestSnapshotId());
3742          NameNode.stateChangeLog.warn("BLOCK*"
3743            + " internalReleaseLease: All existing blocks are COMPLETE,"
3744            + " lease removed, file closed.");
3745          return true;  // closed!
3746        }
3747    
3748        // Only the last and the penultimate blocks may be in non COMPLETE state.
3749        // If the penultimate block is not COMPLETE, then it must be COMMITTED.
3750        if(nrCompleteBlocks < nrBlocks - 2 ||
3751           nrCompleteBlocks == nrBlocks - 2 &&
3752             curBlock != null &&
3753             curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
3754          final String message = "DIR* NameSystem.internalReleaseLease: "
3755            + "attempt to release a create lock on "
3756            + src + " but file is already closed.";
3757          NameNode.stateChangeLog.warn(message);
3758          throw new IOException(message);
3759        }
3760    
3761        // The last block is not COMPLETE, and
3762        // that the penultimate block if exists is either COMPLETE or COMMITTED
3763        final BlockInfo lastBlock = pendingFile.getLastBlock();
3764        BlockUCState lastBlockState = lastBlock.getBlockUCState();
3765        BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
3766        boolean penultimateBlockMinReplication;
3767        BlockUCState penultimateBlockState;
3768        if (penultimateBlock == null) {
3769          penultimateBlockState = BlockUCState.COMPLETE;
3770          // If penultimate block doesn't exist then its minReplication is met
3771          penultimateBlockMinReplication = true;
3772        } else {
3773          penultimateBlockState = BlockUCState.COMMITTED;
3774          penultimateBlockMinReplication = 
3775            blockManager.checkMinReplication(penultimateBlock);
3776        }
3777        assert penultimateBlockState == BlockUCState.COMPLETE ||
3778               penultimateBlockState == BlockUCState.COMMITTED :
3779               "Unexpected state of penultimate block in " + src;
3780    
3781        switch(lastBlockState) {
3782        case COMPLETE:
3783          assert false : "Already checked that the last block is incomplete";
3784          break;
3785        case COMMITTED:
3786          // Close file if committed blocks are minimally replicated
3787          if(penultimateBlockMinReplication &&
3788              blockManager.checkMinReplication(lastBlock)) {
3789            finalizeINodeFileUnderConstruction(src, pendingFile,
3790                iip.getLatestSnapshotId());
3791            NameNode.stateChangeLog.warn("BLOCK*"
3792              + " internalReleaseLease: Committed blocks are minimally replicated,"
3793              + " lease removed, file closed.");
3794            return true;  // closed!
3795          }
3796          // Cannot close file right now, since some blocks 
3797          // are not yet minimally replicated.
3798          // This may potentially cause infinite loop in lease recovery
3799          // if there are no valid replicas on data-nodes.
3800          String message = "DIR* NameSystem.internalReleaseLease: " +
3801              "Failed to release lease for file " + src +
3802              ". Committed blocks are waiting to be minimally replicated." +
3803              " Try again later.";
3804          NameNode.stateChangeLog.warn(message);
3805          throw new AlreadyBeingCreatedException(message);
3806        case UNDER_CONSTRUCTION:
3807        case UNDER_RECOVERY:
3808          final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
3809          // setup the last block locations from the blockManager if not known
3810          if (uc.getNumExpectedLocations() == 0) {
3811            uc.setExpectedLocations(blockManager.getStorages(lastBlock));
3812          }
3813    
3814          if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
3815            // There is no datanode reported to this block.
3816            // may be client have crashed before writing data to pipeline.
3817            // This blocks doesn't need any recovery.
3818            // We can remove this block and close the file.
3819            pendingFile.removeLastBlock(lastBlock);
3820            finalizeINodeFileUnderConstruction(src, pendingFile,
3821                iip.getLatestSnapshotId());
3822            NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
3823                + "Removed empty last block and closed file.");
3824            return true;
3825          }
3826          // start recovery of the last block for this file
3827          long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
3828          lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
3829          uc.initializeBlockRecovery(blockRecoveryId);
3830          leaseManager.renewLease(lease);
3831          // Cannot close file right now, since the last block requires recovery.
3832          // This may potentially cause infinite loop in lease recovery
3833          // if there are no valid replicas on data-nodes.
3834          NameNode.stateChangeLog.warn(
3835                    "DIR* NameSystem.internalReleaseLease: " +
3836                    "File " + src + " has not been closed." +
3837                   " Lease recovery is in progress. " +
3838                    "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
3839          break;
3840        }
3841        return false;
3842      }
3843    
3844      private Lease reassignLease(Lease lease, String src, String newHolder,
3845          INodeFile pendingFile) {
3846        assert hasWriteLock();
3847        if(newHolder == null)
3848          return lease;
3849        // The following transaction is not synced. Make sure it's sync'ed later.
3850        logReassignLease(lease.getHolder(), src, newHolder);
3851        return reassignLeaseInternal(lease, src, newHolder, pendingFile);
3852      }
3853      
3854      Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
3855          INodeFile pendingFile) {
3856        assert hasWriteLock();
3857        pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
3858        return leaseManager.reassignLease(lease, src, newHolder);
3859      }
3860    
3861      private void commitOrCompleteLastBlock(final INodeFile fileINode,
3862          final Block commitBlock) throws IOException {
3863        assert hasWriteLock();
3864        Preconditions.checkArgument(fileINode.isUnderConstruction());
3865        if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
3866          return;
3867        }
3868    
3869        // Adjust disk space consumption if required
3870        final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();    
3871        if (diff > 0) {
3872          try {
3873            String path = fileINode.getFullPathName();
3874            dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
3875          } catch (IOException e) {
3876            LOG.warn("Unexpected exception while updating disk space.", e);
3877          }
3878        }
3879      }
3880    
3881      private void finalizeINodeFileUnderConstruction(String src,
3882          INodeFile pendingFile, int latestSnapshot) throws IOException,
3883          UnresolvedLinkException {
3884        assert hasWriteLock();
3885        FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
3886        Preconditions.checkArgument(uc != null);
3887        leaseManager.removeLease(uc.getClientName(), src);
3888        
3889        pendingFile = pendingFile.recordModification(latestSnapshot);
3890    
3891        // The file is no longer pending.
3892        // Create permanent INode, update blocks. No need to replace the inode here
3893        // since we just remove the uc feature from pendingFile
3894        final INodeFile newFile = pendingFile.toCompleteFile(now());
3895    
3896        // close file and persist block allocations for this file
3897        dir.closeFile(src, newFile);
3898    
3899        blockManager.checkReplication(newFile);
3900      }
3901    
  /**
   * Look up the block's metadata in the block manager's blocks map.
   * May return null when the block is not in the map — callers in this
   * class (e.g. commitBlockSynchronization) null-check the result.
   */
  @VisibleForTesting
  BlockInfo getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }
3906      
3907      @Override
3908      public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
3909        assert hasReadLock();
3910        final BlockCollection bc = blockUC.getBlockCollection();
3911        if (bc == null || !(bc instanceof INodeFile)
3912            || !((INodeFile) bc).isUnderConstruction()) {
3913          return false;
3914        }
3915    
3916        INodeFile inodeUC = (INodeFile) bc;
3917        String fullName = inodeUC.getName();
3918        try {
3919          if (fullName != null && fullName.startsWith(Path.SEPARATOR)
3920              && dir.getINode(fullName) == inodeUC) {
3921            // If file exists in normal path then no need to look in snapshot
3922            return false;
3923          }
3924        } catch (UnresolvedLinkException e) {
3925          LOG.error("Error while resolving the link : " + fullName, e);
3926          return false;
3927        }
3928        /*
3929         * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
3930         * bc is not in the current fsdirectory tree, bc must represent a snapshot
3931         * file. 
3932         * 2. if fullName is not an absolute path, bc cannot be existent in the 
3933         * current fsdirectory tree. 
3934         * 3. if bc is not the current node associated with fullName, bc must be a
3935         * snapshot inode.
3936         */
3937        return true;
3938      }
3939    
  /**
   * Commit the result of a block synchronization (pipeline/lease recovery)
   * for the last block of a file under construction: either update the block
   * with its new generation stamp and length, or delete it, and optionally
   * close the file.
   *
   * @param lastblock the block that was recovered
   * @param newgenerationstamp recovery generation stamp; must match the
   *        block's current recovery id or an IOException is thrown
   * @param newlength finalized length of the recovered block
   * @param closeFile if true, commit the last block and close the file
   * @param deleteblock if true, remove the block instead of updating it
   * @param newtargets datanodes reported to hold replicas after recovery
   * @param newtargetstorages storage IDs parallel to {@code newtargets}
   * @throws IOException if the block is unknown (and not being deleted) or
   *         the recovery id does not match
   */
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.
  
      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        // Likely a retry of an already-committed synchronization; nothing
        // to do for a file that is no longer under construction.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // Reject stale recovery attempts: only the latest recovery id wins.
      long recoveryId =
        ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + lastblock); 
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        // There should be no locations in the blockManager till now because the
        // file is underConstruction
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode; unknown datanodes are dropped from the
            // trimmed lists (and logged at debug level).
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            trimmedTargets.get(i).addBlock(
              trimmedStorages.get(i), storedBlock);
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = persistBlocks(iFile, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the lock before reporting success.
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }
4064    
4065      /**
4066       *
4067       * @param pendingFile
4068       * @param storedBlock
4069       * @return Path of the file that was closed.
4070       * @throws IOException
4071       */
4072      @VisibleForTesting
4073      String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
4074          throws IOException {
4075        String src = pendingFile.getFullPathName();
4076    
4077        // commit the last block and complete it if it has minimum replicas
4078        commitOrCompleteLastBlock(pendingFile, storedBlock);
4079    
4080        //remove lease, close file
4081        finalizeINodeFileUnderConstruction(src, pendingFile,
4082            Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));
4083    
4084        return src;
4085      }
4086    
4087      /**
4088       * Persist the block list for the given file.
4089       *
4090       * @param pendingFile
4091       * @return Path to the given file.
4092       * @throws IOException
4093       */
4094      @VisibleForTesting
4095      String persistBlocks(INodeFile pendingFile, boolean logRetryCache)
4096          throws IOException {
4097        String src = pendingFile.getFullPathName();
4098        dir.persistBlocks(src, pendingFile, logRetryCache);
4099        return src;
4100      }
4101    
4102      /**
4103       * Renew the lease(s) held by the given client
4104       */
4105      void renewLease(String holder) throws IOException {
4106        checkOperation(OperationCategory.WRITE);
4107        readLock();
4108        try {
4109          checkOperation(OperationCategory.WRITE);
4110          checkNameNodeSafeMode("Cannot renew lease for " + holder);
4111          leaseManager.renewLease(holder);
4112        } finally {
4113          readUnlock();
4114        }
4115      }
4116    
4117      /**
4118       * Get a partial listing of the indicated directory
4119       *
4120       * @param src the directory name
4121       * @param startAfter the name to start after
4122       * @param needLocation if blockLocations need to be returned
4123       * @return a partial listing starting after startAfter
4124       * 
4125       * @throws AccessControlException if access is denied
4126       * @throws UnresolvedLinkException if symbolic link is encountered
4127       * @throws IOException if other I/O error occurred
4128       */
4129      DirectoryListing getListing(String src, byte[] startAfter,
4130          boolean needLocation) 
4131          throws AccessControlException, UnresolvedLinkException, IOException {
4132        try {
4133          return getListingInt(src, startAfter, needLocation);
4134        } catch (AccessControlException e) {
4135          logAuditEvent(false, "listStatus", src);
4136          throw e;
4137        }
4138      }
4139    
4140      private DirectoryListing getListingInt(String src, byte[] startAfter,
4141          boolean needLocation) 
4142        throws AccessControlException, UnresolvedLinkException, IOException {
4143        DirectoryListing dl;
4144        FSPermissionChecker pc = getPermissionChecker();
4145        checkOperation(OperationCategory.READ);
4146        byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
4147        String startAfterString = new String(startAfter);
4148        readLock();
4149        try {
4150          checkOperation(OperationCategory.READ);
4151          src = FSDirectory.resolvePath(src, pathComponents, dir);
4152    
4153          // Get file name when startAfter is an INodePath
4154          if (FSDirectory.isReservedName(startAfterString)) {
4155            byte[][] startAfterComponents = FSDirectory
4156                .getPathComponentsForReservedPath(startAfterString);
4157            try {
4158              String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
4159              byte[][] regularPath = INode.getPathComponents(tmp);
4160              startAfter = regularPath[regularPath.length - 1];
4161            } catch (IOException e) {
4162              // Possibly the inode is deleted
4163              throw new DirectoryListingStartAfterNotFoundException(
4164                  "Can't find startAfter " + startAfterString);
4165            }
4166          }
4167          
4168          if (isPermissionEnabled) {
4169            if (dir.isDir(src)) {
4170              checkPathAccess(pc, src, FsAction.READ_EXECUTE);
4171            } else {
4172              checkTraverse(pc, src);
4173            }
4174          }
4175          logAuditEvent(true, "listStatus", src);
4176          dl = dir.getListing(src, startAfter, needLocation);
4177        } finally {
4178          readUnlock();
4179        }
4180        return dl;
4181      }
4182    
4183      /////////////////////////////////////////////////////////
4184      //
4185      // These methods are called by datanodes
4186      //
4187      /////////////////////////////////////////////////////////
4188      /**
4189       * Register Datanode.
4190       * <p>
4191       * The purpose of registration is to identify whether the new datanode
4192       * serves a new data storage, and will report new data block copies,
4193       * which the namenode was not aware of; or the datanode is a replacement
4194       * node for the data storage that was previously served by a different
4195       * or the same (in terms of host:port) datanode.
4196       * The data storages are distinguished by their storageIDs. When a new
4197       * data storage is reported the namenode issues a new unique storageID.
4198       * <p>
4199       * Finally, the namenode returns its namespaceID as the registrationID
4200       * for the datanodes. 
4201       * namespaceID is a persistent attribute of the name space.
4202       * The registrationID is checked every time the datanode is communicating
4203       * with the namenode. 
4204       * Datanodes with inappropriate registrationID are rejected.
4205       * If the namenode stops, and then restarts it can restore its 
4206       * namespaceID and will continue serving the datanodes that has previously
4207       * registered with the namenode without restarting the whole cluster.
4208       * 
4209       * @see org.apache.hadoop.hdfs.server.datanode.DataNode
4210       */
4211      void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
4212        writeLock();
4213        try {
4214          getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
4215          checkSafeMode();
4216        } finally {
4217          writeUnlock();
4218        }
4219      }
4220      
4221      /**
4222       * Get registrationID for datanodes based on the namespaceID.
4223       * 
4224       * @see #registerDatanode(DatanodeRegistration)
4225       * @return registration ID
4226       */
4227      String getRegistrationID() {
4228        return Storage.getRegistrationID(dir.fsImage.getStorage());
4229      }
4230    
4231      /**
4232       * The given node has reported in.  This method should:
4233       * 1) Record the heartbeat, so the datanode isn't timed out
4234       * 2) Adjust usage stats for future block allocation
4235       * 
4236       * If a substantial amount of time passed since the last datanode 
4237       * heartbeat then request an immediate block report.  
4238       * 
4239       * @return an array of datanode commands 
4240       * @throws IOException
4241       */
4242      HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
4243          StorageReport[] reports, long cacheCapacity, long cacheUsed,
4244          int xceiverCount, int xmitsInProgress, int failedVolumes)
4245            throws IOException {
4246        readLock();
4247        try {
4248          //get datanode commands
4249          final int maxTransfer = blockManager.getMaxReplicationStreams()
4250              - xmitsInProgress;
4251          DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
4252              nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
4253              xceiverCount, maxTransfer, failedVolumes);
4254          
4255          //create ha status
4256          final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
4257              haContext.getState().getServiceState(),
4258              getFSImage().getLastAppliedOrWrittenTxId());
4259    
4260          return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
4261        } finally {
4262          readUnlock();
4263        }
4264      }
4265    
4266      /**
4267       * Returns whether or not there were available resources at the last check of
4268       * resources.
4269       *
4270       * @return true if there were sufficient resources available, false otherwise.
4271       */
4272      boolean nameNodeHasResourcesAvailable() {
4273        return hasResourcesAvailable;
4274      }
4275    
4276      /**
4277       * Perform resource checks and cache the results.
4278       * @throws IOException
4279       */
4280      void checkAvailableResources() {
4281        Preconditions.checkState(nnResourceChecker != null,
4282            "nnResourceChecker not initialized");
4283        hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
4284      }
4285    
4286      /**
4287       * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
4288       * there are found to be insufficient resources available, causes the NN to
4289       * enter safe mode. If resources are later found to have returned to
4290       * acceptable levels, this daemon will cause the NN to exit safe mode.
4291       */
4292      class NameNodeResourceMonitor implements Runnable  {
4293        boolean shouldNNRmRun = true;
4294        @Override
4295        public void run () {
4296          try {
4297            while (fsRunning && shouldNNRmRun) {
4298              checkAvailableResources();
4299              if(!nameNodeHasResourcesAvailable()) {
4300                String lowResourcesMsg = "NameNode low on available disk space. ";
4301                if (!isInSafeMode()) {
4302                  FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
4303                } else {
4304                  FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
4305                }
4306                enterSafeMode(true);
4307              }
4308              try {
4309                Thread.sleep(resourceRecheckInterval);
4310              } catch (InterruptedException ie) {
4311                // Deliberately ignore
4312              }
4313            }
4314          } catch (Exception e) {
4315            FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
4316          }
4317        }
4318    
4319        public void stopMonitor() {
4320          shouldNNRmRun = false;
4321        }
4322     }
4323    
4324      class NameNodeEditLogRoller implements Runnable {
4325    
4326        private boolean shouldRun = true;
4327        private final long rollThreshold;
4328        private final long sleepIntervalMs;
4329    
4330        public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
4331            this.rollThreshold = rollThreshold;
4332            this.sleepIntervalMs = sleepIntervalMs;
4333        }
4334    
4335        @Override
4336        public void run() {
4337          while (fsRunning && shouldRun) {
4338            try {
4339              FSEditLog editLog = getFSImage().getEditLog();
4340              long numEdits =
4341                  editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
4342              if (numEdits > rollThreshold) {
4343                FSNamesystem.LOG.info("NameNode rolling its own edit log because"
4344                    + " number of edits in open segment exceeds threshold of "
4345                    + rollThreshold);
4346                rollEditLog();
4347              }
4348              Thread.sleep(sleepIntervalMs);
4349            } catch (InterruptedException e) {
4350              FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
4351                  + " was interrupted, exiting");
4352              break;
4353            } catch (Exception e) {
4354              FSNamesystem.LOG.error("Swallowing exception in "
4355                  + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
4356            }
4357          }
4358        }
4359    
4360        public void stop() {
4361          shouldRun = false;
4362        }
4363      }
4364    
  /** @return the FSImage owned by this namesystem's FSDirectory. */
  public FSImage getFSImage() {
    return dir.fsImage;
  }
4368    
  /** @return the edit log of the current FSImage. */
  public FSEditLog getEditLog() {
    return getFSImage().getEditLog();
  }
4372    
4373      private void checkBlock(ExtendedBlock block) throws IOException {
4374        if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
4375          throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
4376              + " - expected " + blockPoolId);
4377        }
4378      }
4379    
  /** @return number of missing blocks as tracked by the BlockManager. */
  @Metric({"MissingBlocks", "Number of missing blocks"})
  public long getMissingBlocksCount() {
    // not locking; the count may be slightly stale, which is acceptable
    // for a metric.
    return blockManager.getMissingBlocksCount();
  }
4385      
  /** @return number of expired datanode heartbeats. */
  @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
  public int getExpiredHeartbeats() {
    return datanodeStatistics.getExpiredHeartbeats();
  }
4390      
  /**
   * @return number of transactions written since the most recent checkpoint:
   *         last written txid minus the checkpoint's txid.
   */
  @Metric({"TransactionsSinceLastCheckpoint",
      "Number of transactions since last checkpoint"})
  public long getTransactionsSinceLastCheckpoint() {
    return getEditLog().getLastWrittenTxId() -
        getFSImage().getStorage().getMostRecentCheckpointTxId();
  }
4397      
4398      @Metric({"TransactionsSinceLastLogRoll",
4399          "Number of transactions since last edit log roll"})
4400      public long getTransactionsSinceLastLogRoll() {
4401        if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
4402          return 0;
4403        } else {
4404          return getEditLog().getLastWrittenTxId() -
4405            getEditLog().getCurSegmentTxId() + 1;
4406        }
4407      }
4408      
  /** @return the last transaction ID written to the edit log. */
  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
  public long getLastWrittenTransactionId() {
    return getEditLog().getLastWrittenTxId();
  }
4413      
  /** @return epoch time in milliseconds of the most recent checkpoint. */
  @Metric({"LastCheckpointTime",
      "Time in milliseconds since the epoch of the last checkpoint"})
  public long getLastCheckpointTime() {
    return getFSImage().getStorage().getMostRecentCheckpointTime();
  }
4419    
  /**
   * Aggregate filesystem statistics: the datanode capacity figures plus the
   * under-replicated, corrupt and missing block counts filled into their
   * ClientProtocol-defined array slots.
   *
   * @see ClientProtocol#getStats()
   */
  long[] getStats() {
    final long[] stats = datanodeStatistics.getStats();
    stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
    stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
    stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
    return stats;
  }
4428    
  /** @return total raw capacity of data nodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityTotal",
      "Total raw capacity of data nodes in bytes"})
  public long getCapacityTotal() {
    return datanodeStatistics.getCapacityTotal();
  }
4435    
  /** @return total raw capacity of data nodes, rounded to GB. */
  @Metric({"CapacityTotalGB",
      "Total raw capacity of data nodes in GB"})
  public float getCapacityTotalGB() {
    return DFSUtil.roundBytesToGB(getCapacityTotal());
  }
4441    
  /** @return total used capacity across all data nodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityUsed",
      "Total used capacity across all data nodes in bytes"})
  public long getCapacityUsed() {
    return datanodeStatistics.getCapacityUsed();
  }
4448    
  /** @return total used capacity across all data nodes, rounded to GB. */
  @Metric({"CapacityUsedGB",
      "Total used capacity across all data nodes in GB"})
  public float getCapacityUsedGB() {
    return DFSUtil.roundBytesToGB(getCapacityUsed());
  }
4454    
  /** @return remaining capacity across all data nodes, in bytes. */
  @Override // FSNamesystemMBean
  @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
  public long getCapacityRemaining() {
    return datanodeStatistics.getCapacityRemaining();
  }
4460    
  /** @return remaining capacity across all data nodes, rounded to GB. */
  @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
  public float getCapacityRemainingGB() {
    return DFSUtil.roundBytesToGB(getCapacityRemaining());
  }
4465    
  /** @return space used by data nodes for non-DFS purposes, in bytes. */
  @Metric({"CapacityUsedNonDFS",
      "Total space used by data nodes for non DFS purposes in bytes"})
  public long getCapacityUsedNonDFS() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }
4471    
4472      /**
4473       * Total number of connections.
4474       */
4475      @Override // FSNamesystemMBean
4476      @Metric
4477      public int getTotalLoad() {
4478        return datanodeStatistics.getXceiverCount();
4479      }
4480      
  /** @return the number of snapshottable directories. */
  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }
4485    
  /** @return the total number of snapshots. */
  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }
4490    
  /**
   * @return a JSON string with the snapshottable-directory and snapshot counts.
   */
  @Override
  public String getSnapshotStats() {
    Map<String, Object> info = new HashMap<String, Object>();
    info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
    info.put("Snapshots", this.getNumSnapshots());
    return JSON.toString(info);
  }
4498    
  /**
   * Count the datanodes matching the given report type.
   *
   * @param type which datanodes to include in the count
   * @return the number of matching datanodes
   */
  int getNumberOfDatanodes(DatanodeReportType type) {
    readLock();
    try {
      return getBlockManager().getDatanodeManager().getDatanodeListForReport(
          type).size(); 
    } finally {
      readUnlock();
    }
  }
4508    
  /**
   * Get a report on the datanodes matching the given type.
   * Requires superuser privilege.
   *
   * @param type which datanodes to include in the report
   * @return copies of the matching datanodes' info
   * @throws AccessControlException if the caller is not a superuser
   * @throws StandbyException if this NameNode cannot serve the request
   */
  DatanodeInfo[] datanodeReport(final DatanodeReportType type
      ) throws AccessControlException, StandbyException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    readLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      final DatanodeManager dm = getBlockManager().getDatanodeManager();      
      final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);

      // Copy each descriptor into a plain DatanodeInfo so the internal
      // descriptors are not exposed outside the lock.
      DatanodeInfo[] arr = new DatanodeInfo[results.size()];
      for (int i=0; i<arr.length; i++) {
        arr[i] = new DatanodeInfo(results.get(i));
      }
      return arr;
    } finally {
      readUnlock();
    }
  }
4528    
4529      /**
4530       * Save namespace image.
4531       * This will save current namespace into fsimage file and empty edits file.
4532       * Requires superuser privilege and safe mode.
4533       * 
4534       * @throws AccessControlException if superuser privilege is violated.
4535       * @throws IOException if 
4536       */
4537      void saveNamespace() throws AccessControlException, IOException {
4538        checkOperation(OperationCategory.UNCHECKED);
4539        checkSuperuserPrivilege();
4540        
4541        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
4542        if (cacheEntry != null && cacheEntry.isSuccess()) {
4543          return; // Return previous response
4544        }
4545        boolean success = false;
4546        readLock();
4547        try {
4548          checkOperation(OperationCategory.UNCHECKED);
4549    
4550          if (!isInSafeMode()) {
4551            throw new IOException("Safe mode should be turned ON "
4552                + "in order to create namespace image.");
4553          }
4554          getFSImage().saveNamespace(this);
4555          success = true;
4556        } finally {
4557          readUnlock();
4558          RetryCache.setState(cacheEntry, success);
4559        }
4560        LOG.info("New namespace image has been created");
4561      }
4562      
4563      /**
4564       * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
4565       * Requires superuser privilege.
4566       * 
4567       * @throws AccessControlException if superuser privilege is violated.
4568       */
4569      boolean restoreFailedStorage(String arg) throws AccessControlException,
4570          StandbyException {
4571        checkSuperuserPrivilege();
4572        checkOperation(OperationCategory.UNCHECKED);
4573        writeLock();
4574        try {
4575          checkOperation(OperationCategory.UNCHECKED);
4576          
4577          // if it is disabled - enable it and vice versa.
4578          if(arg.equals("check"))
4579            return getFSImage().getStorage().getRestoreFailedStorage();
4580          
4581          boolean val = arg.equals("true");  // false if not
4582          getFSImage().getStorage().setRestoreFailedStorage(val);
4583          
4584          return val;
4585        } finally {
4586          writeUnlock();
4587        }
4588      }
4589    
  /** @return the namesystem start time, as a fresh Date instance. */
  Date getStartTime() {
    return new Date(startTime); 
  }
4593        
  /**
   * Finalize the in-progress upgrade via the FSImage.
   * Requires superuser privilege.
   *
   * @throws IOException if finalization fails or this NN cannot serve it
   */
  void finalizeUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      // In an HA setup only the active NN finalizes shared edits.
      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
    } finally {
      writeUnlock();
    }
  }
4605    
  /**
   * Ask the DatanodeManager to refresh its node lists from a freshly loaded
   * HdfsConfiguration. Requires superuser privilege.
   *
   * @throws IOException if the refresh fails
   */
  void refreshNodes() throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
  }
4611    
  /**
   * Push a new balancer bandwidth value to the datanodes via the
   * DatanodeManager. Requires superuser privilege.
   *
   * @param bandwidth the new balancer bandwidth value
   * @throws IOException if the update fails
   */
  void setBalancerBandwidth(long bandwidth) throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
  }
4617    
4618      /**
4619       * SafeModeInfo contains information related to the safe mode.
4620       * <p>
4621       * An instance of {@link SafeModeInfo} is created when the name node
4622       * enters safe mode.
4623       * <p>
4624       * During name node startup {@link SafeModeInfo} counts the number of
4625       * <em>safe blocks</em>, those that have at least the minimal number of
4626       * replicas, and calculates the ratio of safe blocks to the total number
4627       * of blocks in the system, which is the size of blocks in
4628       * {@link FSNamesystem#blockManager}. When the ratio reaches the
4629       * {@link #threshold} it starts the SafeModeMonitor daemon in order
4630       * to monitor whether the safe mode {@link #extension} is passed.
4631       * Then it leaves safe mode and destroys itself.
4632       * <p>
4633       * If safe mode is turned on manually then the number of safe blocks is
4634       * not tracked because the name node is not intended to leave safe mode
4635       * automatically in the case.
4636       *
4637       * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
4638       */
4639      public class SafeModeInfo {
    // configuration fields
    /** Safe mode threshold condition %.*/
    private final double threshold;
    /** Safe mode minimum number of datanodes alive */
    private final int datanodeThreshold;
    /** Safe mode extension after the threshold, in milliseconds. */
    private int extension;
    /** Min replication required by safe mode. */
    private final int safeReplication;
    /** threshold for populating needed replication queues */
    private final double replQueueThreshold;
    // internal fields
    /** Time when threshold was reached.
     * <br> -1 safe mode is off
     * <br> 0 safe mode is on, and threshold is not reached yet
     * <br> >0 safe mode is on, but we are in extension period 
     */
    private long reached = -1;  
    /** Total number of blocks (-1 when safe mode was entered manually). */
    int blockTotal; 
    /** Number of safe blocks. */
    int blockSafe;
    /** Number of blocks needed to satisfy safe mode threshold condition */
    private int blockThreshold;
    /** Number of blocks needed before populating replication queues */
    private int blockReplQueueThreshold;
    /** time of the last status printout */
    private long lastStatusReport = 0;
    /** Was safemode entered automatically because available resources were low. */
    private boolean resourcesLow = false;
    /** Should safemode adjust its block totals as blocks come in */
    private boolean shouldIncrementallyTrackBlocks = false;
    /** counter for tracking startup progress of reported blocks */
    private Counter awaitingReportedBlocksCounter;
4674        
4675        /**
4676         * Creates SafeModeInfo when the name node enters
4677         * automatic safe mode at startup.
4678         *  
4679         * @param conf configuration
4680         */
4681        private SafeModeInfo(Configuration conf) {
4682          this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
4683              DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
4684          if(threshold > 1.0) {
4685            LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
4686          }
4687          this.datanodeThreshold = conf.getInt(
4688            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
4689            DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
4690          this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
4691          this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 
4692                                             DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
4693          
4694          LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
4695          LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
4696          LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + "     = " + extension);
4697    
4698          // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
4699          this.replQueueThreshold = 
4700            conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
4701                          (float) threshold);
4702          this.blockTotal = 0; 
4703          this.blockSafe = 0;
4704        }
4705    
4706        /**
4707         * In the HA case, the StandbyNode can be in safemode while the namespace
4708         * is modified by the edit log tailer. In this case, the number of total
4709         * blocks changes as edits are processed (eg blocks are added and deleted).
4710         * However, we don't want to do the incremental tracking during the
4711         * startup-time loading process -- only once the initial total has been
4712         * set after the image has been loaded.
4713         */
4714        private boolean shouldIncrementallyTrackBlocks() {
4715          return shouldIncrementallyTrackBlocks;
4716        }
4717    
4718        /**
4719         * Creates SafeModeInfo when safe mode is entered manually, or because
4720         * available resources are low.
4721         *
4722         * The {@link #threshold} is set to 1.5 so that it could never be reached.
4723         * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
4724         * 
4725         * @see SafeModeInfo
4726         */
4727        private SafeModeInfo(boolean resourcesLow) {
4728          this.threshold = 1.5f;  // this threshold can never be reached
4729          this.datanodeThreshold = Integer.MAX_VALUE;
4730          this.extension = Integer.MAX_VALUE;
4731          this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
4732          this.replQueueThreshold = 1.5f; // can never be reached
4733          this.blockTotal = -1;
4734          this.blockSafe = -1;
4735          this.resourcesLow = resourcesLow;
4736          enter();
4737          reportStatus("STATE* Safe mode is ON.", true);
4738        }
4739          
4740        /**
4741         * Check if safe mode is on.
4742         * @return true if in safe mode
4743         */
4744        private synchronized boolean isOn() {
4745          doConsistencyCheck();
4746          return this.reached >= 0;
4747        }
4748          
4749        /**
4750         * Enter safe mode.
4751         */
4752        private void enter() {
4753          this.reached = 0;
4754        }
4755          
4756        /**
4757         * Leave safe mode.
4758         * <p>
4759         * Check for invalid, under- & over-replicated blocks in the end of startup.
4760         */
4761        private synchronized void leave() {
4762          // if not done yet, initialize replication queues.
4763          // In the standby, do not populate repl queues
4764          if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
4765            initializeReplQueues();
4766          }
4767          long timeInSafemode = now() - startTime;
4768          NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 
4769                                        + timeInSafemode/1000 + " secs");
4770          NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
4771    
4772          //Log the following only once (when transitioning from ON -> OFF)
4773          if (reached >= 0) {
4774            NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 
4775          }
4776          reached = -1;
4777          safeMode = null;
4778          final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
4779          NameNode.stateChangeLog.info("STATE* Network topology has "
4780              + nt.getNumOfRacks() + " racks and "
4781              + nt.getNumOfLeaves() + " datanodes");
4782          NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
4783              + blockManager.numOfUnderReplicatedBlocks() + " blocks");
4784    
4785          startSecretManagerIfNecessary();
4786    
4787          // If startup has not yet completed, end safemode phase.
4788          StartupProgress prog = NameNode.getStartupProgress();
4789          if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4790            prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
4791            prog.endPhase(Phase.SAFEMODE);
4792          }
4793        }
4794    
4795        /**
4796         * Check whether we have reached the threshold for 
4797         * initializing replication queues.
4798         */
4799        private synchronized boolean canInitializeReplQueues() {
4800          return shouldPopulateReplQueues()
4801              && blockSafe >= blockReplQueueThreshold;
4802        }
4803          
4804        /** 
4805         * Safe mode can be turned off iff 
4806         * the threshold is reached and 
4807         * the extension time have passed.
4808         * @return true if can leave or false otherwise.
4809         */
4810        private synchronized boolean canLeave() {
4811          if (reached == 0) {
4812            return false;
4813          }
4814    
4815          if (now() - reached < extension) {
4816            reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
4817            return false;
4818          }
4819    
4820          if (needEnter()) {
4821            reportStatus("STATE* Safe mode ON, thresholds not met.", false);
4822            return false;
4823          }
4824    
4825          return true;
4826        }
4827          
4828        /** 
4829         * There is no need to enter safe mode 
4830         * if DFS is empty or {@link #threshold} == 0
4831         */
4832        private boolean needEnter() {
4833          return (threshold != 0 && blockSafe < blockThreshold) ||
4834            (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
4835            (!nameNodeHasResourcesAvailable());
4836        }
4837          
4838        /**
4839         * Check and trigger safe mode if needed. 
4840         */
4841        private void checkMode() {
4842          // Have to have write-lock since leaving safemode initializes
4843          // repl queues, which requires write lock
4844          assert hasWriteLock();
4845          // if smmthread is already running, the block threshold must have been 
4846          // reached before, there is no need to enter the safe mode again
4847          if (smmthread == null && needEnter()) {
4848            enter();
4849            // check if we are ready to initialize replication queues
4850            if (canInitializeReplQueues() && !isPopulatingReplQueues()
4851                && !haEnabled) {
4852              initializeReplQueues();
4853            }
4854            reportStatus("STATE* Safe mode ON.", false);
4855            return;
4856          }
4857          // the threshold is reached or was reached before
4858          if (!isOn() ||                           // safe mode is off
4859              extension <= 0 || threshold <= 0) {  // don't need to wait
4860            this.leave(); // leave safe mode
4861            return;
4862          }
4863          if (reached > 0) {  // threshold has already been reached before
4864            reportStatus("STATE* Safe mode ON.", false);
4865            return;
4866          }
4867          // start monitor
4868          reached = now();
4869          if (smmthread == null) {
4870            smmthread = new Daemon(new SafeModeMonitor());
4871            smmthread.start();
4872            reportStatus("STATE* Safe mode extension entered.", true);
4873          }
4874    
4875          // check if we are ready to initialize replication queues
4876          if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
4877            initializeReplQueues();
4878          }
4879        }
4880          
4881        /**
4882         * Set total number of blocks.
4883         */
4884        private synchronized void setBlockTotal(int total) {
4885          this.blockTotal = total;
4886          this.blockThreshold = (int) (blockTotal * threshold);
4887          this.blockReplQueueThreshold = 
4888            (int) (blockTotal * replQueueThreshold);
4889          if (haEnabled) {
4890            // After we initialize the block count, any further namespace
4891            // modifications done while in safe mode need to keep track
4892            // of the number of total blocks in the system.
4893            this.shouldIncrementallyTrackBlocks = true;
4894          }
4895          if(blockSafe < 0)
4896            this.blockSafe = 0;
4897          checkMode();
4898        }
4899          
4900        /**
4901         * Increment number of safe blocks if current block has 
4902         * reached minimal replication.
4903         * @param replication current replication 
4904         */
4905        private synchronized void incrementSafeBlockCount(short replication) {
4906          if (replication == safeReplication) {
4907            this.blockSafe++;
4908    
4909            // Report startup progress only if we haven't completed startup yet.
4910            StartupProgress prog = NameNode.getStartupProgress();
4911            if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
4912              if (this.awaitingReportedBlocksCounter == null) {
4913                this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
4914                  STEP_AWAITING_REPORTED_BLOCKS);
4915              }
4916              this.awaitingReportedBlocksCounter.increment();
4917            }
4918    
4919            checkMode();
4920          }
4921        }
4922          
4923        /**
4924         * Decrement number of safe blocks if current block has 
4925         * fallen below minimal replication.
4926         * @param replication current replication 
4927         */
4928        private synchronized void decrementSafeBlockCount(short replication) {
4929          if (replication == safeReplication-1) {
4930            this.blockSafe--;
4931            //blockSafe is set to -1 in manual / low resources safemode
4932            assert blockSafe >= 0 || isManual() || areResourcesLow();
4933            checkMode();
4934          }
4935        }
4936    
4937        /**
4938         * Check if safe mode was entered manually
4939         */
4940        private boolean isManual() {
4941          return extension == Integer.MAX_VALUE;
4942        }
4943    
4944        /**
4945         * Set manual safe mode.
4946         */
4947        private synchronized void setManual() {
4948          extension = Integer.MAX_VALUE;
4949        }
4950    
4951        /**
4952         * Check if safe mode was entered due to resources being low.
4953         */
4954        private boolean areResourcesLow() {
4955          return resourcesLow;
4956        }
4957    
4958        /**
4959         * Set that resources are low for this instance of safe mode.
4960         */
4961        private void setResourcesLow() {
4962          resourcesLow = true;
4963        }
4964    
4965        /**
4966         * A tip on how safe mode is to be turned off: manually or automatically.
4967         */
4968        String getTurnOffTip() {
4969          if(!isOn()) {
4970            return "Safe mode is OFF.";
4971          }
4972    
4973          //Manual OR low-resource safemode. (Admin intervention required)
4974          String adminMsg = "It was turned on manually. ";
4975          if (areResourcesLow()) {
4976            adminMsg = "Resources are low on NN. Please add or free up more "
4977              + "resources then turn off safe mode manually. NOTE:  If you turn off"
4978              + " safe mode before adding resources, "
4979              + "the NN will immediately return to safe mode. ";
4980          }
4981          if (isManual() || areResourcesLow()) {
4982            return adminMsg
4983              + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
4984          }
4985    
4986          boolean thresholdsMet = true;
4987          int numLive = getNumLiveDataNodes();
4988          String msg = "";
4989          if (blockSafe < blockThreshold) {
4990            msg += String.format(
4991              "The reported blocks %d needs additional %d"
4992              + " blocks to reach the threshold %.4f of total blocks %d.\n",
4993              blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
4994            thresholdsMet = false;
4995          } else {
4996            msg += String.format("The reported blocks %d has reached the threshold"
4997                + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
4998          }
4999          if (numLive < datanodeThreshold) {
5000            msg += String.format(
5001              "The number of live datanodes %d needs an additional %d live "
5002              + "datanodes to reach the minimum number %d.\n",
5003              numLive, (datanodeThreshold - numLive), datanodeThreshold);
5004            thresholdsMet = false;
5005          } else {
5006            msg += String.format("The number of live datanodes %d has reached "
5007                + "the minimum number %d. ",
5008                numLive, datanodeThreshold);
5009          }
5010          msg += (reached > 0) ? "In safe mode extension. " : "";
5011          msg += "Safe mode will be turned off automatically ";
5012    
5013          if (!thresholdsMet) {
5014            msg += "once the thresholds have been reached.";
5015          } else if (reached + extension - now() > 0) {
5016            msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
5017          } else {
5018            msg += "soon.";
5019          }
5020    
5021          return msg;
5022        }
5023    
5024        /**
5025         * Print status every 20 seconds.
5026         */
5027        private void reportStatus(String msg, boolean rightNow) {
5028          long curTime = now();
5029          if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
5030            return;
5031          NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
5032          lastStatusReport = curTime;
5033        }
5034    
5035        @Override
5036        public String toString() {
5037          String resText = "Current safe blocks = " 
5038            + blockSafe 
5039            + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
5040            + ". Minimal replication = " + safeReplication + ".";
5041          if (reached > 0) 
5042            resText += " Threshold was reached " + new Date(reached) + ".";
5043          return resText;
5044        }
5045          
5046        /**
5047         * Checks consistency of the class state.
5048         * This is costly so only runs if asserts are enabled.
5049         */
5050        private void doConsistencyCheck() {
5051          boolean assertsOn = false;
5052          assert assertsOn = true; // set to true if asserts are on
5053          if (!assertsOn) return;
5054          
5055          if (blockTotal == -1 && blockSafe == -1) {
5056            return; // manual safe mode
5057          }
5058          int activeBlocks = blockManager.getActiveBlockCount();
5059          if ((blockTotal != activeBlocks) &&
5060              !(blockSafe >= 0 && blockSafe <= blockTotal)) {
5061            throw new AssertionError(
5062                " SafeMode: Inconsistent filesystem state: "
5063            + "SafeMode data: blockTotal=" + blockTotal
5064            + " blockSafe=" + blockSafe + "; "
5065            + "BlockManager data: active="  + activeBlocks);
5066          }
5067        }
5068    
    /**
     * Adjust blockSafe and blockTotal by the given deltas. No-op unless
     * incremental tracking was enabled by setBlockTotal() (HA only).
     * @param deltaSafe change to apply to the safe block count
     * @param deltaTotal change to apply to the total block count
     */
    private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
      if (!shouldIncrementallyTrackBlocks) {
        return;
      }
      // shouldIncrementallyTrackBlocks is only ever set when haEnabled.
      assert haEnabled;
      
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adjusting block totals from " +
            blockSafe + "/" + blockTotal + " to " +
            (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
      }
      assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
        blockSafe + " by " + deltaSafe + ": would be negative";
      assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
        blockTotal + " by " + deltaTotal + ": would be negative";
      
      blockSafe += deltaSafe;
      // setBlockTotal() recomputes the thresholds and re-runs checkMode().
      setBlockTotal(blockTotal + deltaTotal);
    }
5088      }
5089        
5090      /**
5091       * Periodically check whether it is time to leave safe mode.
5092       * This thread starts when the threshold level is reached.
5093       *
5094       */
5095      class SafeModeMonitor implements Runnable {
5096        /** interval in msec for checking safe mode: {@value} */
5097        private static final long recheckInterval = 1000;
5098          
5099        /**
5100         */
5101        @Override
5102        public void run() {
5103          while (fsRunning) {
5104            writeLock();
5105            try {
5106              if (safeMode == null) { // Not in safe mode.
5107                break;
5108              }
5109              if (safeMode.canLeave()) {
5110                // Leave safe mode.
5111                safeMode.leave();
5112                smmthread = null;
5113                break;
5114              }
5115            } finally {
5116              writeUnlock();
5117            }
5118    
5119            try {
5120              Thread.sleep(recheckInterval);
5121            } catch (InterruptedException ie) {
5122              // Ignored
5123            }
5124          }
5125          if (!fsRunning) {
5126            LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
5127          }
5128        }
5129      }
5130        
5131      boolean setSafeMode(SafeModeAction action) throws IOException {
5132        if (action != SafeModeAction.SAFEMODE_GET) {
5133          checkSuperuserPrivilege();
5134          switch(action) {
5135          case SAFEMODE_LEAVE: // leave safe mode
5136            leaveSafeMode();
5137            break;
5138          case SAFEMODE_ENTER: // enter safe mode
5139            enterSafeMode(false);
5140            break;
5141          default:
5142            LOG.error("Unexpected safe mode action");
5143          }
5144        }
5145        return isInSafeMode();
5146      }
5147    
5148      @Override
5149      public void checkSafeMode() {
5150        // safeMode is volatile, and may be set to null at any time
5151        SafeModeInfo safeMode = this.safeMode;
5152        if (safeMode != null) {
5153          safeMode.checkMode();
5154        }
5155      }
5156    
5157      @Override
5158      public boolean isInSafeMode() {
5159        // safeMode is volatile, and may be set to null at any time
5160        SafeModeInfo safeMode = this.safeMode;
5161        if (safeMode == null)
5162          return false;
5163        return safeMode.isOn();
5164      }
5165    
5166      @Override
5167      public boolean isInStartupSafeMode() {
5168        // safeMode is volatile, and may be set to null at any time
5169        SafeModeInfo safeMode = this.safeMode;
5170        if (safeMode == null)
5171          return false;
5172        // If the NN is in safemode, and not due to manual / low resources, we
5173        // assume it must be because of startup. If the NN had low resources during
5174        // startup, we assume it came out of startup safemode and it is now in low
5175        // resources safemode
5176        return !safeMode.isManual() && !safeMode.areResourcesLow()
5177          && safeMode.isOn();
5178      }
5179    
5180      /**
5181       * Check if replication queues are to be populated
5182       * @return true when node is HAState.Active and not in the very first safemode
5183       */
5184      @Override
5185      public boolean isPopulatingReplQueues() {
5186        if (!shouldPopulateReplQueues()) {
5187          return false;
5188        }
5189        return initializedReplQueues;
5190      }
5191    
5192      private boolean shouldPopulateReplQueues() {
5193        if(haContext == null || haContext.getState() == null)
5194          return false;
5195        return haContext.getState().shouldPopulateReplQueues();
5196      }
5197    
5198      @Override
5199      public void incrementSafeBlockCount(int replication) {
5200        // safeMode is volatile, and may be set to null at any time
5201        SafeModeInfo safeMode = this.safeMode;
5202        if (safeMode == null)
5203          return;
5204        safeMode.incrementSafeBlockCount((short)replication);
5205      }
5206    
5207      @Override
5208      public void decrementSafeBlockCount(Block b) {
5209        // safeMode is volatile, and may be set to null at any time
5210        SafeModeInfo safeMode = this.safeMode;
5211        if (safeMode == null) // mostly true
5212          return;
5213        BlockInfo storedBlock = getStoredBlock(b);
5214        if (storedBlock.isComplete()) {
5215          safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
5216        }
5217      }
5218      
5219      /**
5220       * Adjust the total number of blocks safe and expected during safe mode.
5221       * If safe mode is not currently on, this is a no-op.
5222       * @param deltaSafe the change in number of safe blocks
5223       * @param deltaTotal the change i nnumber of total blocks expected
5224       */
5225      @Override
5226      public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) {
5227        // safeMode is volatile, and may be set to null at any time
5228        SafeModeInfo safeMode = this.safeMode;
5229        if (safeMode == null)
5230          return;
5231        safeMode.adjustBlockTotals(deltaSafe, deltaTotal);
5232      }
5233    
5234      /**
5235       * Set the total number of blocks in the system. 
5236       */
5237      public void setBlockTotal() {
5238        // safeMode is volatile, and may be set to null at any time
5239        SafeModeInfo safeMode = this.safeMode;
5240        if (safeMode == null)
5241          return;
5242        safeMode.setBlockTotal((int)getCompleteBlocksTotal());
5243      }
5244    
5245      /**
5246       * Get the total number of blocks in the system. 
5247       */
5248      @Override // FSNamesystemMBean
5249      @Metric
5250      public long getBlocksTotal() {
5251        return blockManager.getTotalBlocks();
5252      }
5253    
5254      /**
5255       * Get the total number of COMPLETE blocks in the system.
5256       * For safe mode only complete blocks are counted.
5257       */
5258      private long getCompleteBlocksTotal() {
5259        // Calculate number of blocks under construction
5260        long numUCBlocks = 0;
5261        readLock();
5262        try {
5263          for (Lease lease : leaseManager.getSortedLeases()) {
5264            for (String path : lease.getPaths()) {
5265              final INodeFile cons;
5266              try {
5267                cons = dir.getINode(path).asFile();
5268                Preconditions.checkState(cons.isUnderConstruction());
5269              } catch (UnresolvedLinkException e) {
5270                throw new AssertionError("Lease files should reside on this FS");
5271              }
5272              BlockInfo[] blocks = cons.getBlocks();
5273              if(blocks == null)
5274                continue;
5275              for(BlockInfo b : blocks) {
5276                if(!b.isComplete())
5277                  numUCBlocks++;
5278              }
5279            }
5280          }
5281          LOG.info("Number of blocks under construction: " + numUCBlocks);
5282          return getBlocksTotal() - numUCBlocks;
5283        } finally {
5284          readUnlock();
5285        }
5286      }
5287    
5288      /**
5289       * Enter safe mode. If resourcesLow is false, then we assume it is manual
5290       * @throws IOException
5291       */
5292      void enterSafeMode(boolean resourcesLow) throws IOException {
5293        writeLock();
5294        try {
5295          // Stop the secret manager, since rolling the master key would
5296          // try to write to the edit log
5297          stopSecretManager();
5298    
5299          // Ensure that any concurrent operations have been fully synced
5300          // before entering safe mode. This ensures that the FSImage
5301          // is entirely stable on disk as soon as we're in safe mode.
5302          boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
5303          // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
5304          // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode
5305          if (isEditlogOpenForWrite) {
5306            getEditLog().logSyncAll();
5307          }
5308          if (!isInSafeMode()) {
5309            safeMode = new SafeModeInfo(resourcesLow);
5310            return;
5311          }
5312          if (resourcesLow) {
5313            safeMode.setResourcesLow();
5314          } else {
5315            safeMode.setManual();
5316          }
5317          if (isEditlogOpenForWrite) {
5318            getEditLog().logSyncAll();
5319          }
5320          NameNode.stateChangeLog.info("STATE* Safe mode is ON"
5321              + safeMode.getTurnOffTip());
5322        } finally {
5323          writeUnlock();
5324        }
5325      }
5326    
5327      /**
5328       * Leave safe mode.
5329       * @throws IOException
5330       */
5331      void leaveSafeMode() {
5332        writeLock();
5333        try {
5334          if (!isInSafeMode()) {
5335            NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 
5336            return;
5337          }
5338          safeMode.leave();
5339        } finally {
5340          writeUnlock();
5341        }
5342      }
5343        
5344      String getSafeModeTip() {
5345        readLock();
5346        try {
5347          if (!isInSafeMode()) {
5348            return "";
5349          }
5350          return safeMode.getTurnOffTip();
5351        } finally {
5352          readUnlock();
5353        }
5354      }
5355    
  /**
   * Roll the edit log. Superuser-only; disallowed while in safe mode.
   * @return the checkpoint signature of the rolled log
   * @throws IOException if rolling fails or the operation is disallowed
   */
  CheckpointSignature rollEditLog() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.JOURNAL);
    writeLock();
    try {
      // Re-validate under the write lock before mutating the journal.
      checkOperation(OperationCategory.JOURNAL);
      checkNameNodeSafeMode("Log not rolled");
      if (Server.isRpcInvocation()) {
        LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
      }
      return getFSImage().rollEditLog();
    } finally {
      writeUnlock();
    }
  }
5371    
  /**
   * Start a checkpoint on behalf of the given backup node. Uses the retry
   * cache so a retried RPC returns the previously computed command instead
   * of starting a second checkpoint.
   * @param backupNode registration of the node performing the checkpoint
   * @param activeNamenode registration of the active NameNode
   * @return the command describing the checkpoint to perform
   * @throws IOException if the checkpoint cannot be started
   */
  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (NamenodeCommand) cacheEntry.getPayload();
    }
    writeLock();
    NamenodeCommand cmd = null;
    try {
      checkOperation(OperationCategory.CHECKPOINT);
      checkNameNodeSafeMode("Checkpoint not started");
      
      LOG.info("Start checkpoint for " + backupNode.getAddress());
      cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
      getEditLog().logSync();
      return cmd;
    } finally {
      writeUnlock();
      // Record the outcome so a retried RPC gets the same command back;
      // a null cmd marks the attempt as failed.
      RetryCache.setState(cacheEntry, cmd != null, cmd);
    }
  }
5395    
  /**
   * Forward an incremental block report to the BlockManager under the
   * namesystem write lock.
   * @param nodeID reporting datanode
   * @param poolId block pool id (currently unused here; the BlockManager
   *               call takes only nodeID and the report)
   * @param srdb the received/deleted block report
   * @throws IOException if processing fails
   */
  public void processIncrementalBlockReport(final DatanodeID nodeID,
      final String poolId, final StorageReceivedDeletedBlocks srdb)
      throws IOException {
    writeLock();
    try {
      blockManager.processIncrementalBlockReport(nodeID, srdb);
    } finally {
      writeUnlock();
    }
  }
5406      
  /**
   * Complete a previously started checkpoint. Uses the retry cache so a
   * retried RPC is treated as already answered.
   * @param registration registration of the node that performed the checkpoint
   * @param sig signature identifying the checkpoint being ended
   * @throws IOException if ending the checkpoint fails
   */
  void endCheckpoint(NamenodeRegistration registration,
                            CheckpointSignature sig) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    // NOTE(review): takes the read lock (unlike startCheckpoint's write
    // lock) — presumably endCheckpoint does not mutate the namespace;
    // confirm before changing.
    readLock();
    try {
      checkOperation(OperationCategory.CHECKPOINT);

      checkNameNodeSafeMode("Checkpoint not ended");
      LOG.info("End checkpoint for " + registration.getAddress());
      getFSImage().endCheckpoint(sig);
      success = true;
    } finally {
      readUnlock();
      // Record success/failure so a retried RPC sees the same outcome.
      RetryCache.setState(cacheEntry, success);
    }
  }
5428    
  /**
   * Build a PermissionStatus owned by the filesystem owner and supergroup
   * with the given permission bits.
   */
  PermissionStatus createFsOwnerPermissions(FsPermission permission) {
    return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
  }
5432    
  /** Check that the caller owns {@code path} (doCheckOwner=true, no access bits). */
  private void checkOwner(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, true, null, null, null, null);
  }
5437    
  /** Check {@code access} on the path itself (the "access" slot of checkPermission). */
  private void checkPathAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, access, null);
  }
5443    
  /** Check {@code access} on the path's parent (the "parentAccess" slot). */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, access, null, null);
  }
5449    
  /** Check {@code access} on the path's last existing ancestor (the "ancestorAccess" slot). */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, access, null, null, null);
  }
5455    
  /** Check traverse permission only: all access slots are null. */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, null, null);
  }
5460    
5461      @Override
5462      public void checkSuperuserPrivilege()
5463          throws AccessControlException {
5464        if (isPermissionEnabled) {
5465          FSPermissionChecker pc = getPermissionChecker();
5466          pc.checkSuperuserPrivilege();
5467        }
5468      }
5469    
5470      /**
5471       * Check whether current user have permissions to access the path. For more
5472       * details of the parameters, see
5473       * {@link FSPermissionChecker#checkPermission()}.
5474       */
5475      private void checkPermission(FSPermissionChecker pc,
5476          String path, boolean doCheckOwner, FsAction ancestorAccess,
5477          FsAction parentAccess, FsAction access, FsAction subAccess)
5478          throws AccessControlException, UnresolvedLinkException {
5479            checkPermission(pc, path, doCheckOwner, ancestorAccess,
5480                parentAccess, access, subAccess, true);
5481      }
5482    
5483      /**
5484       * Check whether current user have permissions to access the path. For more
5485       * details of the parameters, see
5486       * {@link FSPermissionChecker#checkPermission()}.
5487       */
5488      private void checkPermission(FSPermissionChecker pc,
5489          String path, boolean doCheckOwner, FsAction ancestorAccess,
5490          FsAction parentAccess, FsAction access, FsAction subAccess,
5491          boolean resolveLink)
5492          throws AccessControlException, UnresolvedLinkException {
5493        if (!pc.isSuperUser()) {
5494          dir.waitForReady();
5495          readLock();
5496          try {
5497            pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess,
5498                parentAccess, access, subAccess, resolveLink);
5499          } finally {
5500            readUnlock();
5501          }
5502        }
5503      }
5504      
5505      /**
5506       * Check to see if we have exceeded the limit on the number
5507       * of inodes.
5508       */
5509      void checkFsObjectLimit() throws IOException {
5510        if (maxFsObjects != 0 &&
5511            maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
5512          throw new IOException("Exceeded the configured number of objects " +
5513                                 maxFsObjects + " in the filesystem.");
5514        }
5515      }
5516    
5517      /**
5518       * Get the total number of objects in the system. 
5519       */
5520      @Override // FSNamesystemMBean
5521      public long getMaxObjects() {
5522        return maxFsObjects;
5523      }
5524    
  /** Total number of inodes in the namespace, read under the read lock. */
  @Override // FSNamesystemMBean
  @Metric
  public long getFilesTotal() {
    readLock();
    try {
      return this.dir.totalInodes();
    } finally {
      readUnlock();
    }
  }
5535    
  /** Number of blocks with replication currently in flight (BlockManager metric). */
  @Override // FSNamesystemMBean
  @Metric
  public long getPendingReplicationBlocks() {
    return blockManager.getPendingReplicationBlocksCount();
  }
5541    
  /** Number of under-replicated blocks (BlockManager metric). */
  @Override // FSNamesystemMBean
  @Metric
  public long getUnderReplicatedBlocks() {
    return blockManager.getUnderReplicatedBlocksCount();
  }
5547    
  /** Returns number of blocks with corrupt replicas (BlockManager metric). */
  @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
  public long getCorruptReplicaBlocks() {
    return blockManager.getCorruptReplicaBlocksCount();
  }
5553    
  /** Number of blocks scheduled for replication (BlockManager metric). */
  @Override // FSNamesystemMBean
  @Metric
  public long getScheduledReplicationBlocks() {
    return blockManager.getScheduledReplicationBlocksCount();
  }
5559    
  /** Number of blocks queued for deletion (BlockManager metric). */
  @Override
  @Metric
  public long getPendingDeletionBlocks() {
    return blockManager.getPendingDeletionBlocksCount();
  }
5565    
  /** Number of over-replicated (excess) blocks (BlockManager metric). */
  @Metric
  public long getExcessBlocks() {
    return blockManager.getExcessBlocksCount();
  }
5570      
  // HA-only metric: mis-replicated blocks whose processing is postponed
  // (delegates to the BlockManager).
  @Metric
  public long getPostponedMisreplicatedBlocks() {
    return blockManager.getPostponedMisreplicatedBlocksCount();
  }
5576    
  // HA-only metric: queued datanode messages (delegates to the BlockManager).
  @Metric
  public int getPendingDataNodeMessageCount() {
    return blockManager.getPendingDataNodeMessageCount();
  }
5582      
  // HA-only metric: current HA state name from the HA context.
  @Metric
  public String getHAState() {
    return haContext.getState().toString();
  }
5588    
5589      // HA-only metric
5590      @Metric
5591      public long getMillisSinceLastLoadedEdits() {
5592        if (isInStandbyState() && editLogTailer != null) {
5593          return now() - editLogTailer.getLastLoadTimestamp();
5594        } else {
5595          return 0;
5596        }
5597      }
5598      
  /** Capacity of the blocks map (delegates to the BlockManager). */
  @Metric
  public int getBlockCapacity() {
    return blockManager.getCapacity();
  }
5603    
5604      @Override // FSNamesystemMBean
5605      public String getFSState() {
5606        return isInSafeMode() ? "safeMode" : "Operational";
5607      }
5608      
  // JMX name of the FSNamesystemState MBean; set in registerMBean(),
  // unregistered and nulled in shutdown().
  private ObjectName mbeanName;
  // JMX name of the MXBean; unregistered and nulled in shutdown().
  private ObjectName mxbeanName;
5611    
5612      /**
5613       * Register the FSNamesystem MBean using the name
5614       *        "hadoop:service=NameNode,name=FSNamesystemState"
5615       */
5616      private void registerMBean() {
5617        // We can only implement one MXBean interface, so we keep the old one.
5618        try {
5619          StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
5620          mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
5621        } catch (NotCompliantMBeanException e) {
5622          throw new RuntimeException("Bad MBean setup", e);
5623        }
5624    
5625        LOG.info("Registered FSNamesystemState MBean");
5626      }
5627    
5628      /**
5629       * shutdown FSNamesystem
5630       */
5631      void shutdown() {
5632        if (mbeanName != null) {
5633          MBeans.unregister(mbeanName);
5634          mbeanName = null;
5635        }
5636        if (mxbeanName != null) {
5637          MBeans.unregister(mxbeanName);
5638          mxbeanName = null;
5639        }
5640        if (dir != null) {
5641          dir.shutdown();
5642        }
5643        if (blockManager != null) {
5644          blockManager.shutdown();
5645        }
5646      }
5647      
5648    
  /** Number of live datanodes (delegates to the DatanodeManager). */
  @Override // FSNamesystemMBean
  public int getNumLiveDataNodes() {
    return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
  }
5653    
  /** Number of dead datanodes (delegates to the DatanodeManager). */
  @Override // FSNamesystemMBean
  public int getNumDeadDataNodes() {
    return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
  }
5658      
5659      @Override // FSNamesystemMBean
5660      public int getNumDecomLiveDataNodes() {
5661        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
5662        getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
5663        int liveDecommissioned = 0;
5664        for (DatanodeDescriptor node : live) {
5665          liveDecommissioned += node.isDecommissioned() ? 1 : 0;
5666        }
5667        return liveDecommissioned;
5668      }
5669    
5670      @Override // FSNamesystemMBean
5671      public int getNumDecomDeadDataNodes() {
5672        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
5673        getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
5674        int deadDecommissioned = 0;
5675        for (DatanodeDescriptor node : dead) {
5676          deadDecommissioned += node.isDecommissioned() ? 1 : 0;
5677        }
5678        return deadDecommissioned;
5679      }
5680    
  /** Number of datanodes currently decommissioning (DatanodeManager metric). */
  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }
5686    
  /** Number of datanodes marked stale due to delayed heartbeat. */
  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes", 
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }
5693    
5694      /**
5695       * Sets the current generation stamp for legacy blocks
5696       */
5697      void setGenerationStampV1(long stamp) {
5698        generationStampV1.setCurrentValue(stamp);
5699      }
5700    
5701      /**
5702       * Gets the current generation stamp for legacy blocks
5703       */
5704      long getGenerationStampV1() {
5705        return generationStampV1.getCurrentValue();
5706      }
5707    
5708      /**
5709       * Gets the current generation stamp for this filesystem
5710       */
5711      void setGenerationStampV2(long stamp) {
5712        generationStampV2.setCurrentValue(stamp);
5713      }
5714    
5715      /**
5716       * Gets the current generation stamp for this filesystem
5717       */
5718      long getGenerationStampV2() {
5719        return generationStampV2.getCurrentValue();
5720      }
5721    
5722      /**
5723       * Upgrades the generation stamp for the filesystem
5724       * by reserving a sufficient range for all existing blocks.
5725       * Should be invoked only during the first upgrade to
5726       * sequential block IDs.
5727       */
5728      long upgradeGenerationStampToV2() {
5729        Preconditions.checkState(generationStampV2.getCurrentValue() ==
5730            GenerationStamp.LAST_RESERVED_STAMP);
5731    
5732        generationStampV2.skipTo(
5733            generationStampV1.getCurrentValue() +
5734            HdfsConstants.RESERVED_GENERATION_STAMPS_V1);
5735    
5736        generationStampV1Limit = generationStampV2.getCurrentValue();
5737        return generationStampV2.getCurrentValue();
5738      }
5739    
5740      /**
5741       * Sets the generation stamp that delineates random and sequentially
5742       * allocated block IDs.
5743       * @param stamp
5744       */
5745      void setGenerationStampV1Limit(long stamp) {
5746        Preconditions.checkState(generationStampV1Limit ==
5747                                 GenerationStamp.GRANDFATHER_GENERATION_STAMP);
5748        generationStampV1Limit = stamp;
5749      }
5750    
5751      /**
5752       * Gets the value of the generation stamp that delineates sequential
5753       * and random block IDs.
5754       */
5755      long getGenerationStampAtblockIdSwitch() {
5756        return generationStampV1Limit;
5757      }
5758    
  /** Exposes the sequential block ID generator, for tests only. */
  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }
5763    
5764      /**
5765       * Sets the maximum allocated block ID for this filesystem. This is
5766       * the basis for allocating new block IDs.
5767       */
5768      void setLastAllocatedBlockId(long blockId) {
5769        blockIdGenerator.skipTo(blockId);
5770      }
5771    
5772      /**
5773       * Gets the maximum sequentially allocated block ID for this filesystem
5774       */
5775      long getLastAllocatedBlockId() {
5776        return blockIdGenerator.getCurrentValue();
5777      }
5778    
5779      /**
5780       * Increments, logs and then returns the stamp
5781       */
5782      long nextGenerationStamp(boolean legacyBlock)
5783          throws IOException, SafeModeException {
5784        assert hasWriteLock();
5785        checkNameNodeSafeMode("Cannot get next generation stamp");
5786    
5787        long gs;
5788        if (legacyBlock) {
5789          gs = getNextGenerationStampV1();
5790          getEditLog().logGenerationStampV1(gs);
5791        } else {
5792          gs = getNextGenerationStampV2();
5793          getEditLog().logGenerationStampV2(gs);
5794        }
5795    
5796        // NB: callers sync the log
5797        return gs;
5798      }
5799    
5800      @VisibleForTesting
5801      long getNextGenerationStampV1() throws IOException {
5802        long genStampV1 = generationStampV1.nextValue();
5803    
5804        if (genStampV1 >= generationStampV1Limit) {
5805          // We ran out of generation stamps for legacy blocks. In practice, it
5806          // is extremely unlikely as we reserved 1T v1 generation stamps. The
5807          // result is that we can no longer append to the legacy blocks that
5808          // were created before the upgrade to sequential block IDs.
5809          throw new OutOfV1GenerationStampsException();
5810        }
5811    
5812        return genStampV1;
5813      }
5814    
  /** Increments and returns the next v2 generation stamp. */
  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }
5819    
  /** @return the stamp below which blocks are considered legacy (v1). */
  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }
5823    
5824      /**
5825       * Determine whether the block ID was randomly generated (legacy) or
5826       * sequentially generated. The generation stamp value is used to
5827       * make the distinction.
5828       * @param block
5829       * @return true if the block ID was randomly generated, false otherwise.
5830       */
5831      boolean isLegacyBlock(Block block) {
5832        return block.getGenerationStamp() < getGenerationStampV1Limit();
5833      }
5834    
5835      /**
5836       * Increments, logs and then returns the block ID
5837       */
5838      private long nextBlockId() throws IOException {
5839        assert hasWriteLock();
5840        checkNameNodeSafeMode("Cannot get next block ID");
5841        final long blockId = blockIdGenerator.nextValue();
5842        getEditLog().logAllocateBlockId(blockId);
5843        // NB: callers sync the log
5844        return blockId;
5845      }
5846    
5847      private INodeFile checkUCBlock(ExtendedBlock block,
5848          String clientName) throws IOException {
5849        assert hasWriteLock();
5850        checkNameNodeSafeMode("Cannot get a new generation stamp and an "
5851            + "access token for block " + block);
5852        
5853        // check stored block state
5854        BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
5855        if (storedBlock == null || 
5856            storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
5857            throw new IOException(block + 
5858                " does not exist or is not under Construction" + storedBlock);
5859        }
5860        
5861        // check file inode
5862        final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
5863        if (file == null || !file.isUnderConstruction()) {
5864          throw new IOException("The file " + storedBlock + 
5865              " belonged to does not exist or it is not under construction.");
5866        }
5867        
5868        // check lease
5869        if (clientName == null
5870            || !clientName.equals(file.getFileUnderConstructionFeature()
5871                .getClientName())) {
5872          throw new LeaseExpiredException("Lease mismatch: " + block + 
5873              " is accessed by a non lease holder " + clientName); 
5874        }
5875    
5876        return file;
5877      }
5878      
5879      /**
5880       * Client is reporting some bad block locations.
5881       */
5882      void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
5883        checkOperation(OperationCategory.WRITE);
5884        NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
5885        writeLock();
5886        try {
5887          checkOperation(OperationCategory.WRITE);
5888          for (int i = 0; i < blocks.length; i++) {
5889            ExtendedBlock blk = blocks[i].getBlock();
5890            DatanodeInfo[] nodes = blocks[i].getLocations();
5891            String[] storageIDs = blocks[i].getStorageIDs();
5892            for (int j = 0; j < nodes.length; j++) {
5893              blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
5894                  storageIDs == null ? null: storageIDs[j], 
5895                  "client machine reported it");
5896            }
5897          }
5898        } finally {
5899          writeUnlock();
5900        }
5901      }
5902    
5903      /**
5904       * Get a new generation stamp together with an access token for 
5905       * a block under construction
5906       * 
5907       * This method is called for recovering a failed pipeline or setting up
5908       * a pipeline to append to a block.
5909       * 
5910       * @param block a block
5911       * @param clientName the name of a client
5912       * @return a located block with a new generation stamp and an access token
5913       * @throws IOException if any error occurs
5914       */
5915      LocatedBlock updateBlockForPipeline(ExtendedBlock block, 
5916          String clientName) throws IOException {
5917        LocatedBlock locatedBlock;
5918        checkOperation(OperationCategory.WRITE);
5919        writeLock();
5920        try {
5921          checkOperation(OperationCategory.WRITE);
5922    
5923          // check vadility of parameters
5924          checkUCBlock(block, clientName);
5925      
5926          // get a new generation stamp and an access token
5927          block.setGenerationStamp(
5928              nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
5929          locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
5930          blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
5931        } finally {
5932          writeUnlock();
5933        }
5934        // Ensure we record the new generation stamp
5935        getEditLog().logSync();
5936        return locatedBlock;
5937      }
5938      
5939      /**
5940       * Update a pipeline for a block under construction
5941       * 
5942       * @param clientName the name of the client
5943       * @param oldBlock and old block
5944       * @param newBlock a new block with a new generation stamp and length
5945       * @param newNodes datanodes in the pipeline
5946       * @throws IOException if any error occurs
5947       */
5948      void updatePipeline(String clientName, ExtendedBlock oldBlock, 
5949          ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
5950          throws IOException {
5951        checkOperation(OperationCategory.WRITE);
5952        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
5953        if (cacheEntry != null && cacheEntry.isSuccess()) {
5954          return; // Return previous response
5955        }
5956        LOG.info("updatePipeline(block=" + oldBlock
5957                 + ", newGenerationStamp=" + newBlock.getGenerationStamp()
5958                 + ", newLength=" + newBlock.getNumBytes()
5959                 + ", newNodes=" + Arrays.asList(newNodes)
5960                 + ", clientName=" + clientName
5961                 + ")");
5962        writeLock();
5963        boolean success = false;
5964        try {
5965          checkOperation(OperationCategory.WRITE);
5966          checkNameNodeSafeMode("Pipeline not updated");
5967          assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
5968            + oldBlock + " has different block identifier";
5969          updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
5970              newStorageIDs, cacheEntry != null);
5971          success = true;
5972        } finally {
5973          writeUnlock();
5974          RetryCache.setState(cacheEntry, success);
5975        }
5976        getEditLog().logSync();
5977        LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
5978      }
5979    
  /** @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[], String[]) */
  private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 
      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
      boolean logRetryCache)
      throws IOException {
    assert hasWriteLock();
    // check the validity of the block and lease holder name
    final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
    final BlockInfoUnderConstruction blockinfo
        = (BlockInfoUnderConstruction)pendingFile.getLastBlock();

    // check new GS & length: this is not expected
    if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
        newBlock.getNumBytes() < blockinfo.getNumBytes()) {
      String msg = "Update " + oldBlock + " (len = " + 
        blockinfo.getNumBytes() + ") to an older state: " + newBlock + 
        " (len = " + newBlock.getNumBytes() +")";
      LOG.warn(msg);
      throw new IOException(msg);
    }

    // Update old block with the new generation stamp and new length
    blockinfo.setNumBytes(newBlock.getNumBytes());
    blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());

    // find the DatanodeDescriptor objects
    final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
        .getDatanodeStorageInfos(newNodes, newStorageIDs);
    blockinfo.setExpectedLocations(storages);

    // Persist the updated block list of the file to the edit log.
    String src = pendingFile.getFullPathName();
    dir.persistBlocks(src, pendingFile, logRetryCache);
  }
6013    
  /**
   * Rename was successful. If any part of the renamed subtree had
   * files that were being written to, update the lease records with
   * the new path.
   */
  void unprotectedChangeLease(String src, String dst) {
    assert hasWriteLock();
    leaseManager.changeLease(src, dst);
  }
6020    
6021      /**
6022       * @return all the under-construction files in the lease map
6023       */
6024      Map<String, INodeFile> getFilesUnderConstruction() {
6025        synchronized (leaseManager) {
6026          return leaseManager.getINodesUnderConstruction();
6027        }
6028      }
6029    
6030      /**
6031       * Register a Backup name-node, verifying that it belongs
6032       * to the correct namespace, and adding it to the set of
6033       * active journals if necessary.
6034       * 
6035       * @param bnReg registration of the new BackupNode
6036       * @param nnReg registration of this NameNode
6037       * @throws IOException if the namespace IDs do not match
6038       */
6039      void registerBackupNode(NamenodeRegistration bnReg,
6040          NamenodeRegistration nnReg) throws IOException {
6041        writeLock();
6042        try {
6043          if(getFSImage().getStorage().getNamespaceID() 
6044             != bnReg.getNamespaceID())
6045            throw new IOException("Incompatible namespaceIDs: "
6046                + " Namenode namespaceID = "
6047                + getFSImage().getStorage().getNamespaceID() + "; "
6048                + bnReg.getRole() +
6049                " node namespaceID = " + bnReg.getNamespaceID());
6050          if (bnReg.getRole() == NamenodeRole.BACKUP) {
6051            getFSImage().getEditLog().registerBackupNode(
6052                bnReg, nnReg);
6053          }
6054        } finally {
6055          writeUnlock();
6056        }
6057      }
6058    
6059      /**
6060       * Release (unregister) backup node.
6061       * <p>
6062       * Find and remove the backup stream corresponding to the node.
6063       * @param registration
6064       * @throws IOException
6065       */
6066      void releaseBackupNode(NamenodeRegistration registration)
6067        throws IOException {
6068        checkOperation(OperationCategory.WRITE);
6069        writeLock();
6070        try {
6071          checkOperation(OperationCategory.WRITE);
6072          if(getFSImage().getStorage().getNamespaceID()
6073             != registration.getNamespaceID())
6074            throw new IOException("Incompatible namespaceIDs: "
6075                + " Namenode namespaceID = "
6076                + getFSImage().getStorage().getNamespaceID() + "; "
6077                + registration.getRole() +
6078                " node namespaceID = " + registration.getNamespaceID());
6079          getEditLog().releaseBackupStream(registration);
6080        } finally {
6081          writeUnlock();
6082        }
6083      }
6084    
  /** A (path, block) pair describing one corrupt block of a file. */
  static class CorruptFileBlockInfo {
    final String path;
    final Block block;
    
    public CorruptFileBlockInfo(String p, Block b) {
      path = p;
      block = b;
    }
    
    @Override
    public String toString() {
      // Tab-separated: block name, then the file path.
      return block.getBlockName() + "\t" + path;
    }
  }
6099      /**
6100       * @param path Restrict corrupt files to this portion of namespace.
6101       * @param startBlockAfter Support for continuation; the set of files we return
6102       *  back is ordered by blockid; startBlockAfter tells where to start from
6103       * @return a list in which each entry describes a corrupt file/block
6104       * @throws AccessControlException
6105       * @throws IOException
6106       */
6107      Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
6108      String[] cookieTab) throws IOException {
6109        checkSuperuserPrivilege();
6110        checkOperation(OperationCategory.READ);
6111        readLock();
6112        try {
6113          checkOperation(OperationCategory.READ);
6114          if (!isPopulatingReplQueues()) {
6115            throw new IOException("Cannot run listCorruptFileBlocks because " +
6116                                  "replication queues have not been initialized.");
6117          }
6118          // print a limited # of corrupt files per call
6119          int count = 0;
6120          ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();
6121    
6122          final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();
6123    
6124          if (cookieTab == null) {
6125            cookieTab = new String[] { null };
6126          }
6127          int skip = getIntCookie(cookieTab[0]);
6128          for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
6129            blkIterator.next();
6130          }
6131    
6132          while (blkIterator.hasNext()) {
6133            Block blk = blkIterator.next();
6134            final INode inode = (INode)blockManager.getBlockCollection(blk);
6135            skip++;
6136            if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
6137              String src = FSDirectory.getFullPathName(inode);
6138              if (src.startsWith(path)){
6139                corruptFiles.add(new CorruptFileBlockInfo(src, blk));
6140                count++;
6141                if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
6142                  break;
6143              }
6144            }
6145          }
6146          cookieTab[0] = String.valueOf(skip);
6147          LOG.info("list corrupt file blocks returned: " + count);
6148          return corruptFiles;
6149        } finally {
6150          readUnlock();
6151        }
6152      }
6153    
6154      /**
6155       * Convert string cookie to integer.
6156       */
6157      private static int getIntCookie(String cookie){
6158        int c;
6159        if(cookie == null){
6160          c = 0;
6161        } else {
6162          try{
6163            c = Integer.parseInt(cookie);
6164          }catch (NumberFormatException e) {
6165            c = 0;
6166          }
6167        }
6168        c = Math.max(0, c);
6169        return c;
6170      }
6171    
6172      /**
6173       * Create delegation token secret manager
6174       */
6175      private DelegationTokenSecretManager createDelegationTokenSecretManager(
6176          Configuration conf) {
6177        return new DelegationTokenSecretManager(conf.getLong(
6178            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
6179            DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
6180            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
6181                DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
6182            conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
6183                DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
6184            DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
6185            conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
6186                DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
6187            this);
6188      }
6189    
6190      /**
6191       * Returns the DelegationTokenSecretManager instance in the namesystem.
6192       * @return delegation token secret manager object
6193       */
6194      DelegationTokenSecretManager getDelegationTokenSecretManager() {
6195        return dtSecretManager;
6196      }
6197    
6198      /**
6199       * @param renewer
6200       * @return Token<DelegationTokenIdentifier>
6201       * @throws IOException
6202       */
6203      Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
6204          throws IOException {
6205        Token<DelegationTokenIdentifier> token;
6206        checkOperation(OperationCategory.WRITE);
6207        writeLock();
6208        try {
6209          checkOperation(OperationCategory.WRITE);
6210          checkNameNodeSafeMode("Cannot issue delegation token");
6211          if (!isAllowedDelegationTokenOp()) {
6212            throw new IOException(
6213              "Delegation Token can be issued only with kerberos or web authentication");
6214          }
6215          if (dtSecretManager == null || !dtSecretManager.isRunning()) {
6216            LOG.warn("trying to get DT with no secret manager running");
6217            return null;
6218          }
6219    
6220          UserGroupInformation ugi = getRemoteUser();
6221          String user = ugi.getUserName();
6222          Text owner = new Text(user);
6223          Text realUser = null;
6224          if (ugi.getRealUser() != null) {
6225            realUser = new Text(ugi.getRealUser().getUserName());
6226          }
6227          DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
6228            renewer, realUser);
6229          token = new Token<DelegationTokenIdentifier>(
6230            dtId, dtSecretManager);
6231          long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
6232          getEditLog().logGetDelegationToken(dtId, expiryTime);
6233        } finally {
6234          writeUnlock();
6235        }
6236        getEditLog().logSync();
6237        return token;
6238      }
6239    
6240      /**
6241       * 
6242       * @param token
6243       * @return New expiryTime of the token
6244       * @throws InvalidToken
6245       * @throws IOException
6246       */
6247      long renewDelegationToken(Token<DelegationTokenIdentifier> token)
6248          throws InvalidToken, IOException {
6249        long expiryTime;
6250        checkOperation(OperationCategory.WRITE);
6251        writeLock();
6252        try {
6253          checkOperation(OperationCategory.WRITE);
6254    
6255          checkNameNodeSafeMode("Cannot renew delegation token");
6256          if (!isAllowedDelegationTokenOp()) {
6257            throw new IOException(
6258                "Delegation Token can be renewed only with kerberos or web authentication");
6259          }
6260          String renewer = getRemoteUser().getShortUserName();
6261          expiryTime = dtSecretManager.renewToken(token, renewer);
6262          DelegationTokenIdentifier id = new DelegationTokenIdentifier();
6263          ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
6264          DataInputStream in = new DataInputStream(buf);
6265          id.readFields(in);
6266          getEditLog().logRenewDelegationToken(id, expiryTime);
6267        } finally {
6268          writeUnlock();
6269        }
6270        getEditLog().logSync();
6271        return expiryTime;
6272      }
6273    
6274      /**
6275       * 
6276       * @param token
6277       * @throws IOException
6278       */
6279      void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
6280          throws IOException {
6281        checkOperation(OperationCategory.WRITE);
6282        writeLock();
6283        try {
6284          checkOperation(OperationCategory.WRITE);
6285    
6286          checkNameNodeSafeMode("Cannot cancel delegation token");
6287          String canceller = getRemoteUser().getUserName();
6288          DelegationTokenIdentifier id = dtSecretManager
6289            .cancelToken(token, canceller);
6290          getEditLog().logCancelDelegationToken(id);
6291        } finally {
6292          writeUnlock();
6293        }
6294        getEditLog().logSync();
6295      }
6296    
  /** @return a snapshot of the delegation token secret manager state. */
  SecretManagerState saveSecretManagerState() {
    return dtSecretManager.saveSecretManagerState();
  }
6300    
6301      /**
6302       * @param in load the state of secret manager from input stream
6303       */
6304      void loadSecretManagerStateCompat(DataInput in) throws IOException {
6305        dtSecretManager.loadSecretManagerStateCompat(in);
6306      }
6307    
  /** Load the secret manager state from its protobuf image sections. */
  void loadSecretManagerState(SecretManagerSection s,
      List<SecretManagerSection.DelegationKey> keys,
      List<SecretManagerSection.PersistToken> tokens) throws IOException {
    dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
  }
6313    
6314      /**
6315       * Log the updateMasterKey operation to edit logs
6316       * 
6317       * @param key new delegation key.
6318       */
6319      public void logUpdateMasterKey(DelegationKey key) {
6320        
6321        assert !isInSafeMode() :
6322          "this should never be called while in safemode, since we stop " +
6323          "the DT manager before entering safemode!";
6324        // No need to hold FSN lock since we don't access any internal
6325        // structures, and this is stopped before the FSN shuts itself
6326        // down, etc.
6327        getEditLog().logUpdateMasterKey(key);
6328        getEditLog().logSync();
6329      }
6330      
6331      /**
6332       * Log the cancellation of expired tokens to edit logs
6333       * 
6334       * @param id token identifier to cancel
6335       */
6336      public void logExpireDelegationToken(DelegationTokenIdentifier id) {
6337        assert !isInSafeMode() :
6338          "this should never be called while in safemode, since we stop " +
6339          "the DT manager before entering safemode!";
6340        // No need to hold FSN lock since we don't access any internal
6341        // structures, and this is stopped before the FSN shuts itself
6342        // down, etc.
6343        getEditLog().logCancelDelegationToken(id);
6344      }  
6345      
  /** Record a lease reassignment in the edit log; requires the write lock. */
  private void logReassignLease(String leaseHolder, String src,
      String newHolder) {
    assert hasWriteLock();
    getEditLog().logReassignLease(leaseHolder, src, newHolder);
  }
6351      
6352      /**
6353       * 
6354       * @return true if delegation token operation is allowed
6355       */
6356      private boolean isAllowedDelegationTokenOp() throws IOException {
6357        AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
6358        if (UserGroupInformation.isSecurityEnabled()
6359            && (authMethod != AuthenticationMethod.KERBEROS)
6360            && (authMethod != AuthenticationMethod.KERBEROS_SSL)
6361            && (authMethod != AuthenticationMethod.CERTIFICATE)) {
6362          return false;
6363        }
6364        return true;
6365      }
6366      
6367      /**
6368       * Returns authentication method used to establish the connection
6369       * @return AuthenticationMethod used to establish connection
6370       * @throws IOException
6371       */
6372      private AuthenticationMethod getConnectionAuthenticationMethod()
6373          throws IOException {
6374        UserGroupInformation ugi = getRemoteUser();
6375        AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
6376        if (authMethod == AuthenticationMethod.PROXY) {
6377          authMethod = ugi.getRealUser().getAuthenticationMethod();
6378        }
6379        return authMethod;
6380      }
6381      
6382      /**
6383       * Client invoked methods are invoked over RPC and will be in 
6384       * RPC call context even if the client exits.
6385       */
6386      private boolean isExternalInvocation() {
6387        return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
6388      }
6389    
6390      private static InetAddress getRemoteIp() {
6391        InetAddress ip = Server.getRemoteIp();
6392        if (ip != null) {
6393          return ip;
6394        }
6395        return NamenodeWebHdfsMethods.getRemoteIp();
6396      }
6397      
  // optimize ugi lookup for RPC operations to avoid a trip through
  // UGI.getCurrentUser which is synch'ed
  private static UserGroupInformation getRemoteUser() throws IOException {
    return NameNode.getRemoteUser();
  }
6403      
6404      /**
6405       * Log fsck event in the audit log 
6406       */
6407      void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
6408        if (isAuditEnabled()) {
6409          logAuditEvent(true, getRemoteUser(),
6410                        remoteAddress,
6411                        "fsck", src, null, null);
6412        }
6413      }
6414      /**
6415       * Register NameNodeMXBean
6416       */
6417      private void registerMXBean() {
6418        mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
6419      }
6420    
6421      /**
6422       * Class representing Namenode information for JMX interfaces
6423       */
6424      @Override // NameNodeMXBean
6425      public String getVersion() {
6426        return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
6427      }
6428    
6429      @Override // NameNodeMXBean
6430      public long getUsed() {
6431        return this.getCapacityUsed();
6432      }
6433    
6434      @Override // NameNodeMXBean
6435      public long getFree() {
6436        return this.getCapacityRemaining();
6437      }
6438    
6439      @Override // NameNodeMXBean
6440      public long getTotal() {
6441        return this.getCapacityTotal();
6442      }
6443    
6444      @Override // NameNodeMXBean
6445      public String getSafemode() {
6446        if (!this.isInSafeMode())
6447          return "";
6448        return "Safe mode is ON. " + this.getSafeModeTip();
6449      }
6450    
6451      @Override // NameNodeMXBean
6452      public boolean isUpgradeFinalized() {
6453        return this.getFSImage().isUpgradeFinalized();
6454      }
6455    
  /** @return non-DFS used capacity, from aggregated datanode statistics. */
  @Override // NameNodeMXBean
  public long getNonDfsUsedSpace() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }
6460    
  /** @return percent of capacity used, from aggregated datanode statistics. */
  @Override // NameNodeMXBean
  public float getPercentUsed() {
    return datanodeStatistics.getCapacityUsedPercent();
  }
6465    
  /** @return space used by the block pool, from datanode statistics. */
  @Override // NameNodeMXBean
  public long getBlockPoolUsedSpace() {
    return datanodeStatistics.getBlockPoolUsed();
  }
6470    
  /** @return percent of capacity used by the block pool. */
  @Override // NameNodeMXBean
  public float getPercentBlockPoolUsed() {
    return datanodeStatistics.getPercentBlockPoolUsed();
  }
6475    
  /** @return percent of capacity remaining, from datanode statistics. */
  @Override // NameNodeMXBean
  public float getPercentRemaining() {
    return datanodeStatistics.getCapacityRemainingPercent();
  }
6480    
  /** @return total cache capacity, from aggregated datanode statistics. */
  @Override // NameNodeMXBean
  public long getCacheCapacity() {
    return datanodeStatistics.getCacheCapacity();
  }
6485    
  /** @return cache space in use, from aggregated datanode statistics. */
  @Override // NameNodeMXBean
  public long getCacheUsed() {
    return datanodeStatistics.getCacheUsed();
  }
6490    
  /** @return total number of blocks in the filesystem. */
  @Override // NameNodeMXBean
  public long getTotalBlocks() {
    return getBlocksTotal();
  }
6495    
  /** @return total number of files in the filesystem (also a metric). */
  @Override // NameNodeMXBean
  @Metric
  public long getTotalFiles() {
    return getFilesTotal();
  }
6501    
  /** @return number of blocks with no replicas anywhere. */
  @Override // NameNodeMXBean
  public long getNumberOfMissingBlocks() {
    return getMissingBlocksCount();
  }
6506      
  /** @return total JVM thread count, via the platform thread MXBean. */
  @Override // NameNodeMXBean
  public int getThreads() {
    return ManagementFactory.getThreadMXBean().getThreadCount();
  }
6511    
6512      /**
6513       * Returned information is a JSON representation of map with host name as the
6514       * key and value is a map of live node attribute keys to its values
6515       */
6516      @Override // NameNodeMXBean
6517      public String getLiveNodes() {
6518        final Map<String, Map<String,Object>> info = 
6519          new HashMap<String, Map<String,Object>>();
6520        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6521        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6522        for (DatanodeDescriptor node : live) {
6523          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6524              .put("infoAddr", node.getInfoAddr())
6525              .put("infoSecureAddr", node.getInfoSecureAddr())
6526              .put("xferaddr", node.getXferAddr())
6527              .put("lastContact", getLastContact(node))
6528              .put("usedSpace", getDfsUsed(node))
6529              .put("adminState", node.getAdminState().toString())
6530              .put("nonDfsUsedSpace", node.getNonDfsUsed())
6531              .put("capacity", node.getCapacity())
6532              .put("numBlocks", node.numBlocks())
6533              .put("version", node.getSoftwareVersion())
6534              .put("used", node.getDfsUsed())
6535              .put("remaining", node.getRemaining())
6536              .put("blockScheduled", node.getBlocksScheduled())
6537              .put("blockPoolUsed", node.getBlockPoolUsed())
6538              .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
6539              .put("volfails", node.getVolumeFailures())
6540              .build();
6541    
6542          info.put(node.getHostName(), innerinfo);
6543        }
6544        return JSON.toString(info);
6545      }
6546    
6547      /**
6548       * Returned information is a JSON representation of map with host name as the
6549       * key and value is a map of dead node attribute keys to its values
6550       */
6551      @Override // NameNodeMXBean
6552      public String getDeadNodes() {
6553        final Map<String, Map<String, Object>> info = 
6554          new HashMap<String, Map<String, Object>>();
6555        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
6556        blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
6557        for (DatanodeDescriptor node : dead) {
6558          Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6559              .put("lastContact", getLastContact(node))
6560              .put("decommissioned", node.isDecommissioned())
6561              .put("xferaddr", node.getXferAddr())
6562              .build();
6563          info.put(node.getHostName(), innerinfo);
6564        }
6565        return JSON.toString(info);
6566      }
6567    
6568      /**
6569       * Returned information is a JSON representation of map with host name as the
6570       * key and value is a map of decomisioning node attribute keys to its values
6571       */
6572      @Override // NameNodeMXBean
6573      public String getDecomNodes() {
6574        final Map<String, Map<String, Object>> info = 
6575          new HashMap<String, Map<String, Object>>();
6576        final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
6577            ).getDecommissioningNodes();
6578        for (DatanodeDescriptor node : decomNodeList) {
6579          Map<String, Object> innerinfo = ImmutableMap
6580              .<String, Object> builder()
6581              .put("xferaddr", node.getXferAddr())
6582              .put("underReplicatedBlocks",
6583                  node.decommissioningStatus.getUnderReplicatedBlocks())
6584              .put("decommissionOnlyReplicas",
6585                  node.decommissioningStatus.getDecommissionOnlyReplicas())
6586              .put("underReplicateInOpenFiles",
6587                  node.decommissioningStatus.getUnderReplicatedInOpenFiles())
6588              .build();
6589          info.put(node.getHostName(), innerinfo);
6590        }
6591        return JSON.toString(info);
6592      }
6593    
6594      private long getLastContact(DatanodeDescriptor alivenode) {
6595        return (Time.now() - alivenode.getLastUpdate())/1000;
6596      }
6597    
6598      private long getDfsUsed(DatanodeDescriptor alivenode) {
6599        return alivenode.getDfsUsed();
6600      }
6601    
6602      @Override  // NameNodeMXBean
6603      public String getClusterId() {
6604        return dir.fsImage.getStorage().getClusterID();
6605      }
6606      
  /** @return the block pool ID of this namesystem. */
  @Override  // NameNodeMXBean
  public String getBlockPoolId() {
    return blockPoolId;
  }
6611      
6612      @Override  // NameNodeMXBean
6613      public String getNameDirStatuses() {
6614        Map<String, Map<File, StorageDirType>> statusMap =
6615          new HashMap<String, Map<File, StorageDirType>>();
6616        
6617        Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
6618        for (Iterator<StorageDirectory> it
6619            = getFSImage().getStorage().dirIterator(); it.hasNext();) {
6620          StorageDirectory st = it.next();
6621          activeDirs.put(st.getRoot(), st.getStorageDirType());
6622        }
6623        statusMap.put("active", activeDirs);
6624        
6625        List<Storage.StorageDirectory> removedStorageDirs
6626            = getFSImage().getStorage().getRemovedStorageDirs();
6627        Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
6628        for (StorageDirectory st : removedStorageDirs) {
6629          failedDirs.put(st.getRoot(), st.getStorageDirType());
6630        }
6631        statusMap.put("failed", failedDirs);
6632        
6633        return JSON.toString(statusMap);
6634      }
6635    
6636      @Override // NameNodeMXBean
6637      public String getNodeUsage() {
6638        float median = 0;
6639        float max = 0;
6640        float min = 0;
6641        float dev = 0;
6642    
6643        final Map<String, Map<String,Object>> info =
6644            new HashMap<String, Map<String,Object>>();
6645        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6646        blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6647    
6648        if (live.size() > 0) {
6649          float totalDfsUsed = 0;
6650          float[] usages = new float[live.size()];
6651          int i = 0;
6652          for (DatanodeDescriptor dn : live) {
6653            usages[i++] = dn.getDfsUsedPercent();
6654            totalDfsUsed += dn.getDfsUsedPercent();
6655          }
6656          totalDfsUsed /= live.size();
6657          Arrays.sort(usages);
6658          median = usages[usages.length / 2];
6659          max = usages[usages.length - 1];
6660          min = usages[0];
6661    
6662          for (i = 0; i < usages.length; i++) {
6663            dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
6664          }
6665          dev = (float) Math.sqrt(dev / usages.length);
6666        }
6667    
6668        final Map<String, Object> innerInfo = new HashMap<String, Object>();
6669        innerInfo.put("min", StringUtils.format("%.2f%%", min));
6670        innerInfo.put("median", StringUtils.format("%.2f%%", median));
6671        innerInfo.put("max", StringUtils.format("%.2f%%", max));
6672        innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
6673        info.put("nodeUsage", innerInfo);
6674    
6675        return JSON.toString(info);
6676      }
6677    
6678      @Override  // NameNodeMXBean
6679      public String getNameJournalStatus() {
6680        List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
6681        FSEditLog log = getFSImage().getEditLog();
6682        if (log != null) {
6683          boolean openForWrite = log.isOpenForWrite();
6684          for (JournalAndStream jas : log.getJournals()) {
6685            final Map<String, String> jasMap = new HashMap<String, String>();
6686            String manager = jas.getManager().toString();
6687    
6688            jasMap.put("required", String.valueOf(jas.isRequired()));
6689            jasMap.put("disabled", String.valueOf(jas.isDisabled()));
6690            jasMap.put("manager", manager);
6691    
6692            if (jas.isDisabled()) {
6693              jasMap.put("stream", "Failed");
6694            } else if (openForWrite) {
6695              EditLogOutputStream elos = jas.getCurrentStream();
6696              if (elos != null) {
6697                jasMap.put("stream", elos.generateReport());
6698              } else {
6699                jasMap.put("stream", "not currently writing");
6700              }
6701            } else {
6702              jasMap.put("stream", "open for read");
6703            }
6704            jasList.add(jasMap);
6705          }
6706        }
6707        return JSON.toString(jasList);
6708      }
6709    
6710      @Override // NameNodeMxBean
6711      public String getJournalTransactionInfo() {
6712        Map<String, String> txnIdMap = new HashMap<String, String>();
6713        txnIdMap.put("LastAppliedOrWrittenTxId",
6714            Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
6715        txnIdMap.put("MostRecentCheckpointTxId",
6716            Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
6717        return JSON.toString(txnIdMap);
6718      }
6719      
  /** @return the string form of the NameNode start time. */
  @Override  // NameNodeMXBean
  public String getNNStarted() {
    return getStartTime().toString();
  }
6724    
6725      @Override  // NameNodeMXBean
6726      public String getCompileInfo() {
6727        return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
6728            " from " + VersionInfo.getBranch();
6729      }
6730    
  /** @return the block manager for this namesystem. */
  public BlockManager getBlockManager() {
    return blockManager;
  }
  /** @return the FSDirectory (namespace tree) of this namesystem. */
  public FSDirectory getFSDirectory() {
    return dir;
  }
  /** @return the cache manager for this namesystem. */
  public CacheManager getCacheManager() {
    return cacheManager;
  }
6743    
6744      @Override  // NameNodeMXBean
6745      public String getCorruptFiles() {
6746        List<String> list = new ArrayList<String>();
6747        Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
6748        try {
6749          corruptFileBlocks = listCorruptFileBlocks("/", null);
6750          int corruptFileCount = corruptFileBlocks.size();
6751          if (corruptFileCount != 0) {
6752            for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
6753              list.add(c.toString());
6754            }
6755          }
6756        } catch (IOException e) {
6757          LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
6758        }
6759        return JSON.toString(list);
6760      }
6761    
6762      @Override  //NameNodeMXBean
6763      public int getDistinctVersionCount() {
6764        return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
6765          .size();
6766      }
6767    
6768      @Override  //NameNodeMXBean
6769      public Map<String, Integer> getDistinctVersions() {
6770        return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
6771      }
6772    
6773      @Override  //NameNodeMXBean
6774      public String getSoftwareVersion() {
6775        return VersionInfo.getVersion();
6776      }
6777    
6778      /**
6779       * Verifies that the given identifier and password are valid and match.
6780       * @param identifier Token identifier.
6781       * @param password Password in the token.
6782       */
6783      public synchronized void verifyToken(DelegationTokenIdentifier identifier,
6784          byte[] password) throws InvalidToken, RetriableException {
6785        try {
6786          getDelegationTokenSecretManager().verifyToken(identifier, password);
6787        } catch (InvalidToken it) {
6788          if (inTransitionToActive()) {
6789            throw new RetriableException(it);
6790          }
6791          throw it;
6792        }
6793      }
6794      
6795      @Override
6796      public boolean isGenStampInFuture(Block block) {
6797        if (isLegacyBlock(block)) {
6798          return block.getGenerationStamp() > getGenerationStampV1();
6799        } else {
6800          return block.getGenerationStamp() > getGenerationStampV2();
6801        }
6802      }
6803    
  /** Test hook: @return the edit log tailer (used in HA standby mode). */
  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }
6808      
  /** Test hook: replace the edit log tailer. */
  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }
6813      
  /** Test hook: swap the coarse-grained namesystem lock implementation. */
  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }
6818      
  /** Test hook: @return the coarse-grained namesystem read-write lock. */
  @VisibleForTesting
  public ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }
6823      
  /** Test hook: @return the long-read lock of the namesystem lock. */
  @VisibleForTesting
  public ReentrantLock getLongReadLockForTests() {
    return fsLock.longReadLock;
  }
6828    
  /** Test hook: @return the current safe mode state object (may be null). */
  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }
6833      
  /** Test hook: replace the NameNode resource checker. */
  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }
6838    
6839      @Override
6840      public boolean isAvoidingStaleDataNodesForWrite() {
6841        return this.blockManager.getDatanodeManager()
6842            .shouldAvoidStaleDataNodesForWrite();
6843      }
6844    
6845      @Override // FSClusterStats
6846      public int getNumDatanodesInService() {
6847        return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
6848      }
6849    
  /** @return the snapshot manager for this namesystem. */
  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }
6853      
  /**
   * Allow snapshots to be taken on a directory (mark it snapshottable).
   * Requires superuser privilege; the operation is written to the edit log
   * and recorded in the audit log.
   * @param path path of the directory to mark snapshottable
   */
  void allowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-validate after acquiring the write lock.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.setSnapshottable(path, true);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logAllowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the namesystem write lock.
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "allowSnapshot", path, null, null);
    }
  }
6879      
  /**
   * Disallow snapshots on a directory (reset its snapshottable flag).
   * Requires superuser privilege; the operation is written to the edit log
   * and recorded in the audit log.
   * @param path path of the directory to reset
   */
  void disallowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-validate after acquiring the write lock.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.resetSnapshottable(path);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logDisallowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the namesystem write lock.
    getEditLog().logSync();
    
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "disallowSnapshot", path, null, null);
    }
  }
6905      
6906      /**
6907       * Create a snapshot
6908       * @param snapshotRoot The directory path where the snapshot is taken
6909       * @param snapshotName The name of the snapshot
6910       */
6911      String createSnapshot(String snapshotRoot, String snapshotName)
6912          throws SafeModeException, IOException {
6913        checkOperation(OperationCategory.WRITE);
6914        final FSPermissionChecker pc = getPermissionChecker();
6915        CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
6916            null);
6917        if (cacheEntry != null && cacheEntry.isSuccess()) {
6918          return (String) cacheEntry.getPayload();
6919        }
6920        writeLock();
6921        String snapshotPath = null;
6922        try {
6923          checkOperation(OperationCategory.WRITE);
6924          checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
6925          if (isPermissionEnabled) {
6926            checkOwner(pc, snapshotRoot);
6927          }
6928    
6929          if (snapshotName == null || snapshotName.isEmpty()) {
6930            snapshotName = Snapshot.generateDefaultSnapshotName();
6931          }
6932          if(snapshotName != null){
6933            if (!DFSUtil.isValidNameForComponent(snapshotName)) {
6934                throw new InvalidPathException("Invalid snapshot name: "
6935                    + snapshotName);
6936            }
6937          }
6938          dir.verifySnapshotName(snapshotName, snapshotRoot);
6939          dir.writeLock();
6940          try {
6941            snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName);
6942          } finally {
6943            dir.writeUnlock();
6944          }
6945          getEditLog().logCreateSnapshot(snapshotRoot, snapshotName,
6946              cacheEntry != null);
6947        } finally {
6948          writeUnlock();
6949          RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath);
6950        }
6951        getEditLog().logSync();
6952        
6953        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6954          logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null);
6955        }
6956        return snapshotPath;
6957      }
6958      
6959      /**
6960       * Rename a snapshot
6961       * @param path The directory path where the snapshot was taken
6962       * @param snapshotOldName Old snapshot name
6963       * @param snapshotNewName New snapshot name
6964       * @throws SafeModeException
6965       * @throws IOException 
6966       */
6967      void renameSnapshot(String path, String snapshotOldName,
6968          String snapshotNewName) throws SafeModeException, IOException {
6969        checkOperation(OperationCategory.WRITE);
6970        final FSPermissionChecker pc = getPermissionChecker();
6971        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
6972        if (cacheEntry != null && cacheEntry.isSuccess()) {
6973          return; // Return previous response
6974        }
6975        writeLock();
6976        boolean success = false;
6977        try {
6978          checkOperation(OperationCategory.WRITE);
6979          checkNameNodeSafeMode("Cannot rename snapshot for " + path);
6980          if (isPermissionEnabled) {
6981            checkOwner(pc, path);
6982          }
6983          dir.verifySnapshotName(snapshotNewName, path);
6984          
6985          snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
6986          getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
6987              cacheEntry != null);
6988          success = true;
6989        } finally {
6990          writeUnlock();
6991          RetryCache.setState(cacheEntry, success);
6992        }
6993        getEditLog().logSync();
6994        
6995        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6996          String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
6997          String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
6998          logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
6999        }
7000      }
7001      
7002      /**
7003       * Get the list of snapshottable directories that are owned 
7004       * by the current user. Return all the snapshottable directories if the 
7005       * current user is a super user.
7006       * @return The list of all the current snapshottable directories
7007       * @throws IOException
7008       */
7009      public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
7010          throws IOException {
7011        SnapshottableDirectoryStatus[] status = null;
7012        checkOperation(OperationCategory.READ);
7013        final FSPermissionChecker checker = getPermissionChecker();
7014        readLock();
7015        try {
7016          checkOperation(OperationCategory.READ);
7017          final String user = checker.isSuperUser()? null : checker.getUser();
7018          status = snapshotManager.getSnapshottableDirListing(user);
7019        } finally {
7020          readUnlock();
7021        }
7022        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7023          logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
7024        }
7025        return status;
7026      }
7027      
7028      /**
7029       * Get the difference between two snapshots (or between a snapshot and the
7030       * current status) of a snapshottable directory.
7031       * 
7032       * @param path The full path of the snapshottable directory.
7033       * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
7034       *          or empty string indicates the current tree.
7035       * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
7036       *          empty string indicates the current tree.
7037       * @return A report about the difference between {@code fromSnapshot} and 
7038       *         {@code toSnapshot}. Modified/deleted/created/renamed files and 
7039       *         directories belonging to the snapshottable directories are listed 
7040       *         and labeled as M/-/+/R respectively. 
7041       * @throws IOException
7042       */
7043      SnapshotDiffReport getSnapshotDiffReport(String path,
7044          String fromSnapshot, String toSnapshot) throws IOException {
7045        SnapshotDiffInfo diffs = null;
7046        checkOperation(OperationCategory.READ);
7047        final FSPermissionChecker pc = getPermissionChecker();
7048        readLock();
7049        try {
7050          checkOperation(OperationCategory.READ);
7051          if (isPermissionEnabled) {
7052            checkSubtreeReadPermission(pc, path, fromSnapshot);
7053            checkSubtreeReadPermission(pc, path, toSnapshot);
7054          }
7055          diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
7056        } finally {
7057          readUnlock();
7058        }
7059        
7060        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7061          logAuditEvent(true, "computeSnapshotDiff", null, null, null);
7062        }
7063        return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
7064            path, fromSnapshot, toSnapshot,
7065            Collections.<DiffReportEntry> emptyList());
7066      }
7067      
7068      private void checkSubtreeReadPermission(final FSPermissionChecker pc,
7069          final String snapshottablePath, final String snapshot)
7070              throws AccessControlException, UnresolvedLinkException {
7071        final String fromPath = snapshot == null?
7072            snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
7073        checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
7074      }
7075      
7076      /**
7077       * Delete a snapshot of a snapshottable directory
7078       * @param snapshotRoot The snapshottable directory
7079       * @param snapshotName The name of the to-be-deleted snapshot
7080       * @throws SafeModeException
7081       * @throws IOException
7082       */
7083      void deleteSnapshot(String snapshotRoot, String snapshotName)
7084          throws SafeModeException, IOException {
7085        checkOperation(OperationCategory.WRITE);
7086        final FSPermissionChecker pc = getPermissionChecker();
7087        
7088        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7089        if (cacheEntry != null && cacheEntry.isSuccess()) {
7090          return; // Return previous response
7091        }
7092        boolean success = false;
7093        BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
7094        writeLock();
7095        try {
7096          checkOperation(OperationCategory.WRITE);
7097          checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
7098          if (isPermissionEnabled) {
7099            checkOwner(pc, snapshotRoot);
7100          }
7101    
7102          List<INode> removedINodes = new ChunkedArrayList<INode>();
7103          dir.writeLock();
7104          try {
7105            snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
7106                collectedBlocks, removedINodes);
7107            dir.removeFromInodeMap(removedINodes);
7108          } finally {
7109            dir.writeUnlock();
7110          }
7111          removedINodes.clear();
7112          getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
7113              cacheEntry != null);
7114          success = true;
7115        } finally {
7116          writeUnlock();
7117          RetryCache.setState(cacheEntry, success);
7118        }
7119        getEditLog().logSync();
7120    
7121        removeBlocks(collectedBlocks);
7122        collectedBlocks.clear();
7123    
7124        if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7125          String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
7126          logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
7127        }
7128      }
7129    
7130      /**
7131       * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
7132       * @param toRemove the list of INodeDirectorySnapshottable to be removed
7133       */
7134      void removeSnapshottableDirs(List<INodeDirectorySnapshottable> toRemove) {
7135        if (snapshotManager != null) {
7136          snapshotManager.removeSnapshottable(toRemove);
7137        }
7138      }
7139    
  /**
   * Query the current rolling upgrade status. Requires superuser privilege.
   * @return the rolling upgrade info with a refreshed rollback-image flag,
   *         or null if no rolling upgrade is in progress
   */
  RollingUpgradeInfo queryRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      if (rollingUpgradeInfo != null) {
        // Refresh whether a rollback image exists before reporting.
        boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
        rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
      }
      return rollingUpgradeInfo;
    } finally {
      readUnlock();
    }
  }
7154    
  /**
   * Start a rolling upgrade. Requires superuser privilege. The non-HA path
   * requires safe mode; the HA path requires NOT being in safe mode.
   * @return the rolling upgrade info recorded for this upgrade
   */
  RollingUpgradeInfo startRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      long startTime = now();
      if (!haEnabled) { // for non-HA, we require NN to be in safemode
        startRollingUpgradeInternalForNonHA(startTime);
      } else { // for HA, NN cannot be in safemode
        checkNameNodeSafeMode("Failed to start rolling upgrade");
        startRollingUpgradeInternal(startTime);
      }

      getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "startRollingUpgrade", null, null, null);
    }
    return rollingUpgradeInfo;
  }
7184    
7185      /**
7186       * Update internal state to indicate that a rolling upgrade is in progress.
7187       * @param startTime
7188       */
7189      void startRollingUpgradeInternal(long startTime)
7190          throws IOException {
7191        checkRollingUpgrade("start rolling upgrade");
7192        getFSImage().checkUpgrade(this);
7193        setRollingUpgradeInfo(false, startTime);
7194      }
7195    
7196      /**
7197       * Update internal state to indicate that a rolling upgrade is in progress for
7198       * non-HA setup. This requires the namesystem is in SafeMode and after doing a
7199       * checkpoint for rollback the namesystem will quit the safemode automatically 
7200       */
7201      private void startRollingUpgradeInternalForNonHA(long startTime)
7202          throws IOException {
7203        Preconditions.checkState(!haEnabled);
7204        if (!isInSafeMode()) {
7205          throw new IOException("Safe mode should be turned ON "
7206              + "in order to create namespace image.");
7207        }
7208        checkRollingUpgrade("start rolling upgrade");
7209        getFSImage().checkUpgrade(this);
7210        // in non-HA setup, we do an extra ckpt to generate a rollback image
7211        getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
7212        LOG.info("Successfully saved namespace for preparing rolling upgrade.");
7213    
7214        // leave SafeMode automatically
7215        setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
7216        setRollingUpgradeInfo(true, startTime);
7217      }
7218    
  /**
   * Record that a rolling upgrade started at {@code startTime}; the finalize
   * time is set to 0L (not yet finalized).
   */
  void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
    rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
        createdRollbackImages, startTime, 0L);
  }
7223    
7224      public void setCreatedRollbackImages(boolean created) {
7225        if (rollingUpgradeInfo != null) {
7226          rollingUpgradeInfo.setCreatedRollbackImages(created);
7227        }
7228      }
7229    
  /** @return the rolling upgrade info, or null if no upgrade is in progress. */
  public RollingUpgradeInfo getRollingUpgradeInfo() {
    return rollingUpgradeInfo;
  }
7233    
  /** @return whether a rollback FSImage still needs to be created. */
  public boolean isNeedRollbackFsImage() {
    return needRollbackFsImage;
  }
7237    
  /** Set whether a rollback FSImage still needs to be created. */
  public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
    this.needRollbackFsImage = needRollbackFsImage;
  }
7241    
7242      @Override  // NameNodeMXBean
7243      public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
7244        readLock();
7245        try {
7246          RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
7247          if (upgradeInfo != null) {
7248            return new RollingUpgradeInfo.Bean(upgradeInfo);
7249          }
7250          return null;
7251        } finally {
7252          readUnlock();
7253        }
7254      }
7255    
  /** @return whether a rolling upgrade is currently in progress. */
  public boolean isRollingUpgrade() {
    return rollingUpgradeInfo != null;
  }
7260    
7261      void checkRollingUpgrade(String action) throws RollingUpgradeException {
7262        if (isRollingUpgrade()) {
7263          throw new RollingUpgradeException("Failed to " + action
7264              + " since a rolling upgrade is already in progress."
7265              + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
7266        }
7267      }
7268    
  /**
   * Finalize the rolling upgrade: log the finalization, save a namespace
   * checkpoint, and promote the rollback image to the regular image name.
   * Requires superuser privilege.
   * @return the finalized rolling upgrade info
   */
  RollingUpgradeInfo finalizeRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    final RollingUpgradeInfo returnInfo;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Failed to finalize rolling upgrade");

      returnInfo = finalizeRollingUpgradeInternal(now());
      getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
      getFSImage().saveNamespace(this);
      getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
    } finally {
      writeUnlock();
    }

    // getEditLog().logSync() is not needed since it does saveNamespace 

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
    }
    return returnInfo;
  }
7294    
7295      RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
7296          throws RollingUpgradeException {
7297        if (!isRollingUpgrade()) {
7298          throw new RollingUpgradeException(
7299              "Failed to finalize rolling upgrade since there is no rolling upgrade in progress.");
7300        }
7301    
7302        final long startTime = rollingUpgradeInfo.getStartTime();
7303        rollingUpgradeInfo = null;
7304        return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
7305      }
7306    
  /**
   * Adds a cache directive and returns its newly assigned id.
   *
   * @param directive the directive to add; must not already carry an id
   * @param flags if {@link CacheFlag#FORCE} is absent, waits for the cache
   *        manager rescan before proceeding
   * @return the id assigned by the cache manager
   * @throws IOException if in safe mode, if the caller supplied an id, or
   *         if the cache manager rejects the directive
   */
  long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // Retry cache: a retried RPC returns the previously assigned id
    // instead of creating a duplicate directive.
    CacheEntryWithPayload cacheEntry =
        RetryCache.waitForCompletion(retryCache, null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (Long) cacheEntry.getPayload();
    }
    boolean success = false;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    Long result = null;
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      if (directive.getId() != null) {
        throw new IOException("addDirective: you cannot specify an ID " +
            "for this operation.");
      }
      CacheDirectiveInfo effectiveDirective = 
          cacheManager.addDirective(directive, pc, flags);
      // The retry-cache flag tells edit-log replay this op may be retried.
      getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
          cacheEntry != null);
      result = effectiveDirective.getId();
      success = true;
    } finally {
      writeUnlock();
      if (success) {
        // Sync outside the write lock to avoid blocking other operations.
        getEditLog().logSync();
      }
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCacheDirective", null, null, null);
      }
      RetryCache.setState(cacheEntry, success, result);
    }
    return result;
  }
7351    
7352      void modifyCacheDirective(CacheDirectiveInfo directive,
7353          EnumSet<CacheFlag> flags) throws IOException {
7354        checkOperation(OperationCategory.WRITE);
7355        final FSPermissionChecker pc = isPermissionEnabled ?
7356            getPermissionChecker() : null;
7357        boolean success = false;
7358        CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7359        if (cacheEntry != null && cacheEntry.isSuccess()) {
7360          return;
7361        }
7362        if (!flags.contains(CacheFlag.FORCE)) {
7363          cacheManager.waitForRescanIfNeeded();
7364        }
7365        writeLock();
7366        try {
7367          checkOperation(OperationCategory.WRITE);
7368          if (isInSafeMode()) {
7369            throw new SafeModeException(
7370                "Cannot add cache directive", safeMode);
7371          }
7372          cacheManager.modifyDirective(directive, pc, flags);
7373          getEditLog().logModifyCacheDirectiveInfo(directive,
7374              cacheEntry != null);
7375          success = true;
7376        } finally {
7377          writeUnlock();
7378          if (success) {
7379            getEditLog().logSync();
7380          }
7381          if (isAuditEnabled() && isExternalInvocation()) {
7382            logAuditEvent(success, "modifyCacheDirective", null, null, null);
7383          }
7384          RetryCache.setState(cacheEntry, success);
7385        }
7386      }
7387    
  /**
   * Removes the cache directive with the given id.
   *
   * @param id id of the directive to remove
   * @throws IOException if in safe mode or if the cache manager rejects
   *         the removal (e.g. unknown id)
   */
  void removeCacheDirective(Long id) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // Retry cache: a retried RPC that already succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return;
    }
    boolean success = false;
    writeLock();
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache directives", safeMode);
      }
      cacheManager.removeDirective(id, pc);
      getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "removeCacheDirective", null, null,
            null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    // Reached only on success (exceptions propagate above); syncs the
    // removal edit outside the write lock.
    getEditLog().logSync();
  }
7417    
  /**
   * Lists cache directives, one batch at a time.
   *
   * @param startId id to resume listing from (exclusive paging cursor)
   * @param filter restricts which directives are returned
   * @return a batch of matching directives
   * @throws IOException on permission failure or operation-category check
   */
  BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
      long startId, CacheDirectiveInfo filter) throws IOException {
    checkOperation(OperationCategory.READ);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    BatchedListEntries<CacheDirectiveEntry> results;
    // Ensure listing reflects a completed rescan before taking the lock.
    cacheManager.waitForRescanIfNeeded();
    readLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.READ);
      results =
          cacheManager.listCacheDirectives(startId, filter, pc);
      success = true;
    } finally {
      readUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "listCacheDirectives", null, null,
            null);
      }
    }
    return results;
  }
7441    
  /**
   * Creates a new cache pool. Superuser-only when permissions are enabled.
   *
   * @param req the pool to create
   * @throws IOException if in safe mode, the caller lacks superuser
   *         privilege, or the cache manager rejects the pool
   */
  public void addCachePool(CachePoolInfo req) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // Retry cache: a retried RPC that already succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache pool " + req.getPoolName(), safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      CachePoolInfo info = cacheManager.addCachePool(req);
      getEditLog().logAddCachePool(info, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCachePool", req.getPoolName(), null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    
    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
  }
7474    
  /**
   * Modifies an existing cache pool. Superuser-only when permissions are
   * enabled.
   *
   * @param req pool fields to change; identified by pool name
   * @throws IOException if in safe mode, the caller lacks superuser
   *         privilege, or the cache manager rejects the change
   */
  public void modifyCachePool(CachePoolInfo req) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    // Retry cache: a retried RPC that already succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot modify cache pool " + req.getPoolName(), safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      cacheManager.modifyCachePool(req);
      getEditLog().logModifyCachePool(req, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "modifyCachePool", req.getPoolName(), null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }

    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
  }
7507    
  /**
   * Removes a cache pool by name. Superuser-only when permissions are
   * enabled.
   *
   * @param cachePoolName name of the pool to remove
   * @throws IOException if in safe mode, the caller lacks superuser
   *         privilege, or the cache manager rejects the removal
   */
  public void removeCachePool(String cachePoolName) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    // Retry cache: a retried RPC that already succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache pool " + cachePoolName, safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      cacheManager.removeCachePool(cachePoolName);
      getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "removeCachePool", cachePoolName, null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    
    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
  }
7540    
  /**
   * Lists cache pools, one batch at a time.
   *
   * @param prevKey pool name to resume listing after (paging cursor)
   * @return a batch of cache pool entries
   * @throws IOException on operation-category or permission failure
   */
  public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
      throws IOException {
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    BatchedListEntries<CachePoolEntry> results;
    checkOperation(OperationCategory.READ);
    boolean success = false;
    // Ensure listing reflects a completed rescan before taking the lock.
    cacheManager.waitForRescanIfNeeded();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      results = cacheManager.listCachePools(pc, prevKey);
      success = true;
    } finally {
      readUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "listCachePools", null, null, null);
      }
    }
    return results;
  }
7562    
  /**
   * Modifies ACL entries on {@code src}. Only the owner may change the
   * ACL; the change is sync'ed to the edit log and audit-logged.
   *
   * @param src path whose ACL is updated (may use reserved path syntax)
   * @param aclSpec ACL entries to add or replace
   * @throws IOException if the ACL API is disabled (per aclConfigFlag),
   *         the namenode is in safe mode, or the caller is not the owner
   */
  void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
    aclConfigFlag.checkForApiCall();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot modify ACL entries on " + src);
      // Resolve reserved paths (e.g. /.reserved/.inodes) to real paths.
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.modifyAclEntries(src, aclSpec);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "modifyAclEntries", src, null, resultingStat);
  }
7583    
  /**
   * Removes the given ACL entries from {@code src}. Only the owner may
   * change the ACL; the change is sync'ed to the edit log and audit-logged.
   *
   * @param src path whose ACL is updated (may use reserved path syntax)
   * @param aclSpec ACL entries to remove
   * @throws IOException if the ACL API is disabled (per aclConfigFlag),
   *         the namenode is in safe mode, or the caller is not the owner
   */
  void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
    aclConfigFlag.checkForApiCall();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove ACL entries on " + src);
      // Resolve reserved paths (e.g. /.reserved/.inodes) to real paths.
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.removeAclEntries(src, aclSpec);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "removeAclEntries", src, null, resultingStat);
  }
7604    
  /**
   * Removes the default ACL entries from {@code src}. Only the owner may
   * change the ACL; the change is sync'ed to the edit log and audit-logged.
   *
   * @param src path whose default ACL is removed (may use reserved syntax)
   * @throws IOException if the ACL API is disabled (per aclConfigFlag),
   *         the namenode is in safe mode, or the caller is not the owner
   */
  void removeDefaultAcl(String src) throws IOException {
    aclConfigFlag.checkForApiCall();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove default ACL entries on " + src);
      // Resolve reserved paths (e.g. /.reserved/.inodes) to real paths.
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.removeDefaultAcl(src);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "removeDefaultAcl", src, null, resultingStat);
  }
7625    
  /**
   * Removes the entire ACL from {@code src}. Only the owner may change
   * the ACL; the change is sync'ed to the edit log and audit-logged.
   *
   * @param src path whose ACL is removed (may use reserved path syntax)
   * @throws IOException if the ACL API is disabled (per aclConfigFlag),
   *         the namenode is in safe mode, or the caller is not the owner
   */
  void removeAcl(String src) throws IOException {
    aclConfigFlag.checkForApiCall();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot remove ACL on " + src);
      // Resolve reserved paths (e.g. /.reserved/.inodes) to real paths.
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.removeAcl(src);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "removeAcl", src, null, resultingStat);
  }
7646    
  /**
   * Replaces the ACL on {@code src} with the given entries. Only the
   * owner may change the ACL; the change is sync'ed to the edit log and
   * audit-logged.
   *
   * @param src path whose ACL is set (may use reserved path syntax)
   * @param aclSpec the full replacement ACL
   * @throws IOException if the ACL API is disabled (per aclConfigFlag),
   *         the namenode is in safe mode, or the caller is not the owner
   */
  void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
    aclConfigFlag.checkForApiCall();
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set ACL on " + src);
      // Resolve reserved paths (e.g. /.reserved/.inodes) to real paths.
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.setAcl(src, aclSpec);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    // Sync outside the write lock to avoid blocking other operations.
    getEditLog().logSync();
    logAuditEvent(true, "setAcl", src, null, resultingStat);
  }
7667    
  /**
   * Gets the ACL of the file or directory at {@code src}.
   *
   * @param src path to query
   * @return the ACL status of the path
   * @throws IOException if the ACL API is disabled (per aclConfigFlag) or
   *         on permission failure
   */
  AclStatus getAclStatus(String src) throws IOException {
    aclConfigFlag.checkForApiCall();
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      // Re-check after acquiring the lock: HA state may have changed.
      checkOperation(OperationCategory.READ);
      if (isPermissionEnabled) {
        checkPermission(pc, src, false, null, null, null, null);
      }
      return dir.getAclStatus(src);
    } finally {
      readUnlock();
    }
  }
7683    
7684      /**
7685       * Default AuditLogger implementation; used when no access logger is
7686       * defined in the config file. It can also be explicitly listed in the
7687       * config file.
7688       */
7689      private static class DefaultAuditLogger extends HdfsAuditLogger {
7690    
7691        private boolean logTokenTrackingId;
7692    
7693        @Override
7694        public void initialize(Configuration conf) {
7695          logTokenTrackingId = conf.getBoolean(
7696              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
7697              DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
7698        }
7699    
7700        @Override
7701        public void logAuditEvent(boolean succeeded, String userName,
7702            InetAddress addr, String cmd, String src, String dst,
7703            FileStatus status, UserGroupInformation ugi,
7704            DelegationTokenSecretManager dtSecretManager) {
7705          if (auditLog.isInfoEnabled()) {
7706            final StringBuilder sb = auditBuffer.get();
7707            sb.setLength(0);
7708            sb.append("allowed=").append(succeeded).append("\t");
7709            sb.append("ugi=").append(userName).append("\t");
7710            sb.append("ip=").append(addr).append("\t");
7711            sb.append("cmd=").append(cmd).append("\t");
7712            sb.append("src=").append(src).append("\t");
7713            sb.append("dst=").append(dst).append("\t");
7714            if (null == status) {
7715              sb.append("perm=null");
7716            } else {
7717              sb.append("perm=");
7718              sb.append(status.getOwner()).append(":");
7719              sb.append(status.getGroup()).append(":");
7720              sb.append(status.getPermission());
7721            }
7722            if (logTokenTrackingId) {
7723              sb.append("\t").append("trackingId=");
7724              String trackingId = null;
7725              if (ugi != null && dtSecretManager != null
7726                  && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
7727                for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
7728                  if (tid instanceof DelegationTokenIdentifier) {
7729                    DelegationTokenIdentifier dtid =
7730                        (DelegationTokenIdentifier)tid;
7731                    trackingId = dtSecretManager.getTokenTrackingId(dtid);
7732                    break;
7733                  }
7734                }
7735              }
7736              sb.append(trackingId);
7737            }
7738            logAuditMessage(sb.toString());
7739          }
7740        }
7741    
7742        public void logAuditMessage(String message) {
7743          auditLog.info(message);
7744        }
7745      }
7746    
7747      private static void enableAsyncAuditLog() {
7748        if (!(auditLog instanceof Log4JLogger)) {
7749          LOG.warn("Log4j is required to enable async auditlog");
7750          return;
7751        }
7752        Logger logger = ((Log4JLogger)auditLog).getLogger();
7753        @SuppressWarnings("unchecked")
7754        List<Appender> appenders = Collections.list(logger.getAllAppenders());
7755        // failsafe against trying to async it more than once
7756        if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
7757          AsyncAppender asyncAppender = new AsyncAppender();
7758          // change logger to have an async appender containing all the
7759          // previously configured appenders
7760          for (Appender appender : appenders) {
7761            logger.removeAppender(appender);
7762            asyncAppender.addAppender(appender);
7763          }
7764          logger.addAppender(asyncAppender);        
7765        }
7766      }
7767    
7768    }
7769