001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
021 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
022 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
023 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
024 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
025 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
026 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
027 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
028 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT;
029 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY;
030 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
031 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
032 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT;
033 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
034 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
035 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
036 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
037 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
038 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
039 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT;
040 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY;
041 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
042 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
043 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
044 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
045 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
046 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
047 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
048 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
049 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
050 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
051 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY;
052 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT;
053 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
054 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
055 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
056 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
057 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
058 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
059 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
060 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
061 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
062 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
063 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
064 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
065 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT;
066 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY;
067 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY;
068 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
069 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
070 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
071 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
072 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
073 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
074 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
075 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT;
076 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY;
077 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT;
078 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY;
079 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
080 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
081 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
082 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
083 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
084 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
085 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
086 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
087 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
088 import static org.apache.hadoop.util.Time.now;
089
090 import java.io.BufferedWriter;
091 import java.io.ByteArrayInputStream;
092 import java.io.DataInput;
093 import java.io.DataInputStream;
094 import java.io.File;
095 import java.io.FileNotFoundException;
096 import java.io.FileOutputStream;
097 import java.io.IOException;
098 import java.io.OutputStreamWriter;
099 import java.io.PrintWriter;
100 import java.io.StringWriter;
101 import java.lang.management.ManagementFactory;
102 import java.net.InetAddress;
103 import java.net.URI;
104 import java.util.ArrayList;
105 import java.util.Arrays;
106 import java.util.Collection;
107 import java.util.Collections;
108 import java.util.Date;
109 import java.util.EnumSet;
110 import java.util.HashMap;
111 import java.util.HashSet;
112 import java.util.Iterator;
113 import java.util.LinkedHashSet;
114 import java.util.List;
115 import java.util.Map;
116 import java.util.Set;
117 import java.util.concurrent.TimeUnit;
118 import java.util.concurrent.locks.ReentrantLock;
119 import java.util.concurrent.locks.ReentrantReadWriteLock;
120
121 import javax.management.NotCompliantMBeanException;
122 import javax.management.ObjectName;
123 import javax.management.StandardMBean;
124
125 import org.apache.commons.logging.Log;
126 import org.apache.commons.logging.LogFactory;
127 import org.apache.commons.logging.impl.Log4JLogger;
128 import org.apache.hadoop.HadoopIllegalArgumentException;
129 import org.apache.hadoop.classification.InterfaceAudience;
130 import org.apache.hadoop.conf.Configuration;
131 import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
132 import org.apache.hadoop.fs.CacheFlag;
133 import org.apache.hadoop.fs.ContentSummary;
134 import org.apache.hadoop.fs.CreateFlag;
135 import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
136 import org.apache.hadoop.fs.FileAlreadyExistsException;
137 import org.apache.hadoop.fs.FileStatus;
138 import org.apache.hadoop.fs.FileSystem;
139 import org.apache.hadoop.fs.FsServerDefaults;
140 import org.apache.hadoop.fs.InvalidPathException;
141 import org.apache.hadoop.fs.Options;
142 import org.apache.hadoop.fs.Options.Rename;
143 import org.apache.hadoop.fs.ParentNotDirectoryException;
144 import org.apache.hadoop.fs.Path;
145 import org.apache.hadoop.fs.UnresolvedLinkException;
146 import org.apache.hadoop.fs.permission.AclEntry;
147 import org.apache.hadoop.fs.permission.AclStatus;
148 import org.apache.hadoop.fs.permission.FsAction;
149 import org.apache.hadoop.fs.permission.FsPermission;
150 import org.apache.hadoop.fs.permission.PermissionStatus;
151 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
152 import org.apache.hadoop.ha.ServiceFailedException;
153 import org.apache.hadoop.hdfs.DFSConfigKeys;
154 import org.apache.hadoop.hdfs.DFSUtil;
155 import org.apache.hadoop.hdfs.HAUtil;
156 import org.apache.hadoop.hdfs.HdfsConfiguration;
157 import org.apache.hadoop.hdfs.StorageType;
158 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
159 import org.apache.hadoop.hdfs.protocol.Block;
160 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
161 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
162 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
163 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
164 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
165 import org.apache.hadoop.hdfs.protocol.DatanodeID;
166 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
167 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
168 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
169 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
170 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
171 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
172 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
173 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
174 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
175 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
176 import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
177 import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
178 import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
179 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
180 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
181 import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
182 import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
183 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
184 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
185 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
186 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
187 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState;
188 import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
189 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
190 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
191 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
192 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
193 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
194 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
195 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
196 import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
197 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
198 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
199 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
200 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
201 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
202 import org.apache.hadoop.hdfs.server.common.Storage;
203 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
204 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
205 import org.apache.hadoop.hdfs.server.common.Util;
206 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
207 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
208 import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
209 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
210 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
211 import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
212 import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
213 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
214 import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
215 import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
216 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
217 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
218 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo;
219 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
220 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
221 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
222 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
223 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
224 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
225 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
226 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
227 import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
228 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
229 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
230 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
231 import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
232 import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
233 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
234 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
235 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
236 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
237 import org.apache.hadoop.hdfs.util.ChunkedArrayList;
238 import org.apache.hadoop.io.IOUtils;
239 import org.apache.hadoop.io.Text;
240 import org.apache.hadoop.ipc.RetriableException;
241 import org.apache.hadoop.ipc.RetryCache;
242 import org.apache.hadoop.ipc.RetryCache.CacheEntry;
243 import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
244 import org.apache.hadoop.ipc.Server;
245 import org.apache.hadoop.ipc.StandbyException;
246 import org.apache.hadoop.metrics2.annotation.Metric;
247 import org.apache.hadoop.metrics2.annotation.Metrics;
248 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
249 import org.apache.hadoop.metrics2.util.MBeans;
250 import org.apache.hadoop.net.NetworkTopology;
251 import org.apache.hadoop.net.Node;
252 import org.apache.hadoop.security.AccessControlException;
253 import org.apache.hadoop.security.UserGroupInformation;
254 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
255 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
256 import org.apache.hadoop.security.token.Token;
257 import org.apache.hadoop.security.token.TokenIdentifier;
258 import org.apache.hadoop.security.token.delegation.DelegationKey;
259 import org.apache.hadoop.util.Daemon;
260 import org.apache.hadoop.util.DataChecksum;
261 import org.apache.hadoop.util.StringUtils;
262 import org.apache.hadoop.util.Time;
263 import org.apache.hadoop.util.VersionInfo;
264 import org.apache.log4j.Appender;
265 import org.apache.log4j.AsyncAppender;
266 import org.apache.log4j.Logger;
267 import org.mortbay.util.ajax.JSON;
268
269 import com.google.common.annotations.VisibleForTesting;
270 import com.google.common.base.Charsets;
271 import com.google.common.base.Preconditions;
272 import com.google.common.collect.ImmutableMap;
273 import com.google.common.collect.Lists;
274
275 /***************************************************
276 * FSNamesystem does the actual bookkeeping work for the
277 * DataNode.
278 *
279 * It tracks several important tables.
280 *
281 * 1) valid fsname --> blocklist (kept on disk, logged)
282 * 2) Set of all valid blocks (inverted #1)
283 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports)
284 * 4) machine --> blocklist (inverted #2)
285 * 5) LRU cache of updated-heartbeat machines
286 ***************************************************/
287 @InterfaceAudience.Private
288 @Metrics(context="dfs")
289 public class FSNamesystem implements Namesystem, FSClusterStats,
290 FSNamesystemMBean, NameNodeMXBean {
291 public static final Log LOG = LogFactory.getLog(FSNamesystem.class);
292
  // Per-thread StringBuilder reused when assembling a single audit log line,
  // avoiding a fresh allocation for every audited operation.
  private static final ThreadLocal<StringBuilder> auditBuffer =
    new ThreadLocal<StringBuilder>() {
      @Override
      protected StringBuilder initialValue() {
        return new StringBuilder();
      }
    };
300
301 @VisibleForTesting
302 public boolean isAuditEnabled() {
303 return !isDefaultAuditLogger || auditLog.isInfoEnabled();
304 }
305
306 private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
307 throws IOException {
308 return (isAuditEnabled() && isExternalInvocation())
309 ? dir.getFileInfo(path, resolveSymlink) : null;
310 }
311
  /**
   * Convenience overload: audit an operation that has no destination path
   * and no resulting file status (e.g. a read or delete).
   *
   * @param succeeded whether the command completed successfully
   * @param cmd the audited command name
   * @param src the source path the command operated on
   * @throws IOException if emitting the audit event fails
   */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }
316
317 private void logAuditEvent(boolean succeeded, String cmd, String src,
318 String dst, HdfsFileStatus stat) throws IOException {
319 if (isAuditEnabled() && isExternalInvocation()) {
320 logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
321 cmd, src, dst, stat);
322 }
323 }
324
325 private void logAuditEvent(boolean succeeded,
326 UserGroupInformation ugi, InetAddress addr, String cmd, String src,
327 String dst, HdfsFileStatus stat) {
328 FileStatus status = null;
329 if (stat != null) {
330 Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
331 Path path = dst != null ? new Path(dst) : new Path(src);
332 status = new FileStatus(stat.getLen(), stat.isDir(),
333 stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
334 stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
335 stat.getGroup(), symlink, path);
336 }
337 for (AuditLogger logger : auditLoggers) {
338 if (logger instanceof HdfsAuditLogger) {
339 HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
340 hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
341 status, ugi, dtSecretManager);
342 } else {
343 logger.logAuditEvent(succeeded, ugi.toString(), addr,
344 cmd, src, dst, status);
345 }
346 }
347 }
348
349 /**
350 * Logger for audit events, noting successful FSNamesystem operations. Emits
351 * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
352 * <code>key=value</code> pairs to be written for the following properties:
353 * <code>
354 * ugi=<ugi in RPC>
355 * ip=<remote IP>
356 * cmd=<command>
357 * src=<src path>
358 * dst=<dst path (optional)>
359 * perm=<permissions (optional)>
360 * </code>
361 */
362 public static final Log auditLog = LogFactory.getLog(
363 FSNamesystem.class.getName() + ".audit");
364
365 static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
366 static int BLOCK_DELETION_INCREMENT = 1000;
367 private final boolean isPermissionEnabled;
368 private final UserGroupInformation fsOwner;
369 private final String fsOwnerShortUserName;
370 private final String supergroup;
371 private final boolean standbyShouldCheckpoint;
372
373 // Scan interval is not configurable.
374 private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
375 TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
376 final DelegationTokenSecretManager dtSecretManager;
377 private final boolean alwaysUseDelegationTokensForTests;
378
379 private static final Step STEP_AWAITING_REPORTED_BLOCKS =
380 new Step(StepType.AWAITING_REPORTED_BLOCKS);
381
382 // Tracks whether the default audit logger is the only configured audit
383 // logger; this allows isAuditEnabled() to return false in case the
384 // underlying logger is disabled, and avoid some unnecessary work.
385 private final boolean isDefaultAuditLogger;
386 private final List<AuditLogger> auditLoggers;
387
388 /** The namespace tree. */
389 FSDirectory dir;
390 private final BlockManager blockManager;
391 private final SnapshotManager snapshotManager;
392 private final CacheManager cacheManager;
393 private final DatanodeStatistics datanodeStatistics;
394
395 private RollingUpgradeInfo rollingUpgradeInfo = null;
396 /**
397 * A flag that indicates whether the checkpointer should checkpoint a rollback
398 * fsimage. The edit log tailer sets this flag. The checkpoint will create a
399 * rollback fsimage if the flag is true, and then change the flag to false.
400 */
401 private volatile boolean needRollbackFsImage;
402
403 // Block pool ID used by this namenode
404 private String blockPoolId;
405
406 final LeaseManager leaseManager = new LeaseManager(this);
407
408 volatile Daemon smmthread = null; // SafeModeMonitor thread
409
410 Daemon nnrmthread = null; // NamenodeResourceMonitor thread
411
412 Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
413 /**
414 * When an active namenode will roll its own edit log, in # edits
415 */
416 private final long editLogRollerThreshold;
417 /**
418 * Check interval of an active namenode's edit log roller thread
419 */
420 private final int editLogRollerInterval;
421
422 private volatile boolean hasResourcesAvailable = false;
423 private volatile boolean fsRunning = true;
424
425 /** The start time of the namesystem. */
426 private final long startTime = now();
427
428 /** The interval of namenode checking for the disk space availability */
429 private final long resourceRecheckInterval;
430
431 // The actual resource checker instance.
432 NameNodeResourceChecker nnResourceChecker;
433
434 private final FsServerDefaults serverDefaults;
435 private final boolean supportAppends;
436 private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
437
  private volatile SafeModeInfo safeMode;  // safe mode information

  private final long maxFsObjects;          // maximum number of fs objects

  private final long minBlockSize;         // minimum block size
  private final long maxBlocksPerFile;     // maximum # of blocks per file

  /**
   * The global generation stamp for legacy blocks with randomly
   * generated block IDs.
   */
  private final GenerationStamp generationStampV1 = new GenerationStamp();

  /**
   * The global generation stamp for this file system.
   */
  private final GenerationStamp generationStampV2 = new GenerationStamp();

  /**
   * The value of the generation stamp when the first switch to sequential
   * block IDs was made. Blocks with generation stamps below this value
   * have randomly allocated block IDs. Blocks with generation stamps above
   * this value had sequentially allocated block IDs. Read from the fsImage
   * (or initialized as an offset from the V1 (legacy) generation stamp on
   * upgrade).
   */
  private long generationStampV1Limit =
      GenerationStamp.GRANDFATHER_GENERATION_STAMP;

  /**
   * The global block ID space for this file system.
   */
  @VisibleForTesting
  private final SequentialBlockIdGenerator blockIdGenerator;

  // precision of access times.
  private final long accessTimePrecision;

  /** Lock to protect FSNamesystem. */
  private final FSNamesystemLock fsLock;

  /**
   * Used when this NN is in standby state to read from the shared edit log.
   */
  private EditLogTailer editLogTailer = null;

  /**
   * Used when this NN is in standby state to perform checkpoints.
   */
  private StandbyCheckpointer standbyCheckpointer;

  /**
   * Reference to the NN's HAContext object. This is only set once
   * {@link #startCommonServices(Configuration, HAContext)} is called.
   */
  private HAContext haContext;

  private final boolean haEnabled;

  /** flag indicating whether replication queues have been initialized */
  boolean initializedReplQueues = false;

  /**
   * Whether the namenode is in the middle of starting the active service
   */
  private volatile boolean startingActiveService = false;

  // Generator for inode IDs; advanced on load and on each new inode.
  private INodeId inodeId;

  // Cache of recent non-idempotent RPCs, used to answer client retries.
  private final RetryCache retryCache;

  // Whether the ACL feature is enabled in configuration.
  private final AclConfigFlag aclConfigFlag;
510
511 /**
512 * Set the last allocated inode id when fsimage or editlog is loaded.
513 */
514 public void resetLastInodeId(long newValue) throws IOException {
515 try {
516 inodeId.skipTo(newValue);
517 } catch(IllegalStateException ise) {
518 throw new IOException(ise);
519 }
520 }
521
  /** Should only be used for tests to reset to any value */
  void resetLastInodeIdWithoutChecking(long newValue) {
    // Bypasses the monotonicity check that resetLastInodeId enforces.
    inodeId.setCurrentValue(newValue);
  }

  /** @return the last inode ID. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }

  /** Allocate a new inode ID. */
  public long allocateNewInodeId() {
    return inodeId.nextValue();
  }
536
537 /**
538 * Clear all loaded data
539 */
540 void clear() {
541 dir.reset();
542 dtSecretManager.reset();
543 generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
544 generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP);
545 blockIdGenerator.setCurrentValue(
546 SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID);
547 generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
548 leaseManager.removeAllLeases();
549 inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID);
550 snapshotManager.clearSnapshottableDirs();
551 cacheManager.clear();
552 }
553
  /** @return the lease manager (exposed for tests only). */
  @VisibleForTesting
  LeaseManager getLeaseManager() {
    return leaseManager;
  }

  /** @return true if this namenode is configured for HA operation. */
  boolean isHaEnabled() {
    return haEnabled;
  }
562
563 /**
564 * Check the supplied configuration for correctness.
565 * @param conf Supplies the configuration to validate.
566 * @throws IOException if the configuration could not be queried.
567 * @throws IllegalArgumentException if the configuration is invalid.
568 */
569 private static void checkConfiguration(Configuration conf)
570 throws IOException {
571
572 final Collection<URI> namespaceDirs =
573 FSNamesystem.getNamespaceDirs(conf);
574 final Collection<URI> editsDirs =
575 FSNamesystem.getNamespaceEditsDirs(conf);
576 final Collection<URI> requiredEditsDirs =
577 FSNamesystem.getRequiredNamespaceEditsDirs(conf);
578 final Collection<URI> sharedEditsDirs =
579 FSNamesystem.getSharedEditsDirs(conf);
580
581 for (URI u : requiredEditsDirs) {
582 if (u.toString().compareTo(
583 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) {
584 continue;
585 }
586
587 // Each required directory must also be in editsDirs or in
588 // sharedEditsDirs.
589 if (!editsDirs.contains(u) &&
590 !sharedEditsDirs.contains(u)) {
591 throw new IllegalArgumentException(
592 "Required edits directory " + u.toString() + " not present in " +
593 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " +
594 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" +
595 editsDirs.toString() + "; " +
596 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" +
597 requiredEditsDirs.toString() + ". " +
598 DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" +
599 sharedEditsDirs.toString() + ".");
600 }
601 }
602
603 if (namespaceDirs.size() == 1) {
604 LOG.warn("Only one image storage directory ("
605 + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of dataloss"
606 + " due to lack of redundant storage directories!");
607 }
608 if (editsDirs.size() == 1) {
609 LOG.warn("Only one namespace edits storage directory ("
610 + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of dataloss"
611 + " due to lack of redundant storage directories!");
612 }
613 }
614
615 /**
616 * Instantiates an FSNamesystem loaded from the image and edits
617 * directories specified in the passed Configuration.
618 *
619 * @param conf the Configuration which specifies the storage directories
620 * from which to load
621 * @return an FSNamesystem which contains the loaded namespace
622 * @throws IOException if loading fails
623 */
624 static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
625
626 checkConfiguration(conf);
627 FSImage fsImage = new FSImage(conf,
628 FSNamesystem.getNamespaceDirs(conf),
629 FSNamesystem.getNamespaceEditsDirs(conf));
630 FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
631 StartupOption startOpt = NameNode.getStartupOption(conf);
632 if (startOpt == StartupOption.RECOVER) {
633 namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
634 }
635
636 long loadStart = now();
637 try {
638 namesystem.loadFSImage(startOpt);
639 } catch (IOException ioe) {
640 LOG.warn("Encountered exception loading fsimage", ioe);
641 fsImage.close();
642 throw ioe;
643 }
644 long timeTakenToLoadFSImage = now() - loadStart;
645 LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
646 NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
647 if (nnMetrics != null) {
648 nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
649 }
650 return namesystem;
651 }
652
  /**
   * Convenience constructor: create an FSNamesystem associated with
   * {@code fsImage}, with the retry-cache setup step enabled
   * (ignoreRetryCache = false).
   *
   * @param conf configuration
   * @param fsImage the FSImage to associate with
   * @throws IOException on bad configuration
   */
  FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
    this(conf, fsImage, false);
  }
656
  /**
   * Create an FSNamesystem associated with the specified image.
   *
   * Note that this does not load any data off of disk -- if you would
   * like that behavior, use {@link #loadFromDisk(Configuration)}
   *
   * @param conf configuration
   * @param fsImage The FSImage to associate with
   * @param ignoreRetryCache Whether or not should ignore the retry cache setup
   *        step. For Secondary NN this should be set to true.
   * @throws IOException on bad configuration
   */
  FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
      throws IOException {
    // Async audit logging must be wired up before anything can audit-log.
    if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
                        DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
      LOG.info("Enabling async auditlog");
      enableAsyncAuditLog();
    }
    // The namesystem lock is created before the try block so close() can
    // safely be called from the catch clauses below.
    boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
    LOG.info("fsLock is fair:" + fair);
    fsLock = new FSNamesystemLock(fair);
    try {
      resourceRecheckInterval = conf.getLong(
          DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
          DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);

      this.blockManager = new BlockManager(this, this, conf);
      this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
      this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);

      // Identity / permission settings.
      this.fsOwner = UserGroupInformation.getCurrentUser();
      this.fsOwnerShortUserName = fsOwner.getShortUserName();
      this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
                                 DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
      this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
                                                 DFS_PERMISSIONS_ENABLED_DEFAULT);
      LOG.info("fsOwner             = " + fsOwner);
      LOG.info("supergroup          = " + supergroup);
      LOG.info("isPermissionEnabled = " + isPermissionEnabled);

      // block allocation has to be persisted in HA using a shared edits directory
      // so that the standby has up-to-date namespace information
      String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
      this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);

      // Sanity check the HA-related config.
      if (nameserviceId != null) {
        LOG.info("Determined nameservice ID: " + nameserviceId);
      }
      LOG.info("HA Enabled: " + haEnabled);
      if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
        LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
        throw new IOException("Invalid configuration: a shared edits dir " +
            "must not be specified if HA is not enabled.");
      }

      // Get the checksum type from config
      String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
      DataChecksum.Type checksumType;
      try {
        checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
      } catch (IllegalArgumentException iae) {
        throw new IOException("Invalid checksum type in "
            + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
      }

      // Defaults handed to clients that do not set their own values.
      this.serverDefaults = new FsServerDefaults(
          conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
          conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
          conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
          (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
          conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
          conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
          conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
          checksumType);

      this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY,
                                       DFS_NAMENODE_MAX_OBJECTS_DEFAULT);

      this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
          DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
      this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
          DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
      this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
          DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
      this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
      LOG.info("Append Enabled: " + supportAppends);

      this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);

      this.standbyShouldCheckpoint = conf.getBoolean(
          DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
      // The edit-log auto-roll threshold is expressed as a multiple of the
      // checkpoint transaction count.
      this.editLogRollerThreshold = (long)
          (conf.getFloat(
              DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
              DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
          conf.getLong(
              DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
              DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
      this.editLogRollerInterval = conf.getInt(
          DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
          DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
      this.inodeId = new INodeId();

      // For testing purposes, allow the DT secret manager to be started regardless
      // of whether security is enabled.
      alwaysUseDelegationTokensForTests = conf.getBoolean(
          DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
          DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);

      // Subsystems that depend on the managers created above.
      this.dtSecretManager = createDelegationTokenSecretManager(conf);
      this.dir = new FSDirectory(fsImage, this, conf);
      this.snapshotManager = new SnapshotManager(dir);
      this.cacheManager = new CacheManager(this, conf, blockManager);
      this.safeMode = new SafeModeInfo(conf);
      this.auditLoggers = initAuditLoggers(conf);
      this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
        auditLoggers.get(0) instanceof DefaultAuditLogger;
      this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
      this.aclConfigFlag = new AclConfigFlag(conf);
    } catch(IOException e) {
      // Tear down anything partially constructed before rethrowing.
      LOG.error(getClass().getSimpleName() + " initialization failed.", e);
      close();
      throw e;
    } catch (RuntimeException re) {
      LOG.error(getClass().getSimpleName() + " initialization failed.", re);
      close();
      throw re;
    }
  }
789
  /** @return the RPC retry cache, or null when the cache is disabled. */
  @VisibleForTesting
  public RetryCache getRetryCache() {
    return retryCache;
  }
794
795 /** Whether or not retry cache is enabled */
796 boolean hasRetryCache() {
797 return retryCache != null;
798 }
799
800 void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
801 if (retryCache != null) {
802 retryCache.addCacheEntryWithPayload(clientId, callId, payload);
803 }
804 }
805
806 void addCacheEntry(byte[] clientId, int callId) {
807 if (retryCache != null) {
808 retryCache.addCacheEntry(clientId, callId);
809 }
810 }
811
812 @VisibleForTesting
813 static RetryCache initRetryCache(Configuration conf) {
814 boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
815 DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
816 LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
817 if (enable) {
818 float heapPercent = conf.getFloat(
819 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
820 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
821 long entryExpiryMillis = conf.getLong(
822 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
823 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
824 LOG.info("Retry cache will use " + heapPercent
825 + " of total heap and retry cache entry expiry time is "
826 + entryExpiryMillis + " millis");
827 long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
828 return new RetryCache("NameNodeRetryCache", heapPercent,
829 entryExpiryNanos);
830 }
831 return null;
832 }
833
834 private List<AuditLogger> initAuditLoggers(Configuration conf) {
835 // Initialize the custom access loggers if configured.
836 Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
837 List<AuditLogger> auditLoggers = Lists.newArrayList();
838 if (alClasses != null && !alClasses.isEmpty()) {
839 for (String className : alClasses) {
840 try {
841 AuditLogger logger;
842 if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
843 logger = new DefaultAuditLogger();
844 } else {
845 logger = (AuditLogger) Class.forName(className).newInstance();
846 }
847 logger.initialize(conf);
848 auditLoggers.add(logger);
849 } catch (RuntimeException re) {
850 throw re;
851 } catch (Exception e) {
852 throw new RuntimeException(e);
853 }
854 }
855 }
856
857 // Make sure there is at least one logger installed.
858 if (auditLoggers.isEmpty()) {
859 auditLoggers.add(new DefaultAuditLogger());
860 }
861 return Collections.unmodifiableList(auditLoggers);
862 }
863
  /**
   * Load the namespace from the local FSImage/edits storage, formatting
   * first if FORMAT was requested, saving a new image when the loaded one
   * is stale, and opening the edit log for write where appropriate.
   *
   * @param startOpt how the namenode was started (FORMAT, REGULAR, ...)
   * @throws IOException if the image or edits cannot be read or written
   */
  private void loadFSImage(StartupOption startOpt) throws IOException {
    final FSImage fsImage = getFSImage();

    // format before starting up if requested
    if (startOpt == StartupOption.FORMAT) {
      
      fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

      // After formatting, proceed as a normal startup.
      startOpt = StartupOption.REGULAR;
    }
    boolean success = false;
    writeLock();
    try {
      // We shouldn't be calling saveNamespace if we've come up in standby state.
      MetaRecoveryContext recovery = startOpt.createRecoveryContext();
      final boolean staleImage
          = fsImage.recoverTransitionRead(startOpt, this, recovery);
      if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
        rollingUpgradeInfo = null;
      }
      // Only the active (non-HA) NN saves, and never mid-rolling-upgrade.
      final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); 
      LOG.info("Need to save fs image? " + needToSave
          + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
          + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
      if (needToSave) {
        fsImage.saveNamespace(this);
      } else {
        // No need to save, so mark the phase done.
        StartupProgress prog = NameNode.getStartupProgress();
        prog.beginPhase(Phase.SAVING_CHECKPOINT);
        prog.endPhase(Phase.SAVING_CHECKPOINT);
      }
      // This will start a new log segment and write to the seen_txid file, so
      // we shouldn't do it when coming up in standby state
      if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)) {
        fsImage.openEditLogForWrite();
      }
      success = true;
    } finally {
      if (!success) {
        // Close storage before unlocking so a failed load releases resources.
        fsImage.close();
      }
      writeUnlock();
    }
    dir.imageLoadComplete();
  }
910
911 private void startSecretManager() {
912 if (dtSecretManager != null) {
913 try {
914 dtSecretManager.startThreads();
915 } catch (IOException e) {
916 // Inability to start secret manager
917 // can't be recovered from.
918 throw new RuntimeException(e);
919 }
920 }
921 }
922
923 private void startSecretManagerIfNecessary() {
924 boolean shouldRun = shouldUseDelegationTokens() &&
925 !isInSafeMode() && getEditLog().isOpenForWrite();
926 boolean running = dtSecretManager.isRunning();
927 if (shouldRun && !running) {
928 startSecretManager();
929 }
930 }
931
932 private void stopSecretManager() {
933 if (dtSecretManager != null) {
934 dtSecretManager.stopThreads();
935 }
936 }
937
  /**
   * Start services common to both active and standby states, registering
   * metrics beans, kicking off the resource checker, and activating the
   * block manager under the write lock.
   *
   * @param conf configuration for the resource checker and block manager
   * @param haContext HA context to associate with this namesystem
   * @throws IOException if the resource checker cannot be created
   */
  void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
    this.registerMBean(); // register the MBean for the FSNamesystemState
    writeLock();
    this.haContext = haContext;
    try {
      nnResourceChecker = new NameNodeResourceChecker(conf);
      checkAvailableResources();
      // At this point we must still be in startup safe mode with no
      // replication queues populated.
      assert safeMode != null && !isPopulatingReplQueues();
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginPhase(Phase.SAFEMODE);
      prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
        getCompleteBlocksTotal());
      setBlockTotal();
      blockManager.activate(conf);
    } finally {
      writeUnlock();
    }
    
    // MXBean/metrics registration is safe outside the lock.
    registerMXBean();
    DefaultMetricsSystem.instance().register(this);
  }
964
965 /**
966 * Stop services common to both active and standby states
967 * @throws IOException
968 */
969 void stopCommonServices() {
970 writeLock();
971 try {
972 if (blockManager != null) blockManager.close();
973 } finally {
974 writeUnlock();
975 }
976 RetryCache.clear(retryCache);
977 }
978
  /**
   * Start services required in active state: takes over the edit log writer
   * role (catching up from the previous active first), re-initializes the
   * replication queues, and launches the active-only daemon threads.
   *
   * @throws IOException if the edit log cannot be recovered or opened
   */
  void startActiveServices() throws IOException {
    startingActiveService = true;
    LOG.info("Starting services required for active state");
    writeLock();
    try {
      FSEditLog editLog = dir.fsImage.getEditLog();
      
      if (!editLog.isOpenForWrite()) {
        // During startup, we're already open for write during initialization.
        editLog.initJournalsForWrite();
        // May need to recover
        editLog.recoverUnclosedStreams();
        
        LOG.info("Catching up to latest edits from old active before " +
            "taking over writer role in edits logs");
        editLogTailer.catchupDuringFailover();
        
        // We were the standby until now; blocks from the future are no
        // longer expected, and all datanode state must be re-validated.
        blockManager.setPostponeBlocksFromFuture(false);
        blockManager.getDatanodeManager().markAllDatanodesStale();
        blockManager.clearQueues();
        blockManager.processAllPendingDNMessages();
        
        // Only need to re-process the queue, If not in SafeMode.
        if (!isInSafeMode()) {
          LOG.info("Reprocessing replication and invalidation queues");
          initializeReplQueues();
        }

        if (LOG.isDebugEnabled()) {
          LOG.debug("NameNode metadata after re-processing " +
              "replication and invalidation queues during failover:\n" +
              metaSaveAsString());
        }
        
        long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
        LOG.info("Will take over writing edit logs at txnid " + 
            nextTxId);
        editLog.setNextTxId(nextTxId);

        dir.fsImage.editLog.openForWrite();
      }
      
      if (haEnabled) {
        // Renew all of the leases before becoming active.
        // This is because, while we were in standby mode,
        // the leases weren't getting renewed on this NN.
        // Give them all a fresh start here.
        leaseManager.renewAllLeases();
      }
      leaseManager.startMonitor();
      startSecretManagerIfNecessary();

      //ResourceMonitor required only at ActiveNN. See HDFS-2914
      this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
      nnrmthread.start();

      nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
          editLogRollerThreshold, editLogRollerInterval));
      nnEditLogRoller.start();

      cacheManager.startMonitorThread();
      blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
    } finally {
      writeUnlock();
      // Reset the transition flag even if startup failed part-way.
      startingActiveService = false;
    }
  }
1050
  /**
   * Initialize replication queues.
   */
  private void initializeReplQueues() {
    LOG.info("initializing replication queues");
    blockManager.processMisReplicatedBlocks();
    // Set the flag only after the full scan completes.
    initializedReplQueues = true;
  }
1059
1060 private boolean inActiveState() {
1061 return haContext != null &&
1062 haContext.getState().getServiceState() == HAServiceState.ACTIVE;
1063 }
1064
1065 /**
1066 * @return Whether the namenode is transitioning to active state and is in the
1067 * middle of the {@link #startActiveServices()}
1068 */
1069 public boolean inTransitionToActive() {
1070 return haEnabled && inActiveState() && startingActiveService;
1071 }
1072
1073 private boolean shouldUseDelegationTokens() {
1074 return UserGroupInformation.isSecurityEnabled() ||
1075 alwaysUseDelegationTokensForTests;
1076 }
1077
  /**
   * Stop services required in active state: halts the active-only daemons,
   * closes the edit log, and drops replication/caching state that only the
   * active NN should hold.
   */
  void stopActiveServices() {
    LOG.info("Stopping services started for active state");
    writeLock();
    try {
      stopSecretManager();
      if (leaseManager != null) {
        leaseManager.stopMonitor();
      }
      if (nnrmthread != null) {
        // Signal the monitor loop to exit, then interrupt any sleep.
        ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
        nnrmthread.interrupt();
      }
      if (nnEditLogRoller != null) {
        ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
        nnEditLogRoller.interrupt();
      }
      if (dir != null && dir.fsImage != null) {
        if (dir.fsImage.editLog != null) {
          dir.fsImage.editLog.close();
        }
        // Update the fsimage with the last txid that we wrote
        // so that the tailer starts from the right spot.
        dir.fsImage.updateLastAppliedTxIdFromWritten();
      }
      cacheManager.stopMonitorThread();
      cacheManager.clearDirectiveStats();
      blockManager.getDatanodeManager().clearPendingCachingCommands();
      blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
      // Don't want to keep replication queues when not in Active.
      blockManager.clearQueues();
      initializedReplQueues = false;
    } finally {
      writeUnlock();
    }
  }
1117
  /**
   * Start services required in standby state: opens the shared edits for
   * read, starts the edit log tailer, and (optionally) the standby
   * checkpointer.
   *
   * @param conf configuration for the tailer and checkpointer
   * @throws IOException if the shared journals cannot be opened for read
   */
  void startStandbyServices(final Configuration conf) throws IOException {
    LOG.info("Starting services required for standby state");
    if (!dir.fsImage.editLog.isOpenForRead()) {
      // During startup, we're already open for read.
      dir.fsImage.editLog.initSharedJournalsForRead();
    }
    
    // A standby must not process blocks with generation stamps from the
    // future; postpone them until we become active.
    blockManager.setPostponeBlocksFromFuture(true);

    editLogTailer = new EditLogTailer(this, conf);
    editLogTailer.start();
    if (standbyShouldCheckpoint) {
      standbyCheckpointer = new StandbyCheckpointer(conf, this);
      standbyCheckpointer.start();
    }
  }
1139
1140 /**
1141 * Called when the NN is in Standby state and the editlog tailer tails the
1142 * OP_ROLLING_UPGRADE_START.
1143 */
1144 void triggerRollbackCheckpoint() {
1145 setNeedRollbackFsImage(true);
1146 if (standbyCheckpointer != null) {
1147 standbyCheckpointer.triggerRollbackCheckpoint();
1148 }
1149 }
1150
1151 /**
1152 * Called while the NN is in Standby state, but just about to be
1153 * asked to enter Active state. This cancels any checkpoints
1154 * currently being taken.
1155 */
1156 void prepareToStopStandbyServices() throws ServiceFailedException {
1157 if (standbyCheckpointer != null) {
1158 standbyCheckpointer.cancelAndPreventCheckpoints(
1159 "About to leave standby state");
1160 }
1161 }
1162
1163 /** Stop services required in standby state */
1164 void stopStandbyServices() throws IOException {
1165 LOG.info("Stopping services started for standby state");
1166 if (standbyCheckpointer != null) {
1167 standbyCheckpointer.stop();
1168 }
1169 if (editLogTailer != null) {
1170 editLogTailer.stop();
1171 }
1172 if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) {
1173 dir.fsImage.editLog.close();
1174 }
1175 }
1176
1177 @Override
1178 public void checkOperation(OperationCategory op) throws StandbyException {
1179 if (haContext != null) {
1180 // null in some unit tests
1181 haContext.checkOperation(op);
1182 }
1183 }
1184
1185 /**
1186 * @throws RetriableException
1187 * If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
1188 * NameNode is in active state
1189 * @throws SafeModeException
1190 * Otherwise if NameNode is in SafeMode.
1191 */
1192 private void checkNameNodeSafeMode(String errorMsg)
1193 throws RetriableException, SafeModeException {
1194 if (isInSafeMode()) {
1195 SafeModeException se = new SafeModeException(errorMsg, safeMode);
1196 if (haEnabled && haContext != null
1197 && haContext.getState().getServiceState() == HAServiceState.ACTIVE
1198 && shouldRetrySafeMode(this.safeMode)) {
1199 throw new RetriableException(se);
1200 } else {
1201 throw se;
1202 }
1203 }
1204 }
1205
1206 /**
1207 * We already know that the safemode is on. We will throw a RetriableException
1208 * if the safemode is not manual or caused by low resource.
1209 */
1210 private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
1211 if (safeMode == null) {
1212 return false;
1213 } else {
1214 return !safeMode.isManual() && !safeMode.areResourcesLow();
1215 }
1216 }
1217
  /** @return the URIs of the configured fsimage (name) directories. */
  public static Collection<URI> getNamespaceDirs(Configuration conf) {
    return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
  }
1221
1222 /**
1223 * Get all edits dirs which are required. If any shared edits dirs are
1224 * configured, these are also included in the set of required dirs.
1225 *
1226 * @param conf the HDFS configuration.
1227 * @return all required dirs.
1228 */
1229 public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
1230 Set<URI> ret = new HashSet<URI>();
1231 ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
1232 ret.addAll(getSharedEditsDirs(conf));
1233 return ret;
1234 }
1235
  /**
   * Resolve the storage directory URIs named by the given config property.
   * Under IMPORT startup the default directories are stripped out (so the
   * NN can start with an empty set); otherwise an empty configuration
   * falls back to the default edits dir.
   *
   * @param conf configuration to read the property from
   * @param propertyName config key listing the directories
   * @return the directories as URIs (possibly empty under IMPORT)
   */
  private static Collection<URI> getStorageDirs(Configuration conf,
                                                String propertyName) {
    Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
    StartupOption startOpt = NameNode.getStartupOption(conf);
    if(startOpt == StartupOption.IMPORT) {
      // In case of IMPORT this will get rid of default directories 
      // but will retain directories specified in hdfs-site.xml
      // When importing image from a checkpoint, the name-node can
      // start with empty set of storage directories.
      Configuration cE = new HdfsConfiguration(false);
      cE.addResource("core-default.xml");
      cE.addResource("core-site.xml");
      cE.addResource("hdfs-default.xml");
      Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
      dirNames.removeAll(dirNames2);
      if(dirNames.isEmpty())
        LOG.warn("!!! WARNING !!!" +
          "\n\tThe NameNode currently runs without persistent storage." +
          "\n\tAny changes to the file system meta-data may be lost." +
          "\n\tRecommended actions:" +
          "\n\t\t- shutdown and restart NameNode with configured \"" 
          + propertyName + "\" in hdfs-site.xml;" +
          "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
          "of the file system meta-data.");
    } else if (dirNames.isEmpty()) {
      // No explicit configuration: use the built-in default edits dir.
      dirNames = Collections.singletonList(
          DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
    }
    return Util.stringCollectionAsURIs(dirNames);
  }
1266
  /**
   * Return an ordered list of edits directories to write to.
   * The list is ordered such that all shared edits directories
   * are ordered before non-shared directories, and any duplicates
   * are removed. The order they are specified in the configuration
   * is retained.
   * @return Collection of shared edits directories.
   * @throws IOException if multiple shared edits directories are configured
   */
  public static List<URI> getNamespaceEditsDirs(Configuration conf)
      throws IOException {
    // Delegates with includeShared=true so shared dirs come first.
    return getNamespaceEditsDirs(conf, true);
  }
1280
  /**
   * Return the ordered, de-duplicated list of edits directories, with the
   * shared edits directories (when included) listed before local ones.
   *
   * @param conf the HDFS configuration
   * @param includeShared whether shared edits dirs should be included
   * @return ordered list of edits URIs; falls back to the name dirs when
   *         no edits dirs are configured
   * @throws IOException if more than one shared edits dir is configured
   */
  public static List<URI> getNamespaceEditsDirs(Configuration conf,
      boolean includeShared)
      throws IOException {
    // Use a LinkedHashSet so that order is maintained while we de-dup
    // the entries.
    LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();
    
    if (includeShared) {
      List<URI> sharedDirs = getSharedEditsDirs(conf);
  
      // Fail until multiple shared edits directories are supported (HDFS-2782)
      if (sharedDirs.size() > 1) {
        throw new IOException(
            "Multiple shared edits directories are not yet supported");
      }
  
      // First add the shared edits dirs. It's critical that the shared dirs
      // are added first, since JournalSet syncs them in the order they are listed,
      // and we need to make sure all edits are in place in the shared storage
      // before they are replicated locally. See HDFS-2874.
      for (URI dir : sharedDirs) {
        if (!editsDirs.add(dir)) {
          LOG.warn("Edits URI " + dir + " listed multiple times in " + 
              DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
        }
      }
    }    
    // Now add the non-shared dirs.
    for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
      if (!editsDirs.add(dir)) {
        LOG.warn("Edits URI " + dir + " listed multiple times in " + 
            DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
            DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
      }
    }

    if (editsDirs.isEmpty()) {
      // If this is the case, no edit dirs have been explicitly configured.
      // Image dirs are to be used for edits too.
      return Lists.newArrayList(getNamespaceDirs(conf));
    } else {
      return Lists.newArrayList(editsDirs);
    }
  }
1325
1326 /**
1327 * Returns edit directories that are shared between primary and secondary.
1328 * @param conf
1329 * @return Collection of edit directories.
1330 */
1331 public static List<URI> getSharedEditsDirs(Configuration conf) {
1332 // don't use getStorageDirs here, because we want an empty default
1333 // rather than the dir in /tmp
1334 Collection<String> dirNames = conf.getTrimmedStringCollection(
1335 DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
1336 return Util.stringCollectionAsURIs(dirNames);
1337 }
1338
  /** Acquire the namesystem read lock (blocks until available). */
  @Override
  public void readLock() {
    this.fsLock.readLock().lock();
  }
  /**
   * Acquire the long read lock followed by the ordinary read lock,
   * releasing the former if interrupted while waiting for the latter.
   */
  @Override
  public void longReadLockInterruptibly() throws InterruptedException {
    this.fsLock.longReadLock().lockInterruptibly();
    try {
      this.fsLock.readLock().lockInterruptibly();
    } catch (InterruptedException ie) {
      // In the event we're interrupted while getting the normal FSNS read lock,
      // release the long read lock.
      this.fsLock.longReadLock().unlock();
      throw ie;
    }
  }
  /** Release both locks taken by {@link #longReadLockInterruptibly()}. */
  @Override
  public void longReadUnlock() {
    this.fsLock.readLock().unlock();
    this.fsLock.longReadLock().unlock();
  }
  /** Release the namesystem read lock. */
  @Override
  public void readUnlock() {
    this.fsLock.readLock().unlock();
  }
  /**
   * Acquire the write lock. The long read lock is taken first so writers
   * also exclude long-running readers; order must mirror writeUnlock().
   */
  @Override
  public void writeLock() {
    this.fsLock.longReadLock().lock();
    this.fsLock.writeLock().lock();
  }
  /**
   * Interruptible variant of {@link #writeLock()}; releases the long read
   * lock if interrupted while waiting for the write lock.
   */
  @Override
  public void writeLockInterruptibly() throws InterruptedException {
    this.fsLock.longReadLock().lockInterruptibly();
    try {
      this.fsLock.writeLock().lockInterruptibly();
    } catch (InterruptedException ie) {
      // In the event we're interrupted while getting the normal FSNS write
      // lock, release the long read lock.
      this.fsLock.longReadLock().unlock();
      throw ie;
    }
  }
  /** Release the write lock, then the long read lock (reverse of acquire). */
  @Override
  public void writeUnlock() {
    this.fsLock.writeLock().unlock();
    this.fsLock.longReadLock().unlock();
  }
  /** @return true if the calling thread holds the write lock. */
  @Override
  public boolean hasWriteLock() {
    return this.fsLock.isWriteLockedByCurrentThread();
  }
  /** @return true if the calling thread holds the read or write lock. */
  @Override
  public boolean hasReadLock() {
    return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
  }
1394
  /** @return the calling thread's read-lock reentrancy hold count. */
  public int getReadHoldCount() {
    return this.fsLock.getReadHoldCount();
  }
1398
  /** @return the calling thread's write-lock reentrancy hold count. */
  public int getWriteHoldCount() {
    return this.fsLock.getWriteHoldCount();
  }
1402
  /** @return the namespace info (namespace/cluster/blockpool IDs, ctime),
   *  read under the namesystem read lock. */
  NamespaceInfo getNamespaceInfo() {
    readLock();
    try {
      return unprotectedGetNamespaceInfo();
    } finally {
      readUnlock();
    }
  }
1411
  /**
   * Version of @see #getNamespaceInfo() that is not protected by a lock.
   */
  NamespaceInfo unprotectedGetNamespaceInfo() {
    return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(),
        getClusterId(), getBlockPoolId(),
        dir.fsImage.getStorage().getCTime());
  }
1420
  /**
   * Close down this file system manager.
   * Causes heartbeat and lease daemons to stop; waits briefly for
   * them to finish, but a short timeout returns control back to caller.
   */
  void close() {
    fsRunning = false;
    try {
      stopCommonServices();
      if (smmthread != null) smmthread.interrupt();
    } finally {
      // using finally to ensure we also wait for lease daemon
      try {
        stopActiveServices();
        stopStandbyServices();
        if (dir != null) {
          dir.close();
        }
      } catch (IOException ie) {
        // Best-effort: log and force-close the directory rather than
        // propagating, since close() must not throw.
        LOG.error("Error closing FSDirectory", ie);
        IOUtils.cleanup(LOG, dir);
      }
    }
  }
1445
  /** @return true until {@link #close()} has been invoked. */
  @Override
  public boolean isRunning() {
    return fsRunning;
  }
1450
1451 @Override
1452 public boolean isInStandbyState() {
1453 if (haContext == null || haContext.getState() == null) {
1454 // We're still starting up. In this case, if HA is
1455 // on for the cluster, we always start in standby. Otherwise
1456 // start in active.
1457 return haEnabled;
1458 }
1459
1460 return HAServiceState.STANDBY == haContext.getState().getServiceState();
1461 }
1462
1463 /**
1464 * Dump all metadata into specified file
1465 */
1466 void metaSave(String filename) throws IOException {
1467 checkSuperuserPrivilege();
1468 checkOperation(OperationCategory.UNCHECKED);
1469 writeLock();
1470 try {
1471 checkOperation(OperationCategory.UNCHECKED);
1472 File file = new File(System.getProperty("hadoop.log.dir"), filename);
1473 PrintWriter out = new PrintWriter(new BufferedWriter(
1474 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)));
1475 metaSave(out);
1476 out.flush();
1477 out.close();
1478 } finally {
1479 writeUnlock();
1480 }
1481 }
1482
1483 private void metaSave(PrintWriter out) {
1484 assert hasWriteLock();
1485 long totalInodes = this.dir.totalInodes();
1486 long totalBlocks = this.getBlocksTotal();
1487 out.println(totalInodes + " files and directories, " + totalBlocks
1488 + " blocks = " + (totalInodes + totalBlocks) + " total");
1489
1490 blockManager.metaSave(out);
1491 }
1492
1493 private String metaSaveAsString() {
1494 StringWriter sw = new StringWriter();
1495 PrintWriter pw = new PrintWriter(sw);
1496 metaSave(pw);
1497 pw.flush();
1498 return sw.toString();
1499 }
1500
1501
  /** @return the server-default block size in bytes. */
  long getDefaultBlockSize() {
    return serverDefaults.getBlockSize();
  }
1505
  /** @return the server defaults handed to clients; READ-checked for HA. */
  FsServerDefaults getServerDefaults() throws StandbyException {
    checkOperation(OperationCategory.READ);
    return serverDefaults;
  }
1510
  /** @return the configured access-time precision in milliseconds. */
  long getAccessTimePrecision() {
    return accessTimePrecision;
  }
1514
  /** @return true when access-time tracking is enabled (precision > 0). */
  private boolean isAccessTimeSupported() {
    return accessTimePrecision > 0;
  }
1518
1519 /////////////////////////////////////////////////////////
1520 //
1521 // These methods are called by HadoopFS clients
1522 //
1523 /////////////////////////////////////////////////////////
  /**
   * Set permissions for an existing file.
   * Audit-logs the failure on AccessControlException before rethrowing.
   * @throws IOException
   */
  void setPermission(String src, FsPermission permission)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    try {
      setPermissionInt(src, permission);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setPermission", src);
      throw e;
    }
  }
1538
  /**
   * Internal setPermission: resolves the path, checks ownership, applies
   * the change under the write lock, then syncs the edit log and writes
   * the success audit event outside the lock.
   */
  private void setPermissionInt(String src, FsPermission permission)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed since above.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set permission for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      dir.setPermission(src, permission);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setPermission", src, null, resultingStat);
  }
1560
  /**
   * Set owner for an existing file.
   * Audit-logs the failure on AccessControlException before rethrowing.
   * @throws IOException
   */
  void setOwner(String src, String username, String group)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    try {
      setOwnerInt(src, username, group);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setOwner", src);
      throw e;
    }
  }
1575
  /**
   * Internal setOwner: only a superuser may change the owner to another
   * user or to a group it does not belong to; the change is applied under
   * the write lock and audit-logged after the edit log sync.
   */
  private void setOwnerInt(String src, String username, String group)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed since above.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set owner for " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      checkOwner(pc, src);
      if (!pc.isSuperUser()) {
        // Non-superusers may only "change" to themselves / their groups.
        if (username != null && !pc.getUser().equals(username)) {
          throw new AccessControlException("Non-super user cannot change owner");
        }
        if (group != null && !pc.containsGroup(group)) {
          throw new AccessControlException("User does not belong to " + group);
        }
      }
      dir.setOwner(src, username, group);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setOwner", src, null, resultingStat);
  }
1605
1606 /**
1607 * Get block locations within the specified range.
1608 * @see ClientProtocol#getBlockLocations(String, long, long)
1609 */
1610 LocatedBlocks getBlockLocations(String clientMachine, String src,
1611 long offset, long length) throws AccessControlException,
1612 FileNotFoundException, UnresolvedLinkException, IOException {
1613 LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
1614 true);
1615 if (blocks != null) {
1616 blockManager.getDatanodeManager().sortLocatedBlocks(
1617 clientMachine, blocks.getLocatedBlocks());
1618
1619 LocatedBlock lastBlock = blocks.getLastLocatedBlock();
1620 if (lastBlock != null) {
1621 ArrayList<LocatedBlock> lastBlockList = new ArrayList<LocatedBlock>();
1622 lastBlockList.add(lastBlock);
1623 blockManager.getDatanodeManager().sortLocatedBlocks(
1624 clientMachine, lastBlockList);
1625 }
1626 }
1627 return blocks;
1628 }
1629
1630 /**
1631 * Get block locations within the specified range.
1632 * @see ClientProtocol#getBlockLocations(String, long, long)
1633 * @throws FileNotFoundException, UnresolvedLinkException, IOException
1634 */
1635 LocatedBlocks getBlockLocations(String src, long offset, long length,
1636 boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
1637 throws FileNotFoundException, UnresolvedLinkException, IOException {
1638 try {
1639 return getBlockLocationsInt(src, offset, length, doAccessTime,
1640 needBlockToken, checkSafeMode);
1641 } catch (AccessControlException e) {
1642 logAuditEvent(false, "open", src);
1643 throw e;
1644 }
1645 }
1646
1647 private LocatedBlocks getBlockLocationsInt(String src, long offset,
1648 long length, boolean doAccessTime, boolean needBlockToken,
1649 boolean checkSafeMode)
1650 throws FileNotFoundException, UnresolvedLinkException, IOException {
1651 if (offset < 0) {
1652 throw new HadoopIllegalArgumentException(
1653 "Negative offset is not supported. File: " + src);
1654 }
1655 if (length < 0) {
1656 throw new HadoopIllegalArgumentException(
1657 "Negative length is not supported. File: " + src);
1658 }
1659 final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
1660 offset, length, doAccessTime, needBlockToken);
1661 logAuditEvent(true, "open", src);
1662 if (checkSafeMode && isInSafeMode()) {
1663 for (LocatedBlock b : ret.getLocatedBlocks()) {
1664 // if safemode & no block locations yet then throw safemodeException
1665 if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
1666 SafeModeException se = new SafeModeException(
1667 "Zero blocklocations for " + src, safeMode);
1668 if (haEnabled && haContext != null &&
1669 haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
1670 throw new RetriableException(se);
1671 } else {
1672 throw se;
1673 }
1674 }
1675 }
1676 }
1677 return ret;
1678 }
1679
1680 /*
1681 * Get block locations within the specified range, updating the
1682 * access times if necessary.
1683 */
1684 private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset,
1685 long length, boolean doAccessTime, boolean needBlockToken)
1686 throws FileNotFoundException,
1687 UnresolvedLinkException, IOException {
1688 FSPermissionChecker pc = getPermissionChecker();
1689 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1690 for (int attempt = 0; attempt < 2; attempt++) {
1691 boolean isReadOp = (attempt == 0);
1692 if (isReadOp) { // first attempt is with readlock
1693 checkOperation(OperationCategory.READ);
1694 readLock();
1695 } else { // second attempt is with write lock
1696 checkOperation(OperationCategory.WRITE);
1697 writeLock(); // writelock is needed to set accesstime
1698 }
1699 src = FSDirectory.resolvePath(src, pathComponents, dir);
1700 try {
1701 if (isReadOp) {
1702 checkOperation(OperationCategory.READ);
1703 } else {
1704 checkOperation(OperationCategory.WRITE);
1705 }
1706 if (isPermissionEnabled) {
1707 checkPathAccess(pc, src, FsAction.READ);
1708 }
1709
1710 // if the namenode is in safemode, then do not update access time
1711 if (isInSafeMode()) {
1712 doAccessTime = false;
1713 }
1714
1715 final INodesInPath iip = dir.getLastINodeInPath(src);
1716 final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
1717 if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
1718 && doAccessTime && isAccessTimeSupported()) {
1719 final long now = now();
1720 if (now > inode.getAccessTime() + getAccessTimePrecision()) {
1721 // if we have to set access time but we only have the readlock, then
1722 // restart this entire operation with the writeLock.
1723 if (isReadOp) {
1724 continue;
1725 }
1726 dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshotId());
1727 }
1728 }
1729 final long fileSize = iip.isSnapshot() ?
1730 inode.computeFileSize(iip.getPathSnapshotId())
1731 : inode.computeFileSizeNotIncludingLastUcBlock();
1732 boolean isUc = inode.isUnderConstruction();
1733 if (iip.isSnapshot()) {
1734 // if src indicates a snapshot file, we need to make sure the returned
1735 // blocks do not exceed the size of the snapshot file.
1736 length = Math.min(length, fileSize - offset);
1737 isUc = false;
1738 }
1739 LocatedBlocks blocks =
1740 blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
1741 isUc, offset, length, needBlockToken, iip.isSnapshot());
1742 // Set caching information for the located blocks.
1743 for (LocatedBlock lb: blocks.getLocatedBlocks()) {
1744 cacheManager.setCachedLocations(lb);
1745 }
1746 return blocks;
1747 } finally {
1748 if (isReadOp) {
1749 readUnlock();
1750 } else {
1751 writeUnlock();
1752 }
1753 }
1754 }
1755 return null; // can never reach here
1756 }
1757
1758 /**
1759 * Moves all the blocks from srcs and appends them to trg
1760 * To avoid rollbacks we will verify validitity of ALL of the args
1761 * before we start actual move.
1762 *
1763 * This does not support ".inodes" relative path
1764 * @param target
1765 * @param srcs
1766 * @throws IOException
1767 */
1768 void concat(String target, String [] srcs)
1769 throws IOException, UnresolvedLinkException {
1770 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
1771 if (cacheEntry != null && cacheEntry.isSuccess()) {
1772 return; // Return previous response
1773 }
1774
1775 // Either there is no previous request in progres or it has failed
1776 if(FSNamesystem.LOG.isDebugEnabled()) {
1777 FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
1778 " to " + target);
1779 }
1780
1781 boolean success = false;
1782 try {
1783 concatInt(target, srcs, cacheEntry != null);
1784 success = true;
1785 } catch (AccessControlException e) {
1786 logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
1787 throw e;
1788 } finally {
1789 RetryCache.setState(cacheEntry, success);
1790 }
1791 }
1792
1793 private void concatInt(String target, String [] srcs,
1794 boolean logRetryCache) throws IOException, UnresolvedLinkException {
1795 // verify args
1796 if(target.isEmpty()) {
1797 throw new IllegalArgumentException("Target file name is empty");
1798 }
1799 if(srcs == null || srcs.length == 0) {
1800 throw new IllegalArgumentException("No sources given");
1801 }
1802
1803 // We require all files be in the same directory
1804 String trgParent =
1805 target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
1806 for (String s : srcs) {
1807 String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
1808 if (!srcParent.equals(trgParent)) {
1809 throw new IllegalArgumentException(
1810 "Sources and target are not in the same directory");
1811 }
1812 }
1813
1814 HdfsFileStatus resultingStat = null;
1815 FSPermissionChecker pc = getPermissionChecker();
1816 checkOperation(OperationCategory.WRITE);
1817 writeLock();
1818 try {
1819 checkOperation(OperationCategory.WRITE);
1820 checkNameNodeSafeMode("Cannot concat " + target);
1821 concatInternal(pc, target, srcs, logRetryCache);
1822 resultingStat = getAuditFileInfo(target, false);
1823 } finally {
1824 writeUnlock();
1825 }
1826 getEditLog().logSync();
1827 logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
1828 }
1829
1830 /** See {@link #concat(String, String[])} */
1831 private void concatInternal(FSPermissionChecker pc, String target,
1832 String[] srcs, boolean logRetryCache) throws IOException,
1833 UnresolvedLinkException {
1834 assert hasWriteLock();
1835
1836 // write permission for the target
1837 if (isPermissionEnabled) {
1838 checkPathAccess(pc, target, FsAction.WRITE);
1839
1840 // and srcs
1841 for(String aSrc: srcs) {
1842 checkPathAccess(pc, aSrc, FsAction.READ); // read the file
1843 checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete
1844 }
1845 }
1846
1847 // to make sure no two files are the same
1848 Set<INode> si = new HashSet<INode>();
1849
1850 // we put the following prerequisite for the operation
1851 // replication and blocks sizes should be the same for ALL the blocks
1852
1853 // check the target
1854 final INodeFile trgInode = INodeFile.valueOf(dir.getINode4Write(target),
1855 target);
1856 if(trgInode.isUnderConstruction()) {
1857 throw new HadoopIllegalArgumentException("concat: target file "
1858 + target + " is under construction");
1859 }
1860 // per design target shouldn't be empty and all the blocks same size
1861 if(trgInode.numBlocks() == 0) {
1862 throw new HadoopIllegalArgumentException("concat: target file "
1863 + target + " is empty");
1864 }
1865 if (trgInode.isWithSnapshot()) {
1866 throw new HadoopIllegalArgumentException("concat: target file "
1867 + target + " is in a snapshot");
1868 }
1869
1870 long blockSize = trgInode.getPreferredBlockSize();
1871
1872 // check the end block to be full
1873 final BlockInfo last = trgInode.getLastBlock();
1874 if(blockSize != last.getNumBytes()) {
1875 throw new HadoopIllegalArgumentException("The last block in " + target
1876 + " is not full; last block size = " + last.getNumBytes()
1877 + " but file block size = " + blockSize);
1878 }
1879
1880 si.add(trgInode);
1881 final short repl = trgInode.getFileReplication();
1882
1883 // now check the srcs
1884 boolean endSrc = false; // final src file doesn't have to have full end block
1885 for(int i=0; i<srcs.length; i++) {
1886 String src = srcs[i];
1887 if(i==srcs.length-1)
1888 endSrc=true;
1889
1890 final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
1891 if(src.isEmpty()
1892 || srcInode.isUnderConstruction()
1893 || srcInode.numBlocks() == 0) {
1894 throw new HadoopIllegalArgumentException("concat: source file " + src
1895 + " is invalid or empty or underConstruction");
1896 }
1897
1898 // check replication and blocks size
1899 if(repl != srcInode.getBlockReplication()) {
1900 throw new HadoopIllegalArgumentException("concat: the soruce file "
1901 + src + " and the target file " + target
1902 + " should have the same replication: source replication is "
1903 + srcInode.getBlockReplication()
1904 + " but target replication is " + repl);
1905 }
1906
1907 //boolean endBlock=false;
1908 // verify that all the blocks are of the same length as target
1909 // should be enough to check the end blocks
1910 final BlockInfo[] srcBlocks = srcInode.getBlocks();
1911 int idx = srcBlocks.length-1;
1912 if(endSrc)
1913 idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
1914 if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
1915 throw new HadoopIllegalArgumentException("concat: the soruce file "
1916 + src + " and the target file " + target
1917 + " should have the same blocks sizes: target block size is "
1918 + blockSize + " but the size of source block " + idx + " is "
1919 + srcBlocks[idx].getNumBytes());
1920 }
1921
1922 si.add(srcInode);
1923 }
1924
1925 // make sure no two files are the same
1926 if(si.size() < srcs.length+1) { // trg + srcs
1927 // it means at least two files are the same
1928 throw new HadoopIllegalArgumentException(
1929 "concat: at least two of the source files are the same");
1930 }
1931
1932 if(NameNode.stateChangeLog.isDebugEnabled()) {
1933 NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " +
1934 Arrays.toString(srcs) + " to " + target);
1935 }
1936
1937 dir.concat(target,srcs, logRetryCache);
1938 }
1939
1940 /**
1941 * stores the modification and access time for this inode.
1942 * The access time is precise upto an hour. The transaction, if needed, is
1943 * written to the edits log but is not flushed.
1944 */
1945 void setTimes(String src, long mtime, long atime)
1946 throws IOException, UnresolvedLinkException {
1947 if (!isAccessTimeSupported() && atime != -1) {
1948 throw new IOException("Access time for hdfs is not configured. " +
1949 " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
1950 }
1951 try {
1952 setTimesInt(src, mtime, atime);
1953 } catch (AccessControlException e) {
1954 logAuditEvent(false, "setTimes", src);
1955 throw e;
1956 }
1957 }
1958
1959 private void setTimesInt(String src, long mtime, long atime)
1960 throws IOException, UnresolvedLinkException {
1961 HdfsFileStatus resultingStat = null;
1962 FSPermissionChecker pc = getPermissionChecker();
1963 checkOperation(OperationCategory.WRITE);
1964 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
1965 writeLock();
1966 try {
1967 checkOperation(OperationCategory.WRITE);
1968 checkNameNodeSafeMode("Cannot set times " + src);
1969 src = FSDirectory.resolvePath(src, pathComponents, dir);
1970
1971 // Write access is required to set access and modification times
1972 if (isPermissionEnabled) {
1973 checkPathAccess(pc, src, FsAction.WRITE);
1974 }
1975 final INodesInPath iip = dir.getINodesInPath4Write(src);
1976 final INode inode = iip.getLastINode();
1977 if (inode != null) {
1978 dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshotId());
1979 resultingStat = getAuditFileInfo(src, false);
1980 } else {
1981 throw new FileNotFoundException("File/Directory " + src + " does not exist.");
1982 }
1983 } finally {
1984 writeUnlock();
1985 }
1986 logAuditEvent(true, "setTimes", src, null, resultingStat);
1987 }
1988
1989 /**
1990 * Create a symbolic link.
1991 */
1992 @SuppressWarnings("deprecation")
1993 void createSymlink(String target, String link,
1994 PermissionStatus dirPerms, boolean createParent)
1995 throws IOException, UnresolvedLinkException {
1996 if (!FileSystem.areSymlinksEnabled()) {
1997 throw new UnsupportedOperationException("Symlinks not supported");
1998 }
1999 if (!DFSUtil.isValidName(link)) {
2000 throw new InvalidPathException("Invalid link name: " + link);
2001 }
2002 if (FSDirectory.isReservedName(target)) {
2003 throw new InvalidPathException("Invalid target name: " + target);
2004 }
2005 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
2006 if (cacheEntry != null && cacheEntry.isSuccess()) {
2007 return; // Return previous response
2008 }
2009 boolean success = false;
2010 try {
2011 createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
2012 success = true;
2013 } catch (AccessControlException e) {
2014 logAuditEvent(false, "createSymlink", link, target, null);
2015 throw e;
2016 } finally {
2017 RetryCache.setState(cacheEntry, success);
2018 }
2019 }
2020
2021 private void createSymlinkInt(String target, String link,
2022 PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
2023 throws IOException, UnresolvedLinkException {
2024 if (NameNode.stateChangeLog.isDebugEnabled()) {
2025 NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
2026 + target + " link=" + link);
2027 }
2028 HdfsFileStatus resultingStat = null;
2029 FSPermissionChecker pc = getPermissionChecker();
2030 checkOperation(OperationCategory.WRITE);
2031 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
2032 writeLock();
2033 try {
2034 checkOperation(OperationCategory.WRITE);
2035 checkNameNodeSafeMode("Cannot create symlink " + link);
2036 link = FSDirectory.resolvePath(link, pathComponents, dir);
2037 if (!createParent) {
2038 verifyParentDir(link);
2039 }
2040 if (!dir.isValidToCreate(link)) {
2041 throw new IOException("failed to create link " + link
2042 +" either because the filename is invalid or the file exists");
2043 }
2044 if (isPermissionEnabled) {
2045 checkAncestorAccess(pc, link, FsAction.WRITE);
2046 }
2047 // validate that we have enough inodes.
2048 checkFsObjectLimit();
2049
2050 // add symbolic link to namespace
2051 dir.addSymlink(link, target, dirPerms, createParent, logRetryCache);
2052 resultingStat = getAuditFileInfo(link, false);
2053 } finally {
2054 writeUnlock();
2055 }
2056 getEditLog().logSync();
2057 logAuditEvent(true, "createSymlink", link, target, resultingStat);
2058 }
2059
2060 /**
2061 * Set replication for an existing file.
2062 *
2063 * The NameNode sets new replication and schedules either replication of
2064 * under-replicated data blocks or removal of the excessive block copies
2065 * if the blocks are over-replicated.
2066 *
2067 * @see ClientProtocol#setReplication(String, short)
2068 * @param src file name
2069 * @param replication new replication
2070 * @return true if successful;
2071 * false if file does not exist or is a directory
2072 */
2073 boolean setReplication(final String src, final short replication)
2074 throws IOException {
2075 try {
2076 return setReplicationInt(src, replication);
2077 } catch (AccessControlException e) {
2078 logAuditEvent(false, "setReplication", src);
2079 throw e;
2080 }
2081 }
2082
2083 private boolean setReplicationInt(String src, final short replication)
2084 throws IOException {
2085 blockManager.verifyReplication(src, replication, null);
2086 final boolean isFile;
2087 FSPermissionChecker pc = getPermissionChecker();
2088 checkOperation(OperationCategory.WRITE);
2089 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2090 writeLock();
2091 try {
2092 checkOperation(OperationCategory.WRITE);
2093 checkNameNodeSafeMode("Cannot set replication for " + src);
2094 src = FSDirectory.resolvePath(src, pathComponents, dir);
2095 if (isPermissionEnabled) {
2096 checkPathAccess(pc, src, FsAction.WRITE);
2097 }
2098
2099 final short[] blockRepls = new short[2]; // 0: old, 1: new
2100 final Block[] blocks = dir.setReplication(src, replication, blockRepls);
2101 isFile = blocks != null;
2102 if (isFile) {
2103 blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
2104 }
2105 } finally {
2106 writeUnlock();
2107 }
2108
2109 getEditLog().logSync();
2110 if (isFile) {
2111 logAuditEvent(true, "setReplication", src);
2112 }
2113 return isFile;
2114 }
2115
2116 long getPreferredBlockSize(String filename)
2117 throws IOException, UnresolvedLinkException {
2118 FSPermissionChecker pc = getPermissionChecker();
2119 checkOperation(OperationCategory.READ);
2120 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
2121 readLock();
2122 try {
2123 checkOperation(OperationCategory.READ);
2124 filename = FSDirectory.resolvePath(filename, pathComponents, dir);
2125 if (isPermissionEnabled) {
2126 checkTraverse(pc, filename);
2127 }
2128 return dir.getPreferredBlockSize(filename);
2129 } finally {
2130 readUnlock();
2131 }
2132 }
2133
2134 /**
2135 * Verify that parent directory of src exists.
2136 */
2137 private void verifyParentDir(String src) throws FileNotFoundException,
2138 ParentNotDirectoryException, UnresolvedLinkException {
2139 assert hasReadLock();
2140 Path parent = new Path(src).getParent();
2141 if (parent != null) {
2142 final INode parentNode = dir.getINode(parent.toString());
2143 if (parentNode == null) {
2144 throw new FileNotFoundException("Parent directory doesn't exist: "
2145 + parent);
2146 } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
2147 throw new ParentNotDirectoryException("Parent path is not a directory: "
2148 + parent);
2149 }
2150 }
2151 }
2152
2153 /**
2154 * Create a new file entry in the namespace.
2155 *
2156 * For description of parameters and exceptions thrown see
2157 * {@link ClientProtocol#create()}, except it returns valid file status upon
2158 * success
2159 *
2160 * For retryCache handling details see -
2161 * {@link #getFileStatus(boolean, CacheEntryWithPayload)}
2162 *
2163 */
2164 HdfsFileStatus startFile(String src, PermissionStatus permissions,
2165 String holder, String clientMachine, EnumSet<CreateFlag> flag,
2166 boolean createParent, short replication, long blockSize)
2167 throws AccessControlException, SafeModeException,
2168 FileAlreadyExistsException, UnresolvedLinkException,
2169 FileNotFoundException, ParentNotDirectoryException, IOException {
2170 HdfsFileStatus status = null;
2171 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2172 null);
2173 if (cacheEntry != null && cacheEntry.isSuccess()) {
2174 return (HdfsFileStatus) cacheEntry.getPayload();
2175 }
2176
2177 try {
2178 status = startFileInt(src, permissions, holder, clientMachine, flag,
2179 createParent, replication, blockSize, cacheEntry != null);
2180 } catch (AccessControlException e) {
2181 logAuditEvent(false, "create", src);
2182 throw e;
2183 } finally {
2184 RetryCache.setState(cacheEntry, status != null, status);
2185 }
2186 return status;
2187 }
2188
2189 private HdfsFileStatus startFileInt(String src, PermissionStatus permissions,
2190 String holder, String clientMachine, EnumSet<CreateFlag> flag,
2191 boolean createParent, short replication, long blockSize,
2192 boolean logRetryCache) throws AccessControlException, SafeModeException,
2193 FileAlreadyExistsException, UnresolvedLinkException,
2194 FileNotFoundException, ParentNotDirectoryException, IOException {
2195 if (NameNode.stateChangeLog.isDebugEnabled()) {
2196 NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src
2197 + ", holder=" + holder
2198 + ", clientMachine=" + clientMachine
2199 + ", createParent=" + createParent
2200 + ", replication=" + replication
2201 + ", createFlag=" + flag.toString());
2202 }
2203 if (!DFSUtil.isValidName(src)) {
2204 throw new InvalidPathException(src);
2205 }
2206 blockManager.verifyReplication(src, replication, clientMachine);
2207
2208 boolean skipSync = false;
2209 HdfsFileStatus stat = null;
2210 FSPermissionChecker pc = getPermissionChecker();
2211 checkOperation(OperationCategory.WRITE);
2212 if (blockSize < minBlockSize) {
2213 throw new IOException("Specified block size is less than configured" +
2214 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
2215 + "): " + blockSize + " < " + minBlockSize);
2216 }
2217 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2218 boolean create = flag.contains(CreateFlag.CREATE);
2219 boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
2220 writeLock();
2221 try {
2222 checkOperation(OperationCategory.WRITE);
2223 checkNameNodeSafeMode("Cannot create file" + src);
2224 src = FSDirectory.resolvePath(src, pathComponents, dir);
2225 startFileInternal(pc, src, permissions, holder, clientMachine, create,
2226 overwrite, createParent, replication, blockSize, logRetryCache);
2227 stat = dir.getFileInfo(src, false);
2228 } catch (StandbyException se) {
2229 skipSync = true;
2230 throw se;
2231 } finally {
2232 writeUnlock();
2233 // There might be transactions logged while trying to recover the lease.
2234 // They need to be sync'ed even when an exception was thrown.
2235 if (!skipSync) {
2236 getEditLog().logSync();
2237 }
2238 }
2239 logAuditEvent(true, "create", src, null, stat);
2240 return stat;
2241 }
2242
  /**
   * Create a new file or overwrite an existing file<br>
   *
   * Once the file is created the client then allocates a new block with the
   * next call using {@link NameNode#addBlock()}.
   * <p>
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create()}
   */
  private void startFileInternal(FSPermissionChecker pc, String src,
      PermissionStatus permissions, String holder, String clientMachine,
      boolean create, boolean overwrite, boolean createParent,
      short replication, long blockSize, boolean logRetryEntry)
      throws FileAlreadyExistsException, AccessControlException,
      UnresolvedLinkException, FileNotFoundException,
      ParentNotDirectoryException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException(src +
          " already exists as a directory");
    }
    // Null when the path does not yet exist; throws if it is not a file.
    final INodeFile myFile = INodeFile.valueOf(inode, src, true);
    if (isPermissionEnabled) {
      if (overwrite && myFile != null) {
        // Overwriting an existing file requires write access to the file.
        checkPathAccess(pc, src, FsAction.WRITE);
      } else {
        // Creating a new entry requires write access to the nearest
        // existing ancestor directory.
        checkAncestorAccess(pc, src, FsAction.WRITE);
      }
    }

    if (!createParent) {
      verifyParentDir(src);
    }

    try {
      if (myFile == null) {
        if (!create) {
          throw new FileNotFoundException("Can't overwrite non-existent " +
              src + " for client " + clientMachine);
        }
      } else {
        if (overwrite) {
          try {
            deleteInt(src, true, false); // File exists - delete if overwrite
          } catch (AccessControlException e) {
            logAuditEvent(false, "delete", src);
            throw e;
          }
        } else {
          // If lease soft limit time is expired, recover the lease
          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
          throw new FileAlreadyExistsException(src + " for client " +
              clientMachine + " already exists");
        }
      }

      // Enforce the namespace object (inode) quota before adding the file.
      checkFsObjectLimit();
      // Locality hint for pipeline setup when the client runs on a datanode.
      final DatanodeDescriptor clientNode =
          blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);

      INodeFile newNode = dir.addFile(src, permissions, replication, blockSize,
          holder, clientMachine, clientNode);
      if (newNode == null) {
        throw new IOException("Unable to add " + src + " to namespace");
      }
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // record file record in log, record new generation stamp
      getEditLog().logOpenFile(src, newNode, logRetryEntry);
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
            src + " inode " + newNode.getId() + " " + holder);
      }
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }
2326
  /**
   * Append to an existing file for append.
   * <p>
   *
   * The method returns the last block of the file if this is a partial block,
   * which can still be used for writing more data. The client uses the returned
   * block locations to form the data pipeline for this block.<br>
   * The method returns null if the last block is full. The client then
   * allocates a new block with the next call using {@link NameNode#addBlock()}.
   * <p>
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#append(String, String)}
   *
   * @return the last block locations if the block is partial or null otherwise
   */
  private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
      String holder, String clientMachine, boolean logRetryCache)
      throws AccessControlException, UnresolvedLinkException,
      FileNotFoundException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException("Cannot append to directory " + src
          + "; already exists as a directory.");
    }
    if (isPermissionEnabled) {
      checkPathAccess(pc, src, FsAction.WRITE);
    }

    try {
      if (inode == null) {
        throw new FileNotFoundException("failed to append to non-existent file "
            + src + " for client " + clientMachine);
      }
      INodeFile myFile = INodeFile.valueOf(inode, src, true);
      // Opening an existing file for write - may need to recover lease.
      recoverLeaseInternal(myFile, src, holder, clientMachine, false);

      // recoverLeaseInternal may create a new InodeFile via
      // finalizeINodeFileUnderConstruction so we need to refresh
      // the referenced file.
      myFile = INodeFile.valueOf(dir.getINode(src), src, true);

      // Locality hint for pipeline setup when the client runs on a datanode.
      final DatanodeDescriptor clientNode =
          blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
      return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode,
          true, iip.getLatestSnapshotId(), logRetryCache);
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
      throw ie;
    }
  }
2382
2383 /**
2384 * Replace current node with a INodeUnderConstruction.
2385 * Recreate in-memory lease record.
2386 *
2387 * @param src path to the file
2388 * @param file existing file object
2389 * @param leaseHolder identifier of the lease holder on this file
2390 * @param clientMachine identifier of the client machine
2391 * @param clientNode if the client is collocated with a DN, that DN's descriptor
2392 * @param writeToEditLog whether to persist this change to the edit log
2393 * @param logRetryCache whether to record RPC ids in editlog for retry cache
2394 * rebuilding
2395 * @return the last block locations if the block is partial or null otherwise
2396 * @throws UnresolvedLinkException
2397 * @throws IOException
2398 */
2399 LocatedBlock prepareFileForWrite(String src, INodeFile file,
2400 String leaseHolder, String clientMachine, DatanodeDescriptor clientNode,
2401 boolean writeToEditLog, int latestSnapshot, boolean logRetryCache)
2402 throws IOException {
2403 file = file.recordModification(latestSnapshot);
2404 final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine,
2405 clientNode);
2406
2407 leaseManager.addLease(cons.getFileUnderConstructionFeature()
2408 .getClientName(), src);
2409
2410 LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons);
2411 if (writeToEditLog) {
2412 getEditLog().logOpenFile(src, cons, logRetryCache);
2413 }
2414 return ret;
2415 }
2416
2417 /**
2418 * Recover lease;
2419 * Immediately revoke the lease of the current lease holder and start lease
2420 * recovery so that the file can be forced to be closed.
2421 *
2422 * @param src the path of the file to start lease recovery
2423 * @param holder the lease holder's name
2424 * @param clientMachine the client machine's name
2425 * @return true if the file is already closed
2426 * @throws IOException
2427 */
  boolean recoverLease(String src, String holder, String clientMachine)
      throws IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new IOException("Invalid file name: " + src);
    }

    boolean skipSync = false;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot recover the lease of " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
      // A file that is not under construction is already closed; nothing
      // to recover.
      if (!inode.isUnderConstruction()) {
        return true;
      }
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      // force=true: revoke the current holder's lease immediately rather
      // than waiting for the soft-limit expiration.
      recoverLeaseInternal(inode, src, holder, clientMachine, true);
    } catch (StandbyException se) {
      // A standby logs no edits for this call, so skip the sync below.
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    }
    // Lease recovery has been started, but the file is not yet closed.
    return false;
  }
2465
  /**
   * Core of lease recovery. If {@code fileInode} is under construction,
   * either release the current writer's lease (when {@code force} is true or
   * the soft limit has expired) or throw an exception describing why the
   * file cannot be (re)opened by {@code holder}.
   *
   * @param fileInode the file whose lease is being recovered; may be null
   * @param src path of the file
   * @param holder name of the client requesting recovery/creation
   * @param clientMachine the requesting client's machine, for messages
   * @param force if true, revoke the current lease immediately
   * @throws AlreadyBeingCreatedException if the holder itself owns the lease,
   *         no lease exists for the recorded writer, or another live client
   *         is writing the file
   * @throws RecoveryInProgressException if recovery was started but the file
   *         could not yet be closed
   */
  private void recoverLeaseInternal(INodeFile fileInode,
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    if (fileInode != null && fileInode.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);
      //
      // We found the lease for this file. And surprisingly the original
      // holder is trying to recreate this file. This should never occur.
      //
      // NOTE(review): lease was looked up by holder, so when lease != null
      // the second disjunct lease.getHolder().equals(holder) is always
      // true, which makes the leaseFile comparison appear redundant —
      // confirm before simplifying.
      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        if ((leaseFile != null && leaseFile.equals(lease)) ||
            lease.getHolder().equals(holder)) {
          throw new AlreadyBeingCreatedException(
              "failed to create file " + src + " for " + holder +
              " for client " + clientMachine +
              " because current leaseholder is trying to recreate file.");
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        throw new AlreadyBeingCreatedException(
            "failed to create file " + src + " for " + holder +
            " for client " + clientMachine +
            " because pendingCreates is non-null but no leases found.");
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
          " from client " + clientName);
        internalReleaseLease(lease, src, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
          "Current lease holder " + lease.getHolder() +
          " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, null);
          if(!isClosed)
            throw new RecoveryInProgressException(
                "Failed to close file " + src +
                ". Lease recovery is in progress. Try again later.");
        } else {
          // Soft limit not yet expired: the writer is presumed live, so
          // report either an in-progress recovery or a writer conflict.
          final BlockInfo lastBlock = fileInode.getLastBlock();
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            throw new RecoveryInProgressException("Recovery in progress, file ["
                + src + "], " + "lease owner [" + lease.getHolder() + "]");
          } else {
            throw new AlreadyBeingCreatedException("Failed to create file ["
                + src + "] for [" + holder + "] for client [" + clientMachine
                + "], because this file is already being created by ["
                + clientName + "] on ["
                + uc.getClientMachine() + "]");
          }
        }
      }
    }
  }
2541
2542 /**
2543 * Append to an existing file in the namespace.
2544 */
2545 LocatedBlock appendFile(String src, String holder, String clientMachine)
2546 throws AccessControlException, SafeModeException,
2547 FileAlreadyExistsException, FileNotFoundException,
2548 ParentNotDirectoryException, IOException {
2549 LocatedBlock lb = null;
2550 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
2551 null);
2552 if (cacheEntry != null && cacheEntry.isSuccess()) {
2553 return (LocatedBlock) cacheEntry.getPayload();
2554 }
2555
2556 boolean success = false;
2557 try {
2558 lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
2559 success = true;
2560 return lb;
2561 } catch (AccessControlException e) {
2562 logAuditEvent(false, "append", src);
2563 throw e;
2564 } finally {
2565 RetryCache.setState(cacheEntry, success, lb);
2566 }
2567 }
2568
2569 private LocatedBlock appendFileInt(String src, String holder,
2570 String clientMachine, boolean logRetryCache)
2571 throws AccessControlException, SafeModeException,
2572 FileAlreadyExistsException, FileNotFoundException,
2573 ParentNotDirectoryException, IOException {
2574 if (NameNode.stateChangeLog.isDebugEnabled()) {
2575 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
2576 + ", holder=" + holder
2577 + ", clientMachine=" + clientMachine);
2578 }
2579 boolean skipSync = false;
2580 if (!supportAppends) {
2581 throw new UnsupportedOperationException(
2582 "Append is not enabled on this NameNode. Use the " +
2583 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
2584 }
2585
2586 LocatedBlock lb = null;
2587 FSPermissionChecker pc = getPermissionChecker();
2588 checkOperation(OperationCategory.WRITE);
2589 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2590 writeLock();
2591 try {
2592 checkOperation(OperationCategory.WRITE);
2593 checkNameNodeSafeMode("Cannot append to file" + src);
2594 src = FSDirectory.resolvePath(src, pathComponents, dir);
2595 lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache);
2596 } catch (StandbyException se) {
2597 skipSync = true;
2598 throw se;
2599 } finally {
2600 writeUnlock();
2601 // There might be transactions logged while trying to recover the lease.
2602 // They need to be sync'ed even when an exception was thrown.
2603 if (!skipSync) {
2604 getEditLog().logSync();
2605 }
2606 }
2607 if (lb != null) {
2608 if (NameNode.stateChangeLog.isDebugEnabled()) {
2609 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
2610 +src+" for "+holder+" at "+clientMachine
2611 +" block " + lb.getBlock()
2612 +" block size " + lb.getBlock().getNumBytes());
2613 }
2614 }
2615 logAuditEvent(true, "append", src);
2616 return lb;
2617 }
2618
2619 ExtendedBlock getExtendedBlock(Block blk) {
2620 return new ExtendedBlock(blockPoolId, blk);
2621 }
2622
2623 void setBlockPoolId(String bpid) {
2624 blockPoolId = bpid;
2625 blockManager.setBlockPoolId(blockPoolId);
2626 }
2627
2628 /**
2629 * The client would like to obtain an additional block for the indicated
2630 * filename (which is being written-to). Return an array that consists
2631 * of the block, plus a set of machines. The first on this list should
2632 * be where the client writes data. Subsequent items in the list must
2633 * be provided in the connection to the first datanode.
2634 *
2635 * Make sure the previous blocks have been reported by datanodes and
2636 * are replicated. Will return an empty 2-elt array if we want the
2637 * client to "try again later".
2638 */
  LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
      ExtendedBlock previous, Set<Node> excludedNodes,
      List<String> favoredNodes)
      throws LeaseExpiredException, NotReplicatedYetException,
      QuotaExceededException, SafeModeException, UnresolvedLinkException,
      IOException {
    long blockSize;
    int replication;
    DatanodeDescriptor clientNode = null;

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: "
          + src + " inodeId " + fileId + " for " + clientName);
    }

    // Part I. Analyze the state of the file with respect to the input data.
    // Done under the read lock so chooseTarget() below can run unlocked.
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      // analyzeFileState fills onRetryBlock[0] when it detects a retried RPC.
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      final INode[] inodes = analyzeFileState(
          src, fileId, clientName, previous, onRetryBlock).getINodes();
      final INodeFile pendingFile = inodes[inodes.length - 1].asFile();

      if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
        // This is a retry. Just return the last block if having locations.
        return onRetryBlock[0];
      }
      if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
        throw new IOException("File has reached the limit on maximum number of"
            + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
            + "): " + pendingFile.getBlocks().length + " >= "
            + maxBlocksPerFile);
      }
      // Capture what chooseTarget() needs before dropping the lock.
      blockSize = pendingFile.getPreferredBlockSize();
      clientNode = pendingFile.getFileUnderConstructionFeature().getClientNode();
      replication = pendingFile.getFileReplication();
    } finally {
      readUnlock();
    }

    // choose targets for the new block to be allocated.
    final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget(
        src, replication, clientNode, excludedNodes, blockSize, favoredNodes);

    // Part II.
    // Allocate a new block, add it to the INode and the BlocksMap.
    Block newBlock = null;
    long offset;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // Run the full analysis again, since things could have changed
      // while chooseTarget() was executing.
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      INodesInPath inodesInPath =
          analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
      final INode[] inodes = inodesInPath.getINodes();
      final INodeFile pendingFile = inodes[inodes.length - 1].asFile();

      if (onRetryBlock[0] != null) {
        if (onRetryBlock[0].getLocations().length > 0) {
          // This is a retry. Just return the last block if having locations.
          return onRetryBlock[0];
        } else {
          // add new chosen targets to already allocated block and return
          BlockInfo lastBlockInFile = pendingFile.getLastBlock();
          ((BlockInfoUnderConstruction) lastBlockInFile)
              .setExpectedLocations(targets);
          offset = pendingFile.computeFileSize();
          return makeLocatedBlock(lastBlockInFile, targets, offset);
        }
      }

      // commit the last block and complete it if it has minimum replicas
      commitOrCompleteLastBlock(pendingFile,
          ExtendedBlock.getLocalBlock(previous));

      // allocate new block, record block locations in INode.
      newBlock = createNewBlock();
      saveAllocatedBlock(src, inodesInPath, newBlock, targets);

      dir.persistNewBlock(src, pendingFile);
      offset = pendingFile.computeFileSize();
    } finally {
      writeUnlock();
    }
    // Flush the edits logged above before acknowledging the client.
    getEditLog().logSync();

    // Return located block
    return makeLocatedBlock(newBlock, targets, offset);
  }
2735
  /**
   * Validate the state of {@code src} for a block-allocation request:
   * verify the lease, check safe mode and fs-object limits, and reconcile
   * the client's view of the last block ({@code previous}) with the
   * namesystem's. When the request is recognized as a retry of an already
   * applied allocation, the existing last block is placed in
   * {@code onRetryBlock[0]} for the caller to return.
   *
   * @param src resolved path of the file being written
   * @param fileId inode id claimed by the client
   * @param clientName lease holder
   * @param previous the block the client believes is the file's last block
   * @param onRetryBlock out-parameter; element 0 is set to the last block
   *          when a retried allocation is detected, otherwise left null
   * @return the resolved path's INodesInPath, last inode being the file
   * @throws IOException on lease/safe-mode/limit violations or a mismatched
   *         {@code previous} block
   */
  INodesInPath analyzeFileState(String src,
                                long fileId,
                                String clientName,
                                ExtendedBlock previous,
                                LocatedBlock[] onRetryBlock)
          throws IOException  {
    assert hasReadLock();

    checkBlock(previous);
    onRetryBlock[0] = null;
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot add block to " + src);

    // have we exceeded the configured limit of fs objects.
    checkFsObjectLimit();

    Block previousBlock = ExtendedBlock.getLocalBlock(previous);
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INodeFile pendingFile
        = checkLease(src, fileId, clientName, iip.getLastINode());
    BlockInfo lastBlockInFile = pendingFile.getLastBlock();
    if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
      // The block that the client claims is the current last block
      // doesn't match up with what we think is the last block. There are
      // four possibilities:
      // 1) This is the first block allocation of an append() pipeline
      //    which started appending exactly at a block boundary.
      //    In this case, the client isn't passed the previous block,
      //    so it makes the allocateBlock() call with previous=null.
      //    We can distinguish this since the last block of the file
      //    will be exactly a full block.
      // 2) This is a retry from a client that missed the response of a
      //    prior getAdditionalBlock() call, perhaps because of a network
      //    timeout, or because of an HA failover. In that case, we know
      //    by the fact that the client is re-issuing the RPC that it
      //    never began to write to the old block. Hence it is safe to
      //    to return the existing block.
      // 3) This is an entirely bogus request/bug -- we should error out
      //    rather than potentially appending a new block with an empty
      //    one in the middle, etc
      // 4) This is a retry from a client that timed out while
      //    the prior getAdditionalBlock() is still being processed,
      //    currently working on chooseTarget().
      //    There are no means to distinguish between the first and
      //    the second attempts in Part I, because the first one hasn't
      //    changed the namesystem state yet.
      //    We run this analysis again in Part II where case 4 is impossible.

      BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
      if (previous == null &&
          lastBlockInFile != null &&
          lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() &&
          lastBlockInFile.isComplete()) {
        // Case 1
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug(
              "BLOCK* NameSystem.allocateBlock: handling block allocation" +
              " writing to a file with a complete previous block: src=" +
              src + " lastBlock=" + lastBlockInFile);
        }
      } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
        if (lastBlockInFile.getNumBytes() != 0) {
          throw new IOException(
              "Request looked like a retry to allocate block " +
              lastBlockInFile + " but it already contains " +
              lastBlockInFile.getNumBytes() + " bytes");
        }

        // Case 2
        // Return the last block.
        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
            "caught retry for allocation of a new block in " +
            src + ". Returning previously allocated block " + lastBlockInFile);
        long offset = pendingFile.computeFileSize();
        onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
            ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
            offset);
        return iip;
      } else {
        // Case 3
        throw new IOException("Cannot allocate block in " + src + ": " +
            "passed 'previous' block " + previous + " does not match actual " +
            "last block in file " + lastBlockInFile);
      }
    }

    // Check if the penultimate block is minimally replicated
    if (!checkFileProgress(pendingFile, false)) {
      throw new NotReplicatedYetException("Not replicated yet: " + src);
    }
    return iip;
  }
2828
2829 LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
2830 long offset) throws IOException {
2831 LocatedBlock lBlk = new LocatedBlock(
2832 getExtendedBlock(blk), locs, offset, false);
2833 getBlockManager().setBlockToken(
2834 lBlk, BlockTokenSecretManager.AccessMode.WRITE);
2835 return lBlk;
2836 }
2837
2838 /** @see NameNode#getAdditionalDatanode(String, ExtendedBlock, DatanodeInfo[], DatanodeInfo[], int, String) */
2839 LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk,
2840 final DatanodeInfo[] existings, final String[] storageIDs,
2841 final Set<Node> excludes,
2842 final int numAdditionalNodes, final String clientName
2843 ) throws IOException {
2844 //check if the feature is enabled
2845 dtpReplaceDatanodeOnFailure.checkEnabled();
2846
2847 final DatanodeDescriptor clientnode;
2848 final long preferredblocksize;
2849 final List<DatanodeStorageInfo> chosen;
2850 checkOperation(OperationCategory.READ);
2851 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2852 readLock();
2853 try {
2854 checkOperation(OperationCategory.READ);
2855 //check safe mode
2856 checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
2857 src = FSDirectory.resolvePath(src, pathComponents, dir);
2858
2859 //check lease
2860 final INodeFile file = checkLease(src, clientName);
2861 clientnode = file.getFileUnderConstructionFeature().getClientNode();
2862 preferredblocksize = file.getPreferredBlockSize();
2863
2864 //find datanode storages
2865 final DatanodeManager dm = blockManager.getDatanodeManager();
2866 chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs));
2867 } finally {
2868 readUnlock();
2869 }
2870
2871 // choose new datanodes.
2872 final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy(
2873 ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true,
2874 // TODO: get storage type from the file
2875 excludes, preferredblocksize, StorageType.DEFAULT);
2876 final LocatedBlock lb = new LocatedBlock(blk, targets);
2877 blockManager.setBlockToken(lb, AccessMode.COPY);
2878 return lb;
2879 }
2880
2881 /**
2882 * The client would like to let go of the given block
2883 */
2884 boolean abandonBlock(ExtendedBlock b, String src, String holder)
2885 throws LeaseExpiredException, FileNotFoundException,
2886 UnresolvedLinkException, IOException {
2887 if(NameNode.stateChangeLog.isDebugEnabled()) {
2888 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
2889 + "of file " + src);
2890 }
2891 checkOperation(OperationCategory.WRITE);
2892 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2893 writeLock();
2894 try {
2895 checkOperation(OperationCategory.WRITE);
2896 checkNameNodeSafeMode("Cannot abandon block " + b + " for fle" + src);
2897 src = FSDirectory.resolvePath(src, pathComponents, dir);
2898
2899 //
2900 // Remove the block from the pending creates list
2901 //
2902 INodeFile file = checkLease(src, holder);
2903 boolean removed = dir.removeBlock(src, file,
2904 ExtendedBlock.getLocalBlock(b));
2905 if (!removed) {
2906 return true;
2907 }
2908 if(NameNode.stateChangeLog.isDebugEnabled()) {
2909 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
2910 + b + " is removed from pendingCreates");
2911 }
2912 dir.persistBlocks(src, file, false);
2913 } finally {
2914 writeUnlock();
2915 }
2916 getEditLog().logSync();
2917
2918 return true;
2919 }
2920
2921 /** make sure that we still have the lease on this file. */
2922 private INodeFile checkLease(String src, String holder)
2923 throws LeaseExpiredException, UnresolvedLinkException,
2924 FileNotFoundException {
2925 return checkLease(src, INodeId.GRANDFATHER_INODE_ID, holder,
2926 dir.getINode(src));
2927 }
2928
2929 private INodeFile checkLease(String src, long fileId, String holder,
2930 INode inode) throws LeaseExpiredException, FileNotFoundException {
2931 assert hasReadLock();
2932 if (inode == null || !inode.isFile()) {
2933 Lease lease = leaseManager.getLease(holder);
2934 throw new LeaseExpiredException(
2935 "No lease on " + src + ": File does not exist. "
2936 + (lease != null ? lease.toString()
2937 : "Holder " + holder + " does not have any open files."));
2938 }
2939 final INodeFile file = inode.asFile();
2940 if (!file.isUnderConstruction()) {
2941 Lease lease = leaseManager.getLease(holder);
2942 throw new LeaseExpiredException(
2943 "No lease on " + src + ": File is not open for writing. "
2944 + (lease != null ? lease.toString()
2945 : "Holder " + holder + " does not have any open files."));
2946 }
2947 String clientName = file.getFileUnderConstructionFeature().getClientName();
2948 if (holder != null && !clientName.equals(holder)) {
2949 throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
2950 + clientName + " but is accessed by " + holder);
2951 }
2952 INodeId.checkId(fileId, file);
2953 return file;
2954 }
2955
2956 /**
2957 * Complete in-progress write to the given file.
2958 * @return true if successful, false if the client should continue to retry
2959 * (e.g if not all blocks have reached minimum replication yet)
2960 * @throws IOException on error (eg lease mismatch, file not open, file deleted)
2961 */
2962 boolean completeFile(String src, String holder,
2963 ExtendedBlock last, long fileId)
2964 throws SafeModeException, UnresolvedLinkException, IOException {
2965 if (NameNode.stateChangeLog.isDebugEnabled()) {
2966 NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
2967 src + " for " + holder);
2968 }
2969 checkBlock(last);
2970 boolean success = false;
2971 checkOperation(OperationCategory.WRITE);
2972 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
2973 writeLock();
2974 try {
2975 checkOperation(OperationCategory.WRITE);
2976 checkNameNodeSafeMode("Cannot complete file " + src);
2977 src = FSDirectory.resolvePath(src, pathComponents, dir);
2978 success = completeFileInternal(src, holder,
2979 ExtendedBlock.getLocalBlock(last), fileId);
2980 } finally {
2981 writeUnlock();
2982 }
2983 getEditLog().logSync();
2984 if (success) {
2985 NameNode.stateChangeLog.info("DIR* completeFile: " + src
2986 + " is closed by " + holder);
2987 }
2988 return success;
2989 }
2990
  /**
   * Worker for {@link #completeFile}: commits the last block and finalizes
   * the under-construction inode. Must be called with the write lock held.
   *
   * @param src resolved path of the file being closed
   * @param holder lease holder (client) name
   * @param last the client's view of the file's last block
   * @param fileId inode id claimed by the client
   * @return true if the file was closed (or an idempotent retry of a close
   *         was detected); false if the caller should retry later because
   *         blocks have not reached minimum replication
   */
  private boolean completeFileInternal(String src,
      String holder, Block last, long fileId) throws SafeModeException,
      UnresolvedLinkException, IOException {
    assert hasWriteLock();
    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile;
    try {
      // Index 0 of the last-inode-in-path is the target inode itself
      // (presumably getLastINodeInPath returns a single-element path —
      // TODO confirm against FSDirectory).
      pendingFile = checkLease(src, fileId, holder, iip.getINode(0));
    } catch (LeaseExpiredException lee) {
      final INode inode = dir.getINode(src);
      if (inode != null
          && inode.isFile()
          && !inode.asFile().isUnderConstruction()) {
        // This could be a retry RPC - i.e the client tried to close
        // the file, but missed the RPC response. Thus, it is trying
        // again to close the file. If the file still exists and
        // the client's view of the last block matches the actual
        // last block, then we'll treat it as a successful close.
        // See HDFS-3031.
        final Block realLastBlock = inode.asFile().getLastBlock();
        if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
          NameNode.stateChangeLog.info("DIR* completeFile: " +
              "request from " + holder + " to complete " + src +
              " which is already closed. But, it appears to be an RPC " +
              "retry. Returning success");
          return true;
        }
      }
      throw lee;
    }
    // Check the state of the penultimate block. It should be completed
    // before attempting to complete the last one.
    if (!checkFileProgress(pendingFile, false)) {
      return false;
    }

    // commit the last block and complete it if it has minimum replicas
    commitOrCompleteLastBlock(pendingFile, last);

    if (!checkFileProgress(pendingFile, true)) {
      return false;
    }

    finalizeINodeFileUnderConstruction(src, pendingFile,
        iip.getLatestSnapshotId());
    return true;
  }
3038
3039 /**
3040 * Save allocated block at the given pending filename
3041 *
3042 * @param src path to the file
3043 * @param inodesInPath representing each of the components of src.
3044 * The last INode is the INode for the file.
3045 * @throws QuotaExceededException If addition of block exceeds space quota
3046 */
3047 BlockInfo saveAllocatedBlock(String src, INodesInPath inodes,
3048 Block newBlock, DatanodeStorageInfo[] targets)
3049 throws IOException {
3050 assert hasWriteLock();
3051 BlockInfo b = dir.addBlock(src, inodes, newBlock, targets);
3052 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". "
3053 + getBlockPoolId() + " " + b);
3054 DatanodeStorageInfo.incrementBlocksScheduled(targets);
3055 return b;
3056 }
3057
3058 /**
3059 * Create new block with a unique block id and a new generation stamp.
3060 */
3061 Block createNewBlock() throws IOException {
3062 assert hasWriteLock();
3063 Block b = new Block(nextBlockId(), 0, 0);
3064 // Increment the generation stamp for every new block.
3065 b.setGenerationStamp(nextGenerationStamp(false));
3066 return b;
3067 }
3068
3069 /**
3070 * Check that the indicated file's blocks are present and
3071 * replicated. If not, return false. If checkall is true, then check
3072 * all blocks, otherwise check only penultimate block.
3073 */
3074 boolean checkFileProgress(INodeFile v, boolean checkall) {
3075 readLock();
3076 try {
3077 if (checkall) {
3078 //
3079 // check all blocks of the file.
3080 //
3081 for (BlockInfo block: v.getBlocks()) {
3082 if (!block.isComplete()) {
3083 LOG.info("BLOCK* checkFileProgress: " + block
3084 + " has not reached minimal replication "
3085 + blockManager.minReplication);
3086 return false;
3087 }
3088 }
3089 } else {
3090 //
3091 // check the penultimate block of this file
3092 //
3093 BlockInfo b = v.getPenultimateBlock();
3094 if (b != null && !b.isComplete()) {
3095 LOG.warn("BLOCK* checkFileProgress: " + b
3096 + " has not reached minimal replication "
3097 + blockManager.minReplication);
3098 return false;
3099 }
3100 }
3101 return true;
3102 } finally {
3103 readUnlock();
3104 }
3105 }
3106
3107 ////////////////////////////////////////////////////////////////
3108 // Here's how to handle block-copy failure during client write:
3109 // -- As usual, the client's write should result in a streaming
3110 // backup write to a k-machine sequence.
3111 // -- If one of the backup machines fails, no worries. Fail silently.
3112 // -- Before client is allowed to close and finalize file, make sure
3113 // that the blocks are backed up. Namenode may have to issue specific backup
3114 // commands to make up for earlier datanode failures. Once all copies
3115 // are made, edit namespace and return to client.
3116 ////////////////////////////////////////////////////////////////
3117
3118 /**
3119 * Change the indicated filename.
3120 * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead.
3121 */
3122 @Deprecated
3123 boolean renameTo(String src, String dst)
3124 throws IOException, UnresolvedLinkException {
3125 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3126 if (cacheEntry != null && cacheEntry.isSuccess()) {
3127 return true; // Return previous response
3128 }
3129 boolean ret = false;
3130 try {
3131 ret = renameToInt(src, dst, cacheEntry != null);
3132 } catch (AccessControlException e) {
3133 logAuditEvent(false, "rename", src, dst, null);
3134 throw e;
3135 } finally {
3136 RetryCache.setState(cacheEntry, ret);
3137 }
3138 return ret;
3139 }
3140
3141 private boolean renameToInt(String src, String dst, boolean logRetryCache)
3142 throws IOException, UnresolvedLinkException {
3143 if (NameNode.stateChangeLog.isDebugEnabled()) {
3144 NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
3145 " to " + dst);
3146 }
3147 if (!DFSUtil.isValidName(dst)) {
3148 throw new IOException("Invalid name: " + dst);
3149 }
3150 FSPermissionChecker pc = getPermissionChecker();
3151 checkOperation(OperationCategory.WRITE);
3152 byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
3153 byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
3154 boolean status = false;
3155 HdfsFileStatus resultingStat = null;
3156 writeLock();
3157 try {
3158 checkOperation(OperationCategory.WRITE);
3159 checkNameNodeSafeMode("Cannot rename " + src);
3160 src = FSDirectory.resolvePath(src, srcComponents, dir);
3161 dst = FSDirectory.resolvePath(dst, dstComponents, dir);
3162 checkOperation(OperationCategory.WRITE);
3163 status = renameToInternal(pc, src, dst, logRetryCache);
3164 if (status) {
3165 resultingStat = getAuditFileInfo(dst, false);
3166 }
3167 } finally {
3168 writeUnlock();
3169 }
3170 getEditLog().logSync();
3171 if (status) {
3172 logAuditEvent(true, "rename", src, dst, resultingStat);
3173 }
3174 return status;
3175 }
3176
3177 /** @deprecated See {@link #renameTo(String, String)} */
3178 @Deprecated
3179 private boolean renameToInternal(FSPermissionChecker pc, String src,
3180 String dst, boolean logRetryCache) throws IOException,
3181 UnresolvedLinkException {
3182 assert hasWriteLock();
3183 if (isPermissionEnabled) {
3184 //We should not be doing this. This is move() not renameTo().
3185 //but for now,
3186 //NOTE: yes, this is bad! it's assuming much lower level behavior
3187 // of rewriting the dst
3188 String actualdst = dir.isDir(dst)?
3189 dst + Path.SEPARATOR + new Path(src).getName(): dst;
3190 // Rename does not operates on link targets
3191 // Do not resolveLink when checking permissions of src and dst
3192 // Check write access to parent of src
3193 checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3194 // Check write access to ancestor of dst
3195 checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
3196 false);
3197 }
3198
3199 if (dir.renameTo(src, dst, logRetryCache)) {
3200 return true;
3201 }
3202 return false;
3203 }
3204
3205
3206 /** Rename src to dst */
3207 void renameTo(String src, String dst, Options.Rename... options)
3208 throws IOException, UnresolvedLinkException {
3209 if (NameNode.stateChangeLog.isDebugEnabled()) {
3210 NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
3211 + src + " to " + dst);
3212 }
3213 if (!DFSUtil.isValidName(dst)) {
3214 throw new InvalidPathException("Invalid name: " + dst);
3215 }
3216 final FSPermissionChecker pc = getPermissionChecker();
3217
3218 checkOperation(OperationCategory.WRITE);
3219 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3220 if (cacheEntry != null && cacheEntry.isSuccess()) {
3221 return; // Return previous response
3222 }
3223 byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
3224 byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
3225 HdfsFileStatus resultingStat = null;
3226 boolean success = false;
3227 writeLock();
3228 try {
3229 checkOperation(OperationCategory.WRITE);
3230 checkNameNodeSafeMode("Cannot rename " + src);
3231 src = FSDirectory.resolvePath(src, srcComponents, dir);
3232 dst = FSDirectory.resolvePath(dst, dstComponents, dir);
3233 renameToInternal(pc, src, dst, cacheEntry != null, options);
3234 resultingStat = getAuditFileInfo(dst, false);
3235 success = true;
3236 } finally {
3237 writeUnlock();
3238 RetryCache.setState(cacheEntry, success);
3239 }
3240 getEditLog().logSync();
3241 if (resultingStat != null) {
3242 StringBuilder cmd = new StringBuilder("rename options=");
3243 for (Rename option : options) {
3244 cmd.append(option.value()).append(" ");
3245 }
3246 logAuditEvent(true, cmd.toString(), src, dst, resultingStat);
3247 }
3248 }
3249
3250 private void renameToInternal(FSPermissionChecker pc, String src, String dst,
3251 boolean logRetryCache, Options.Rename... options) throws IOException {
3252 assert hasWriteLock();
3253 if (isPermissionEnabled) {
3254 // Rename does not operates on link targets
3255 // Do not resolveLink when checking permissions of src and dst
3256 // Check write access to parent of src
3257 checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
3258 // Check write access to ancestor of dst
3259 checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
3260 }
3261
3262 dir.renameTo(src, dst, logRetryCache, options);
3263 }
3264
3265 /**
3266 * Remove the indicated file from namespace.
3267 *
3268 * @see ClientProtocol#delete(String, boolean) for detailed description and
3269 * description of exceptions
3270 */
3271 boolean delete(String src, boolean recursive)
3272 throws AccessControlException, SafeModeException,
3273 UnresolvedLinkException, IOException {
3274 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
3275 if (cacheEntry != null && cacheEntry.isSuccess()) {
3276 return true; // Return previous response
3277 }
3278 boolean ret = false;
3279 try {
3280 ret = deleteInt(src, recursive, cacheEntry != null);
3281 } catch (AccessControlException e) {
3282 logAuditEvent(false, "delete", src);
3283 throw e;
3284 } finally {
3285 RetryCache.setState(cacheEntry, ret);
3286 }
3287 return ret;
3288 }
3289
3290 private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
3291 throws AccessControlException, SafeModeException,
3292 UnresolvedLinkException, IOException {
3293 if (NameNode.stateChangeLog.isDebugEnabled()) {
3294 NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
3295 }
3296 boolean status = deleteInternal(src, recursive, true, logRetryCache);
3297 if (status) {
3298 logAuditEvent(true, "delete", src);
3299 }
3300 return status;
3301 }
3302
3303 private FSPermissionChecker getPermissionChecker()
3304 throws AccessControlException {
3305 try {
3306 return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser());
3307 } catch (IOException ioe) {
3308 throw new AccessControlException(ioe);
3309 }
3310 }
3311
  /**
   * Remove a file/directory from the namespace.
   * <p>
   * For large directories, deletion is incremental. The blocks under
   * the directory are collected and deleted a small number at a time holding
   * the {@link FSNamesystem} lock.
   * <p>
   * For small directory or file the deletion is done in one shot.
   *
   * @param src path to delete
   * @param recursive whether a non-empty directory may be removed
   * @param enforcePermission whether to run the caller's permission check
   * @param logRetryCache whether to record rpc ids in the edit log
   * @return true if the path was unlinked, false otherwise
   * @see ClientProtocol#delete(String, boolean) for description of exceptions
   */
  private boolean deleteInternal(String src, boolean recursive,
      boolean enforcePermission, boolean logRetryCache)
      throws AccessControlException, SafeModeException, UnresolvedLinkException,
      IOException {
    BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
    List<INode> removedINodes = new ChunkedArrayList<INode>();
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    boolean ret = false;
    writeLock();
    try {
      // Re-check under the lock: NN state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot delete " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (!recursive && dir.isNonEmptyDirectory(src)) {
        throw new IOException(src + " is non empty");
      }
      if (enforcePermission && isPermissionEnabled) {
        checkPermission(pc, src, false, null, FsAction.WRITE, null,
            FsAction.ALL, false);
      }
      // Unlink the target directory from directory tree
      if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
        return false;
      }
      ret = true;
    } finally {
      writeUnlock();
    }
    // Sync the edit log and clean up blocks/inodes outside the FSN lock.
    getEditLog().logSync();
    removeBlocks(collectedBlocks); // Incremental deletion of blocks
    collectedBlocks.clear();
    dir.writeLock();
    try {
      dir.removeFromInodeMap(removedINodes);
    } finally {
      dir.writeUnlock();
    }
    removedINodes.clear();
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
          + src +" is removed");
    }
    return ret;
  }
3369
3370 /**
3371 * From the given list, incrementally remove the blocks from blockManager
3372 * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
3373 * ensure that other waiters on the lock can get in. See HDFS-2938
3374 *
3375 * @param blocks
3376 * An instance of {@link BlocksMapUpdateInfo} which contains a list
3377 * of blocks that need to be removed from blocksMap
3378 */
3379 void removeBlocks(BlocksMapUpdateInfo blocks) {
3380 List<Block> toDeleteList = blocks.getToDeleteList();
3381 Iterator<Block> iter = toDeleteList.iterator();
3382 while (iter.hasNext()) {
3383 writeLock();
3384 try {
3385 for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
3386 blockManager.removeBlock(iter.next());
3387 }
3388 } finally {
3389 writeUnlock();
3390 }
3391 }
3392 }
3393
3394 /**
3395 * Remove leases, inodes and blocks related to a given path
3396 * @param src The given path
3397 * @param blocks Containing the list of blocks to be deleted from blocksMap
3398 * @param removedINodes Containing the list of inodes to be removed from
3399 * inodesMap
3400 */
3401 void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
3402 List<INode> removedINodes) {
3403 assert hasWriteLock();
3404 leaseManager.removeLeaseWithPrefixPath(src);
3405 // remove inodes from inodesMap
3406 if (removedINodes != null) {
3407 dir.removeFromInodeMap(removedINodes);
3408 removedINodes.clear();
3409 }
3410 if (blocks == null) {
3411 return;
3412 }
3413
3414 removeBlocksAndUpdateSafemodeTotal(blocks);
3415 }
3416
3417 /**
3418 * Removes the blocks from blocksmap and updates the safemode blocks total
3419 *
3420 * @param blocks
3421 * An instance of {@link BlocksMapUpdateInfo} which contains a list
3422 * of blocks that need to be removed from blocksMap
3423 */
3424 void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
3425 assert hasWriteLock();
3426 // In the case that we are a Standby tailing edits from the
3427 // active while in safe-mode, we need to track the total number
3428 // of blocks and safe blocks in the system.
3429 boolean trackBlockCounts = isSafeModeTrackingBlocks();
3430 int numRemovedComplete = 0, numRemovedSafe = 0;
3431
3432 for (Block b : blocks.getToDeleteList()) {
3433 if (trackBlockCounts) {
3434 BlockInfo bi = getStoredBlock(b);
3435 if (bi.isComplete()) {
3436 numRemovedComplete++;
3437 if (bi.numNodes() >= blockManager.minReplication) {
3438 numRemovedSafe++;
3439 }
3440 }
3441 }
3442 blockManager.removeBlock(b);
3443 }
3444 if (trackBlockCounts) {
3445 if (LOG.isDebugEnabled()) {
3446 LOG.debug("Adjusting safe-mode totals for deletion."
3447 + "decreasing safeBlocks by " + numRemovedSafe
3448 + ", totalBlocks by " + numRemovedComplete);
3449 }
3450 adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
3451 }
3452 }
3453
3454 /**
3455 * @see SafeModeInfo#shouldIncrementallyTrackBlocks
3456 */
3457 private boolean isSafeModeTrackingBlocks() {
3458 if (!haEnabled) {
3459 // Never track blocks incrementally in non-HA code.
3460 return false;
3461 }
3462 SafeModeInfo sm = this.safeMode;
3463 return sm != null && sm.shouldIncrementallyTrackBlocks();
3464 }
3465
  /**
   * Get the file info for a specific file.
   *
   * @param src The string representation of the path to the file
   * @param resolveLink whether to throw UnresolvedLinkException
   *        if src refers to a symlink
   *
   * @throws AccessControlException if access is denied
   * @throws UnresolvedLinkException if a symlink is encountered.
   *
   * @return object containing information regarding the file
   *         or null if file not found
   * @throws StandbyException
   */
  HdfsFileStatus getFileInfo(String src, boolean resolveLink)
      throws AccessControlException, UnresolvedLinkException,
      StandbyException, IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException("Invalid file name: " + src);
    }
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      // Re-check under the lock: NN state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        checkPermission(pc, src, false, null, null, null, null, resolveLink);
      }
      stat = dir.getFileInfo(src, resolveLink);
    } catch (AccessControlException e) {
      // Audit the denial before propagating it.
      logAuditEvent(false, "getfileinfo", src);
      throw e;
    } finally {
      readUnlock();
    }
    logAuditEvent(true, "getfileinfo", src);
    return stat;
  }
3507
  /**
   * Returns true if the file is closed (i.e. not under construction).
   *
   * @param src path of the file to check
   * @throws AccessControlException if the caller may not traverse the path
   */
  boolean isFileClosed(String src)
      throws AccessControlException, UnresolvedLinkException,
      StandbyException, IOException {
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (isPermissionEnabled) {
        checkTraverse(pc, src);
      }
      // "Closed" is defined as the inverse of being under construction.
      return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
    } catch (AccessControlException e) {
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(false, "isFileClosed", src);
      }
      throw e;
    } finally {
      readUnlock();
    }
  }
3532
3533 /**
3534 * Create all the necessary directories
3535 */
3536 boolean mkdirs(String src, PermissionStatus permissions,
3537 boolean createParent) throws IOException, UnresolvedLinkException {
3538 boolean ret = false;
3539 try {
3540 ret = mkdirsInt(src, permissions, createParent);
3541 } catch (AccessControlException e) {
3542 logAuditEvent(false, "mkdirs", src);
3543 throw e;
3544 }
3545 return ret;
3546 }
3547
  /**
   * Implementation of mkdirs: validate the path, create the directories
   * under the write lock and log an audit event on success.
   */
  private boolean mkdirsInt(String src, PermissionStatus permissions,
      boolean createParent) throws IOException, UnresolvedLinkException {
    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    HdfsFileStatus resultingStat = null;
    boolean status = false;
    writeLock();
    try {
      // Re-check under the lock: NN state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create directory " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      status = mkdirsInternal(pc, src, permissions, createParent);
      if (status) {
        resultingStat = dir.getFileInfo(src, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN lock.
    getEditLog().logSync();
    if (status) {
      logAuditEvent(true, "mkdirs", src, null, resultingStat);
    }
    return status;
  }
3579
  /**
   * Create all the necessary directories.
   * Caller must hold the write lock; permission and quota checks are
   * performed here before delegating to the FSDirectory.
   */
  private boolean mkdirsInternal(FSPermissionChecker pc, String src,
      PermissionStatus permissions, boolean createParent)
      throws IOException, UnresolvedLinkException {
    assert hasWriteLock();
    if (isPermissionEnabled) {
      checkTraverse(pc, src);
    }
    if (dir.isDirMutable(src)) {
      // all the users of mkdirs() are used to expect 'true' even if
      // a new directory is not created.
      return true;
    }
    if (isPermissionEnabled) {
      checkAncestorAccess(pc, src, FsAction.WRITE);
    }
    if (!createParent) {
      verifyParentDir(src);
    }

    // validate that we have enough inodes. This is, at best, a
    // heuristic because the mkdirs() operation might need to
    // create multiple inodes.
    checkFsObjectLimit();

    if (!dir.mkdirs(src, permissions, false, now())) {
      throw new IOException("Failed to create directory: " + src);
    }
    return true;
  }
3612
  /**
   * Get the content summary for a specific file/dir.
   *
   * @param src The string representation of the path to the file
   *
   * @throws AccessControlException if access is denied
   * @throws UnresolvedLinkException if a symlink is encountered.
   * @throws FileNotFoundException if no file exists
   * @throws StandbyException
   * @throws IOException for issues with writing to the audit log
   *
   * @return object containing information regarding the file
   *         or null if file not found
   */
  ContentSummary getContentSummary(String src) throws IOException {
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    // NOTE(review): 'success' is only flipped for AccessControlException, so
    // other failures (e.g. FileNotFoundException) are audited as successful
    // — confirm whether that is intended.
    boolean success = true;
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
      }
      return dir.getContentSummary(src);

    } catch (AccessControlException ace) {
      success = false;
      throw ace;
    } finally {
      readUnlock();
      logAuditEvent(success, "contentSummary", src);
    }
  }
3649
  /**
   * Set the namespace quota and diskspace quota for a directory.
   * See {@link ClientProtocol#setQuota(String, long, long)} for the
   * contract. Requires superuser privilege.
   *
   * Note: This does not support ".inodes" relative path.
   */
  void setQuota(String path, long nsQuota, long dsQuota)
      throws IOException, UnresolvedLinkException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check under the lock: NN state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set quota on " + path);
      dir.setQuota(path, nsQuota, dsQuota);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN lock.
    getEditLog().logSync();
  }
3671
  /** Persist all metadata about this file.
   * @param src The string representation of the path
   * @param clientName The string representation of the client
   * @param lastBlockLength The length of the last block
   *        under construction reported from client.
   * @throws IOException if path does not exist
   */
  void fsync(String src, String clientName, long lastBlockLength)
      throws IOException, UnresolvedLinkException {
    NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot fsync file " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      // Verify the caller still holds the lease on this file.
      INodeFile pendingFile = checkLease(src, clientName);
      if (lastBlockLength > 0) {
        // Client reported a non-zero length for the block it is writing;
        // record it on the under-construction feature.
        pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
            pendingFile, lastBlockLength);
      }
      dir.persistBlocks(src, pendingFile, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN lock.
    getEditLog().logSync();
  }
3700
  /**
   * Move a file that is being written to be immutable.
   * @param src The filename
   * @param lease The lease for the client creating the file
   * @param recoveryLeaseHolder reassign lease to this holder if the last block
   *        needs recovery; keep current holder if null.
   * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
   *         replication;<br>
   *         RecoveryInProgressException if lease recovery is in progress.<br>
   *         IOException in case of an error.
   * @return true if file has been successfully finalized and closed or
   *         false if block recovery has been initiated. Since the lease owner
   *         has been changed and logged, caller should call logSync().
   */
  boolean internalReleaseLease(Lease lease, String src,
      String recoveryLeaseHolder) throws AlreadyBeingCreatedException,
      IOException, UnresolvedLinkException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile = iip.getINode(0).asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfo[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks; curBlock ends up as the
    // first non-complete block (if any).
    int nrCompleteBlocks;
    BlockInfo curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
        "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // that the penultimate block if exists is either COMPLETE or COMMITTED
    final BlockInfo lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
    boolean penultimateBlockMinReplication;
    BlockUCState penultimateBlockState;
    if (penultimateBlock == null) {
      penultimateBlockState = BlockUCState.COMPLETE;
      // If penultimate block doesn't exist then its minReplication is met
      penultimateBlockMinReplication = true;
    } else {
      // NOTE(review): the state is assigned COMMITTED unconditionally here
      // rather than read from penultimateBlock.getBlockUCState(); the checks
      // above are presumed to guarantee this — confirm against the actual
      // block state.
      penultimateBlockState = BlockUCState.COMMITTED;
      penultimateBlockMinReplication =
        blockManager.checkMinReplication(penultimateBlock);
    }
    assert penultimateBlockState == BlockUCState.COMPLETE ||
           penultimateBlockState == BlockUCState.COMMITTED :
           "Unexpected state of penultimate block in " + src;

    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks 
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // There is no datanode reported to this block.
        // may be client have crashed before writing data to pipeline.
        // This blocks doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      uc.initializeBlockRecovery(blockRecoveryId);
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
                "DIR* NameSystem.internalReleaseLease: " +
                "File " + src + " has not been closed." +
               " Lease recovery is in progress. " +
                "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }
3843
3844 private Lease reassignLease(Lease lease, String src, String newHolder,
3845 INodeFile pendingFile) {
3846 assert hasWriteLock();
3847 if(newHolder == null)
3848 return lease;
3849 // The following transaction is not synced. Make sure it's sync'ed later.
3850 logReassignLease(lease.getHolder(), src, newHolder);
3851 return reassignLeaseInternal(lease, src, newHolder, pendingFile);
3852 }
3853
3854 Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
3855 INodeFile pendingFile) {
3856 assert hasWriteLock();
3857 pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
3858 return leaseManager.reassignLease(lease, src, newHolder);
3859 }
3860
  /**
   * Commit (or complete) the last block of an under-construction file and,
   * if its actual size is smaller than the preferred block size, give back
   * the over-reserved disk space.
   */
  private void commitOrCompleteLastBlock(final INodeFile fileINode,
      final Block commitBlock) throws IOException {
    assert hasWriteLock();
    Preconditions.checkArgument(fileINode.isUnderConstruction());
    if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
      // Nothing changed in the block manager; no space adjustment needed.
      return;
    }

    // Adjust disk space consumption if required
    final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();
    if (diff > 0) {
      try {
        String path = fileINode.getFullPathName();
        // Release reserved space scaled by the file's replication factor.
        dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
      } catch (IOException e) {
        // Best-effort: failing to adjust space must not fail the commit.
        LOG.warn("Unexpected exception while updating disk space.", e);
      }
    }
  }
3880
  /**
   * Convert an under-construction file into a complete file: remove its
   * lease, record the modification in the latest snapshot, persist the
   * close in the edit log and kick off a replication check.
   */
  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();
    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    Preconditions.checkArgument(uc != null);
    leaseManager.removeLease(uc.getClientName(), src);

    pendingFile = pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    final INodeFile newFile = pendingFile.toCompleteFile(now());

    // close file and persist block allocations for this file
    dir.closeFile(src, newFile);

    blockManager.checkReplication(newFile);
  }
3901
  /** Look up the stored {@link BlockInfo} for the given block in the
   *  block manager's blocks map. */
  @VisibleForTesting
  BlockInfo getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }
3906
3907 @Override
3908 public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
3909 assert hasReadLock();
3910 final BlockCollection bc = blockUC.getBlockCollection();
3911 if (bc == null || !(bc instanceof INodeFile)
3912 || !((INodeFile) bc).isUnderConstruction()) {
3913 return false;
3914 }
3915
3916 INodeFile inodeUC = (INodeFile) bc;
3917 String fullName = inodeUC.getName();
3918 try {
3919 if (fullName != null && fullName.startsWith(Path.SEPARATOR)
3920 && dir.getINode(fullName) == inodeUC) {
3921 // If file exists in normal path then no need to look in snapshot
3922 return false;
3923 }
3924 } catch (UnresolvedLinkException e) {
3925 LOG.error("Error while resolving the link : " + fullName, e);
3926 return false;
3927 }
3928 /*
3929 * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
3930 * bc is not in the current fsdirectory tree, bc must represent a snapshot
3931 * file.
3932 * 2. if fullName is not an absolute path, bc cannot be existent in the
3933 * current fsdirectory tree.
3934 * 3. if bc is not the current node associated with fullName, bc must be a
3935 * snapshot inode.
3936 */
3937 return true;
3938 }
3939
  /**
   * Called by a datanode (via the primary recovery node) to report the
   * outcome of block recovery: update or delete the recovered last block,
   * optionally close the file, and persist the result in the edit log.
   *
   * @param lastblock the block that was recovered
   * @param newgenerationstamp recovery id; must match the stamp issued when
   *        recovery was initiated
   * @param newlength the finalized length of the recovered block
   * @param closeFile whether the file should be closed after the update
   * @param deleteblock whether the recovered block should be removed instead
   * @param newtargets datanodes holding the recovered replicas
   * @param newtargetstorages storage ids parallel to {@code newtargets}
   */
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.
  
      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        // Likely a duplicate/late report; the file was already finalized.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // Reject stale recovery attempts: the reported stamp must match the
      // recovery id issued by this NN.
      long recoveryId =
        ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + lastblock);
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        // There should be no locations in the blockManager till now because the
        // file is underConstruction
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            trimmedTargets.get(i).addBlock(
                trimmedStorages.get(i), storedBlock);
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = persistBlocks(iFile, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN lock.
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }
4064
4065 /**
4066 *
4067 * @param pendingFile
4068 * @param storedBlock
4069 * @return Path of the file that was closed.
4070 * @throws IOException
4071 */
4072 @VisibleForTesting
4073 String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
4074 throws IOException {
4075 String src = pendingFile.getFullPathName();
4076
4077 // commit the last block and complete it if it has minimum replicas
4078 commitOrCompleteLastBlock(pendingFile, storedBlock);
4079
4080 //remove lease, close file
4081 finalizeINodeFileUnderConstruction(src, pendingFile,
4082 Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));
4083
4084 return src;
4085 }
4086
4087 /**
4088 * Persist the block list for the given file.
4089 *
4090 * @param pendingFile
4091 * @return Path to the given file.
4092 * @throws IOException
4093 */
4094 @VisibleForTesting
4095 String persistBlocks(INodeFile pendingFile, boolean logRetryCache)
4096 throws IOException {
4097 String src = pendingFile.getFullPathName();
4098 dir.persistBlocks(src, pendingFile, logRetryCache);
4099 return src;
4100 }
4101
  /**
   * Renew the lease(s) held by the given client.
   *
   * @param holder the client whose leases are renewed
   */
  void renewLease(String holder) throws IOException {
    checkOperation(OperationCategory.WRITE);
    // NOTE(review): only the read lock is taken although the operation
    // category is WRITE — presumably because lease renewal does not modify
    // the namespace tree; confirm this is intentional.
    readLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot renew lease for " + holder);
      leaseManager.renewLease(holder);
    } finally {
      readUnlock();
    }
  }
4116
4117 /**
4118 * Get a partial listing of the indicated directory
4119 *
4120 * @param src the directory name
4121 * @param startAfter the name to start after
4122 * @param needLocation if blockLocations need to be returned
4123 * @return a partial listing starting after startAfter
4124 *
4125 * @throws AccessControlException if access is denied
4126 * @throws UnresolvedLinkException if symbolic link is encountered
4127 * @throws IOException if other I/O error occurred
4128 */
4129 DirectoryListing getListing(String src, byte[] startAfter,
4130 boolean needLocation)
4131 throws AccessControlException, UnresolvedLinkException, IOException {
4132 try {
4133 return getListingInt(src, startAfter, needLocation);
4134 } catch (AccessControlException e) {
4135 logAuditEvent(false, "listStatus", src);
4136 throw e;
4137 }
4138 }
4139
  /**
   * Implementation of getListing: resolves reserved paths (for both the
   * directory and the startAfter marker), checks permissions and reads the
   * listing under the read lock.
   */
  private DirectoryListing getListingInt(String src, byte[] startAfter,
      boolean needLocation) 
      throws AccessControlException, UnresolvedLinkException, IOException {
    DirectoryListing dl;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    // NOTE(review): new String(byte[]) uses the platform default charset —
    // confirm the startAfter bytes are encoded compatibly (e.g. UTF-8).
    String startAfterString = new String(startAfter);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);

      // Get file name when startAfter is an INodePath
      if (FSDirectory.isReservedName(startAfterString)) {
        byte[][] startAfterComponents = FSDirectory
            .getPathComponentsForReservedPath(startAfterString);
        try {
          String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
          byte[][] regularPath = INode.getPathComponents(tmp);
          // Keep only the final path component as the resume marker.
          startAfter = regularPath[regularPath.length - 1];
        } catch (IOException e) {
          // Possibly the inode is deleted
          throw new DirectoryListingStartAfterNotFoundException(
              "Can't find startAfter " + startAfterString);
        }
      }

      if (isPermissionEnabled) {
        if (dir.isDir(src)) {
          // Listing a directory needs read+execute on it.
          checkPathAccess(pc, src, FsAction.READ_EXECUTE);
        } else {
          checkTraverse(pc, src);
        }
      }
      logAuditEvent(true, "listStatus", src);
      dl = dir.getListing(src, startAfter, needLocation);
    } finally {
      readUnlock();
    }
    return dl;
  }
4182
4183 /////////////////////////////////////////////////////////
4184 //
4185 // These methods are called by datanodes
4186 //
4187 /////////////////////////////////////////////////////////
4188 /**
4189 * Register Datanode.
4190 * <p>
4191 * The purpose of registration is to identify whether the new datanode
4192 * serves a new data storage, and will report new data block copies,
4193 * which the namenode was not aware of; or the datanode is a replacement
4194 * node for the data storage that was previously served by a different
4195 * or the same (in terms of host:port) datanode.
4196 * The data storages are distinguished by their storageIDs. When a new
4197 * data storage is reported the namenode issues a new unique storageID.
4198 * <p>
4199 * Finally, the namenode returns its namespaceID as the registrationID
4200 * for the datanodes.
4201 * namespaceID is a persistent attribute of the name space.
4202 * The registrationID is checked every time the datanode is communicating
4203 * with the namenode.
4204 * Datanodes with inappropriate registrationID are rejected.
4205 * If the namenode stops, and then restarts it can restore its
4206 * namespaceID and will continue serving the datanodes that has previously
4207 * registered with the namenode without restarting the whole cluster.
4208 *
4209 * @see org.apache.hadoop.hdfs.server.datanode.DataNode
4210 */
  void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
    writeLock();
    try {
      // Delegate the registration bookkeeping to the DatanodeManager, then
      // re-evaluate safe mode (live-datanode counts feed its thresholds --
      // see SafeModeInfo.needEnter()).
      getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
      checkSafeMode();
    } finally {
      writeUnlock();
    }
  }
4220
4221 /**
4222 * Get registrationID for datanodes based on the namespaceID.
4223 *
4224 * @see #registerDatanode(DatanodeRegistration)
4225 * @return registration ID
4226 */
4227 String getRegistrationID() {
4228 return Storage.getRegistrationID(dir.fsImage.getStorage());
4229 }
4230
4231 /**
4232 * The given node has reported in. This method should:
4233 * 1) Record the heartbeat, so the datanode isn't timed out
4234 * 2) Adjust usage stats for future block allocation
4235 *
4236 * If a substantial amount of time passed since the last datanode
4237 * heartbeat then request an immediate block report.
4238 *
4239 * @return an array of datanode commands
4240 * @throws IOException
4241 */
  HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
      StorageReport[] reports, long cacheCapacity, long cacheUsed,
      int xceiverCount, int xmitsInProgress, int failedVolumes)
        throws IOException {
    readLock();
    try {
      //get datanode commands
      // Cap new transfer commands so that, together with the datanode's
      // in-progress transmits, it never exceeds the configured maximum
      // number of replication streams.
      final int maxTransfer = blockManager.getMaxReplicationStreams()
          - xmitsInProgress;
      DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
          nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
          xceiverCount, maxTransfer, failedVolumes);

      //create ha status
      // Tell the datanode which HA state this NN is in and how far its
      // edit log has progressed.
      final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
          haContext.getState().getServiceState(),
          getFSImage().getLastAppliedOrWrittenTxId());

      return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
    } finally {
      readUnlock();
    }
  }
4265
  /**
   * Returns whether or not there were available resources at the last check of
   * resources.
   *
   * @return true if there were sufficient resources available, false otherwise.
   */
  boolean nameNodeHasResourcesAvailable() {
    // Returns the result cached by the most recent checkAvailableResources()
    // call; it does not perform a fresh disk-space probe.
    return hasResourcesAvailable;
  }

  /**
   * Perform resource checks and cache the results.
   */
  void checkAvailableResources() {
    Preconditions.checkState(nnResourceChecker != null,
        "nnResourceChecker not initialized");
    hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
  }
4285
4286 /**
4287 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
4288 * there are found to be insufficient resources available, causes the NN to
4289 * enter safe mode. If resources are later found to have returned to
4290 * acceptable levels, this daemon will cause the NN to exit safe mode.
4291 */
4292 class NameNodeResourceMonitor implements Runnable {
4293 boolean shouldNNRmRun = true;
4294 @Override
4295 public void run () {
4296 try {
4297 while (fsRunning && shouldNNRmRun) {
4298 checkAvailableResources();
4299 if(!nameNodeHasResourcesAvailable()) {
4300 String lowResourcesMsg = "NameNode low on available disk space. ";
4301 if (!isInSafeMode()) {
4302 FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
4303 } else {
4304 FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
4305 }
4306 enterSafeMode(true);
4307 }
4308 try {
4309 Thread.sleep(resourceRecheckInterval);
4310 } catch (InterruptedException ie) {
4311 // Deliberately ignore
4312 }
4313 }
4314 } catch (Exception e) {
4315 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
4316 }
4317 }
4318
4319 public void stopMonitor() {
4320 shouldNNRmRun = false;
4321 }
4322 }
4323
4324 class NameNodeEditLogRoller implements Runnable {
4325
4326 private boolean shouldRun = true;
4327 private final long rollThreshold;
4328 private final long sleepIntervalMs;
4329
4330 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
4331 this.rollThreshold = rollThreshold;
4332 this.sleepIntervalMs = sleepIntervalMs;
4333 }
4334
4335 @Override
4336 public void run() {
4337 while (fsRunning && shouldRun) {
4338 try {
4339 FSEditLog editLog = getFSImage().getEditLog();
4340 long numEdits =
4341 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
4342 if (numEdits > rollThreshold) {
4343 FSNamesystem.LOG.info("NameNode rolling its own edit log because"
4344 + " number of edits in open segment exceeds threshold of "
4345 + rollThreshold);
4346 rollEditLog();
4347 }
4348 Thread.sleep(sleepIntervalMs);
4349 } catch (InterruptedException e) {
4350 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
4351 + " was interrupted, exiting");
4352 break;
4353 } catch (Exception e) {
4354 FSNamesystem.LOG.error("Swallowing exception in "
4355 + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
4356 }
4357 }
4358 }
4359
4360 public void stop() {
4361 shouldRun = false;
4362 }
4363 }
4364
  /** @return the FSImage owned by this namesystem's FSDirectory. */
  public FSImage getFSImage() {
    return dir.fsImage;
  }

  /** @return the edit log of the current FSImage. */
  public FSEditLog getEditLog() {
    return getFSImage().getEditLog();
  }
4372
4373 private void checkBlock(ExtendedBlock block) throws IOException {
4374 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
4375 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
4376 + " - expected " + blockPoolId);
4377 }
4378 }
4379
  @Metric({"MissingBlocks", "Number of missing blocks"})
  public long getMissingBlocksCount() {
    // not locking
    return blockManager.getMissingBlocksCount();
  }

  @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
  public int getExpiredHeartbeats() {
    return datanodeStatistics.getExpiredHeartbeats();
  }

  @Metric({"TransactionsSinceLastCheckpoint",
      "Number of transactions since last checkpoint"})
  public long getTransactionsSinceLastCheckpoint() {
    return getEditLog().getLastWrittenTxId() -
        getFSImage().getStorage().getMostRecentCheckpointTxId();
  }

  @Metric({"TransactionsSinceLastLogRoll",
      "Number of transactions since last edit log roll"})
  public long getTransactionsSinceLastLogRoll() {
    // Report zero on a standby or when no segment is open, rather than a
    // meaningless difference.
    if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
      return 0;
    } else {
      // Inclusive count over the range [curSegmentTxId, lastWrittenTxId].
      return getEditLog().getLastWrittenTxId() -
          getEditLog().getCurSegmentTxId() + 1;
    }
  }

  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
  public long getLastWrittenTransactionId() {
    return getEditLog().getLastWrittenTxId();
  }

  @Metric({"LastCheckpointTime",
      "Time in milliseconds since the epoch of the last checkpoint"})
  public long getLastCheckpointTime() {
    return getFSImage().getStorage().getMostRecentCheckpointTime();
  }

  /** @see ClientProtocol#getStats() */
  long[] getStats() {
    // Start from the datanode-level statistics array and overlay the
    // block-level counters at their ClientProtocol-defined indices.
    final long[] stats = datanodeStatistics.getStats();
    stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
    stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
    stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
    return stats;
  }
4428
  @Override // FSNamesystemMBean
  @Metric({"CapacityTotal",
      "Total raw capacity of data nodes in bytes"})
  public long getCapacityTotal() {
    return datanodeStatistics.getCapacityTotal();
  }

  @Metric({"CapacityTotalGB",
      "Total raw capacity of data nodes in GB"})
  public float getCapacityTotalGB() {
    return DFSUtil.roundBytesToGB(getCapacityTotal());
  }

  @Override // FSNamesystemMBean
  @Metric({"CapacityUsed",
      "Total used capacity across all data nodes in bytes"})
  public long getCapacityUsed() {
    return datanodeStatistics.getCapacityUsed();
  }

  @Metric({"CapacityUsedGB",
      "Total used capacity across all data nodes in GB"})
  public float getCapacityUsedGB() {
    return DFSUtil.roundBytesToGB(getCapacityUsed());
  }

  @Override // FSNamesystemMBean
  @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
  public long getCapacityRemaining() {
    return datanodeStatistics.getCapacityRemaining();
  }

  @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
  public float getCapacityRemainingGB() {
    return DFSUtil.roundBytesToGB(getCapacityRemaining());
  }

  @Metric({"CapacityUsedNonDFS",
      "Total space used by data nodes for non DFS purposes in bytes"})
  public long getCapacityUsedNonDFS() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }

  /**
   * Total number of connections.
   */
  @Override // FSNamesystemMBean
  @Metric
  public int getTotalLoad() {
    // "Load" is the aggregate xceiver count reported by the datanodes.
    return datanodeStatistics.getXceiverCount();
  }

  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }

  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }

  /** @return snapshottable-directory and snapshot counts as a JSON string. */
  @Override
  public String getSnapshotStats() {
    Map<String, Object> info = new HashMap<String, Object>();
    info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
    info.put("Snapshots", this.getNumSnapshots());
    return JSON.toString(info);
  }
4498
4499 int getNumberOfDatanodes(DatanodeReportType type) {
4500 readLock();
4501 try {
4502 return getBlockManager().getDatanodeManager().getDatanodeListForReport(
4503 type).size();
4504 } finally {
4505 readUnlock();
4506 }
4507 }
4508
4509 DatanodeInfo[] datanodeReport(final DatanodeReportType type
4510 ) throws AccessControlException, StandbyException {
4511 checkSuperuserPrivilege();
4512 checkOperation(OperationCategory.UNCHECKED);
4513 readLock();
4514 try {
4515 checkOperation(OperationCategory.UNCHECKED);
4516 final DatanodeManager dm = getBlockManager().getDatanodeManager();
4517 final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);
4518
4519 DatanodeInfo[] arr = new DatanodeInfo[results.size()];
4520 for (int i=0; i<arr.length; i++) {
4521 arr[i] = new DatanodeInfo(results.get(i));
4522 }
4523 return arr;
4524 } finally {
4525 readUnlock();
4526 }
4527 }
4528
  /**
   * Save namespace image.
   * This will save current namespace into fsimage file and empty edits file.
   * Requires superuser privilege and safe mode.
   *
   * @throws AccessControlException if superuser privilege is violated.
   * @throws IOException if the namenode is not in safe mode, or the image
   *         cannot be saved.
   */
  void saveNamespace() throws AccessControlException, IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();

    // Retried RPCs return the cached outcome rather than saving twice.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    // Read lock only -- presumably sufficient because the safe-mode check
    // below guarantees the namespace is not being mutated while the image
    // is written.
    readLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);

      if (!isInSafeMode()) {
        throw new IOException("Safe mode should be turned ON "
            + "in order to create namespace image.");
      }
      getFSImage().saveNamespace(this);
      success = true;
    } finally {
      readUnlock();
      // Record the outcome so retries of this RPC observe it.
      RetryCache.setState(cacheEntry, success);
    }
    LOG.info("New namespace image has been created");
  }
4562
4563 /**
4564 * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
4565 * Requires superuser privilege.
4566 *
4567 * @throws AccessControlException if superuser privilege is violated.
4568 */
4569 boolean restoreFailedStorage(String arg) throws AccessControlException,
4570 StandbyException {
4571 checkSuperuserPrivilege();
4572 checkOperation(OperationCategory.UNCHECKED);
4573 writeLock();
4574 try {
4575 checkOperation(OperationCategory.UNCHECKED);
4576
4577 // if it is disabled - enable it and vice versa.
4578 if(arg.equals("check"))
4579 return getFSImage().getStorage().getRestoreFailedStorage();
4580
4581 boolean val = arg.equals("true"); // false if not
4582 getFSImage().getStorage().setRestoreFailedStorage(val);
4583
4584 return val;
4585 } finally {
4586 writeUnlock();
4587 }
4588 }
4589
  /** @return the time this namesystem was started, as a fresh Date object. */
  Date getStartTime() {
    return new Date(startTime);
  }

  /**
   * Finalize a previously started upgrade by delegating to the FSImage.
   * Requires superuser privilege.
   *
   * @throws IOException if superuser privilege is violated or finalization
   *         fails
   */
  void finalizeUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
    } finally {
      writeUnlock();
    }
  }

  /**
   * Ask the DatanodeManager to refresh its node lists. Requires superuser
   * privilege.
   *
   * @throws IOException if superuser privilege is violated
   */
  void refreshNodes() throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    // A freshly constructed HdfsConfiguration re-reads configuration from
    // disk, picking up any changes made since startup.
    getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
  }

  /**
   * Set the bandwidth available for balancing on the datanodes.
   * Requires superuser privilege.
   *
   * @param bandwidth balancer bandwidth (bytes per second)
   * @throws IOException if superuser privilege is violated
   */
  void setBalancerBandwidth(long bandwidth) throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
  }
4617
  /**
   * SafeModeInfo contains information related to the safe mode.
   * <p>
   * An instance of {@link SafeModeInfo} is created when the name node
   * enters safe mode.
   * <p>
   * During name node startup {@link SafeModeInfo} counts the number of
   * <em>safe blocks</em>, those that have at least the minimal number of
   * replicas, and calculates the ratio of safe blocks to the total number
   * of blocks in the system, which is the size of blocks in
   * {@link FSNamesystem#blockManager}. When the ratio reaches the
   * {@link #threshold} it starts the SafeModeMonitor daemon in order
   * to monitor whether the safe mode {@link #extension} is passed.
   * Then it leaves safe mode and destroys itself.
   * <p>
   * If safe mode is turned on manually then the number of safe blocks is
   * not tracked because the name node is not intended to leave safe mode
   * automatically in the case.
   *
   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
   */
  public class SafeModeInfo {
    // configuration fields
    /** Safe mode threshold condition %.*/
    private final double threshold;
    /** Safe mode minimum number of datanodes alive */
    private final int datanodeThreshold;
    /** Safe mode extension after the threshold. */
    private int extension;
    /** Min replication required by safe mode. */
    private final int safeReplication;
    /** threshold for populating needed replication queues */
    private final double replQueueThreshold;
    // internal fields
    /** Time when threshold was reached.
     * <br> -1 safe mode is off
     * <br> 0 safe mode is on, and threshold is not reached yet
     * <br> >0 safe mode is on, but we are in extension period
     */
    private long reached = -1;
    /** Total number of blocks. */
    int blockTotal;
    /** Number of safe blocks. */
    int blockSafe;
    /** Number of blocks needed to satisfy safe mode threshold condition */
    private int blockThreshold;
    /** Number of blocks needed before populating replication queues */
    private int blockReplQueueThreshold;
    /** time of the last status printout */
    private long lastStatusReport = 0;
    /** Was safemode entered automatically because available resources were low. */
    private boolean resourcesLow = false;
    /** Should safemode adjust its block totals as blocks come in */
    private boolean shouldIncrementallyTrackBlocks = false;
    /** counter for tracking startup progress of reported blocks */
    private Counter awaitingReportedBlocksCounter;

    /**
     * Creates SafeModeInfo when the name node enters
     * automatic safe mode at startup.
     *
     * @param conf configuration
     */
    private SafeModeInfo(Configuration conf) {
      this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
          DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
      if(threshold > 1.0) {
        // A threshold above 1.0 can never be satisfied by reported blocks
        // alone; warn but honor the configured value.
        LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
      }
      this.datanodeThreshold = conf.getInt(
        DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
        DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
      this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
      this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY,
          DFS_NAMENODE_REPLICATION_MIN_DEFAULT);

      LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
      LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
      LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + "     = " + extension);

      // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
      this.replQueueThreshold =
        conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
                      (float) threshold);
      this.blockTotal = 0;
      this.blockSafe = 0;
    }

    /**
     * In the HA case, the StandbyNode can be in safemode while the namespace
     * is modified by the edit log tailer. In this case, the number of total
     * blocks changes as edits are processed (eg blocks are added and deleted).
     * However, we don't want to do the incremental tracking during the
     * startup-time loading process -- only once the initial total has been
     * set after the image has been loaded.
     */
    private boolean shouldIncrementallyTrackBlocks() {
      return shouldIncrementallyTrackBlocks;
    }

    /**
     * Creates SafeModeInfo when safe mode is entered manually, or because
     * available resources are low.
     *
     * The {@link #threshold} is set to 1.5 so that it could never be reached.
     * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
     *
     * @see SafeModeInfo
     */
    private SafeModeInfo(boolean resourcesLow) {
      this.threshold = 1.5f; // this threshold can never be reached
      this.datanodeThreshold = Integer.MAX_VALUE;
      this.extension = Integer.MAX_VALUE;  // MAX_VALUE also marks manual mode (see isManual())
      this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
      this.replQueueThreshold = 1.5f; // can never be reached
      this.blockTotal = -1;
      this.blockSafe = -1;
      this.resourcesLow = resourcesLow;
      enter();
      reportStatus("STATE* Safe mode is ON.", true);
    }

    /**
     * Check if safe mode is on.
     * @return true if in safe mode
     */
    private synchronized boolean isOn() {
      doConsistencyCheck();
      return this.reached >= 0;
    }

    /**
     * Enter safe mode.
     */
    private void enter() {
      // reached == 0 means "on, threshold not yet reached" (see field doc).
      this.reached = 0;
    }

    /**
     * Leave safe mode.
     * <p>
     * Check for invalid, under- & over-replicated blocks in the end of startup.
     */
    private synchronized void leave() {
      // if not done yet, initialize replication queues.
      // In the standby, do not populate repl queues
      if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
        initializeReplQueues();
      }
      long timeInSafemode = now() - startTime;
      NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
                                    + timeInSafemode/1000 + " secs");
      NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);

      //Log the following only once (when transitioning from ON -> OFF)
      if (reached >= 0) {
        NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
      }
      reached = -1;
      safeMode = null;
      final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
      NameNode.stateChangeLog.info("STATE* Network topology has "
          + nt.getNumOfRacks() + " racks and "
          + nt.getNumOfLeaves() + " datanodes");
      NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
          + blockManager.numOfUnderReplicatedBlocks() + " blocks");

      startSecretManagerIfNecessary();

      // If startup has not yet completed, end safemode phase.
      StartupProgress prog = NameNode.getStartupProgress();
      if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
        prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
        prog.endPhase(Phase.SAFEMODE);
      }
    }

    /**
     * Check whether we have reached the threshold for
     * initializing replication queues.
     */
    private synchronized boolean canInitializeReplQueues() {
      return shouldPopulateReplQueues()
          && blockSafe >= blockReplQueueThreshold;
    }

    /**
     * Safe mode can be turned off iff
     * the threshold is reached and
     * the extension time have passed.
     * @return true if can leave or false otherwise.
     */
    private synchronized boolean canLeave() {
      // reached == 0: the block/datanode thresholds have not been met yet.
      if (reached == 0) {
        return false;
      }

      if (now() - reached < extension) {
        reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
        return false;
      }

      // Re-check: thresholds may have been un-met again during the extension.
      if (needEnter()) {
        reportStatus("STATE* Safe mode ON, thresholds not met.", false);
        return false;
      }

      return true;
    }

    /**
     * There is no need to enter safe mode
     * if DFS is empty or {@link #threshold} == 0
     */
    private boolean needEnter() {
      return (threshold != 0 && blockSafe < blockThreshold) ||
        (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
        (!nameNodeHasResourcesAvailable());
    }

    /**
     * Check and trigger safe mode if needed.
     */
    private void checkMode() {
      // Have to have write-lock since leaving safemode initializes
      // repl queues, which requires write lock
      assert hasWriteLock();
      // if smmthread is already running, the block threshold must have been
      // reached before, there is no need to enter the safe mode again
      if (smmthread == null && needEnter()) {
        enter();
        // check if we are ready to initialize replication queues
        if (canInitializeReplQueues() && !isPopulatingReplQueues()
            && !haEnabled) {
          initializeReplQueues();
        }
        reportStatus("STATE* Safe mode ON.", false);
        return;
      }
      // the threshold is reached or was reached before
      if (!isOn() ||                           // safe mode is off
          extension <= 0 || threshold <= 0) {  // don't need to wait
        this.leave(); // leave safe mode
        return;
      }
      if (reached > 0) {  // threshold has already been reached before
        reportStatus("STATE* Safe mode ON.", false);
        return;
      }
      // start monitor
      reached = now();
      if (smmthread == null) {
        smmthread = new Daemon(new SafeModeMonitor());
        smmthread.start();
        reportStatus("STATE* Safe mode extension entered.", true);
      }

      // check if we are ready to initialize replication queues
      if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
        initializeReplQueues();
      }
    }

    /**
     * Set total number of blocks.
     */
    private synchronized void setBlockTotal(int total) {
      this.blockTotal = total;
      this.blockThreshold = (int) (blockTotal * threshold);
      this.blockReplQueueThreshold =
        (int) (blockTotal * replQueueThreshold);
      if (haEnabled) {
        // After we initialize the block count, any further namespace
        // modifications done while in safe mode need to keep track
        // of the number of total blocks in the system.
        this.shouldIncrementallyTrackBlocks = true;
      }
      if(blockSafe < 0)
        this.blockSafe = 0;
      checkMode();
    }

    /**
     * Increment number of safe blocks if current block has
     * reached minimal replication.
     * @param replication current replication
     */
    private synchronized void incrementSafeBlockCount(short replication) {
      // Exact equality: a block is counted once, the first time it reaches
      // the minimal replication; additional replicas do not re-count it.
      if (replication == safeReplication) {
        this.blockSafe++;

        // Report startup progress only if we haven't completed startup yet.
        StartupProgress prog = NameNode.getStartupProgress();
        if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
          if (this.awaitingReportedBlocksCounter == null) {
            this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
              STEP_AWAITING_REPORTED_BLOCKS);
          }
          this.awaitingReportedBlocksCounter.increment();
        }

        checkMode();
      }
    }

    /**
     * Decrement number of safe blocks if current block has
     * fallen below minimal replication.
     * @param replication current replication
     */
    private synchronized void decrementSafeBlockCount(short replication) {
      // Mirror of incrementSafeBlockCount: only the transition from
      // safeReplication to safeReplication-1 un-counts a block.
      if (replication == safeReplication-1) {
        this.blockSafe--;
        //blockSafe is set to -1 in manual / low resources safemode
        assert blockSafe >= 0 || isManual() || areResourcesLow();
        checkMode();
      }
    }

    /**
     * Check if safe mode was entered manually
     */
    private boolean isManual() {
      // Manual mode is encoded as extension == Integer.MAX_VALUE
      // (see setManual() and the manual/low-resource constructor).
      return extension == Integer.MAX_VALUE;
    }

    /**
     * Set manual safe mode.
     */
    private synchronized void setManual() {
      extension = Integer.MAX_VALUE;
    }

    /**
     * Check if safe mode was entered due to resources being low.
     */
    private boolean areResourcesLow() {
      return resourcesLow;
    }

    /**
     * Set that resources are low for this instance of safe mode.
     */
    private void setResourcesLow() {
      resourcesLow = true;
    }

    /**
     * A tip on how safe mode is to be turned off: manually or automatically.
     */
    String getTurnOffTip() {
      if(!isOn()) {
        return "Safe mode is OFF.";
      }

      //Manual OR low-resource safemode. (Admin intervention required)
      String adminMsg = "It was turned on manually. ";
      if (areResourcesLow()) {
        adminMsg = "Resources are low on NN. Please add or free up more "
            + "resources then turn off safe mode manually. NOTE:  If you turn off"
            + " safe mode before adding resources, "
            + "the NN will immediately return to safe mode. ";
      }
      if (isManual() || areResourcesLow()) {
        return adminMsg
            + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
      }

      boolean thresholdsMet = true;
      int numLive = getNumLiveDataNodes();
      String msg = "";
      if (blockSafe < blockThreshold) {
        // NOTE(review): the "+ 1" below looks off by one -- canLeave() only
        // requires blockSafe >= blockThreshold, so (blockThreshold - blockSafe)
        // additional blocks would suffice. Confirm before changing the message.
        msg += String.format(
          "The reported blocks %d needs additional %d"
          + " blocks to reach the threshold %.4f of total blocks %d.\n",
          blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
        thresholdsMet = false;
      } else {
        msg += String.format("The reported blocks %d has reached the threshold"
            + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
      }
      if (numLive < datanodeThreshold) {
        msg += String.format(
          "The number of live datanodes %d needs an additional %d live "
          + "datanodes to reach the minimum number %d.\n",
          numLive, (datanodeThreshold - numLive), datanodeThreshold);
        thresholdsMet = false;
      } else {
        msg += String.format("The number of live datanodes %d has reached "
            + "the minimum number %d. ",
            numLive, datanodeThreshold);
      }
      msg += (reached > 0) ? "In safe mode extension. " : "";
      msg += "Safe mode will be turned off automatically ";

      if (!thresholdsMet) {
        msg += "once the thresholds have been reached.";
      } else if (reached + extension - now() > 0) {
        msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
      } else {
        msg += "soon.";
      }

      return msg;
    }

    /**
     * Print status every 20 seconds.
     */
    private void reportStatus(String msg, boolean rightNow) {
      long curTime = now();
      // Throttle to one report per 20s unless the caller forces it.
      if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
        return;
      NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
      lastStatusReport = curTime;
    }

    @Override
    public String toString() {
      String resText = "Current safe blocks = "
        + blockSafe
        + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
        + ". Minimal replication = " + safeReplication + ".";
      if (reached > 0)
        resText += " Threshold was reached " + new Date(reached) + ".";
      return resText;
    }

    /**
     * Checks consistency of the class state.
     * This is costly so only runs if asserts are enabled.
     */
    private void doConsistencyCheck() {
      boolean assertsOn = false;
      assert assertsOn = true; // set to true if asserts are on
      if (!assertsOn) return;

      if (blockTotal == -1 && blockSafe == -1) {
        return; // manual safe mode
      }
      int activeBlocks = blockManager.getActiveBlockCount();
      if ((blockTotal != activeBlocks) &&
          !(blockSafe >= 0 && blockSafe <= blockTotal)) {
        throw new AssertionError(
            " SafeMode: Inconsistent filesystem state: "
            + "SafeMode data: blockTotal=" + blockTotal
            + " blockSafe=" + blockSafe + "; "
            + "BlockManager data: active="  + activeBlocks);
      }
    }

    // Apply deltas to the safe/total block counts while in HA safemode,
    // keeping the thresholds (recomputed via setBlockTotal) in sync.
    private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
      if (!shouldIncrementallyTrackBlocks) {
        return;
      }
      assert haEnabled;

      if (LOG.isDebugEnabled()) {
        LOG.debug("Adjusting block totals from " +
            blockSafe + "/" + blockTotal + " to " +
            (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
      }
      assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
        blockSafe + " by " + deltaSafe + ": would be negative";
      assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
        blockTotal + " by " + deltaTotal + ": would be negative";

      blockSafe += deltaSafe;
      setBlockTotal(blockTotal + deltaTotal);
    }
  }
5089
  /**
   * Periodically check whether it is time to leave safe mode.
   * This thread starts when the threshold level is reached.
   */
  class SafeModeMonitor implements Runnable {
    /** interval in msec for checking safe mode: {@value} */
    private static final long recheckInterval = 1000;

    /**
     * Poll once per {@link #recheckInterval} ms until either safe mode can
     * be left (then leave it and exit) or the namesystem shuts down.
     * All safe-mode inspection/mutation happens under the write lock.
     */
    @Override
    public void run() {
      while (fsRunning) {
        writeLock();
        try {
          if (safeMode == null) { // Not in safe mode.
            break;
          }
          if (safeMode.canLeave()) {
            // Leave safe mode.
            safeMode.leave();
            smmthread = null;
            break;
          }
        } finally {
          writeUnlock();
        }

        try {
          Thread.sleep(recheckInterval);
        } catch (InterruptedException ie) {
          // Deliberately ignored: shutdown is observed via fsRunning on the
          // next loop iteration.
        }
      }
      if (!fsRunning) {
        LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
      }
    }
  }
5130
5131 boolean setSafeMode(SafeModeAction action) throws IOException {
5132 if (action != SafeModeAction.SAFEMODE_GET) {
5133 checkSuperuserPrivilege();
5134 switch(action) {
5135 case SAFEMODE_LEAVE: // leave safe mode
5136 leaveSafeMode();
5137 break;
5138 case SAFEMODE_ENTER: // enter safe mode
5139 enterSafeMode(false);
5140 break;
5141 default:
5142 LOG.error("Unexpected safe mode action");
5143 }
5144 }
5145 return isInSafeMode();
5146 }
5147
5148 @Override
5149 public void checkSafeMode() {
5150 // safeMode is volatile, and may be set to null at any time
5151 SafeModeInfo safeMode = this.safeMode;
5152 if (safeMode != null) {
5153 safeMode.checkMode();
5154 }
5155 }
5156
5157 @Override
5158 public boolean isInSafeMode() {
5159 // safeMode is volatile, and may be set to null at any time
5160 SafeModeInfo safeMode = this.safeMode;
5161 if (safeMode == null)
5162 return false;
5163 return safeMode.isOn();
5164 }
5165
5166 @Override
5167 public boolean isInStartupSafeMode() {
5168 // safeMode is volatile, and may be set to null at any time
5169 SafeModeInfo safeMode = this.safeMode;
5170 if (safeMode == null)
5171 return false;
5172 // If the NN is in safemode, and not due to manual / low resources, we
5173 // assume it must be because of startup. If the NN had low resources during
5174 // startup, we assume it came out of startup safemode and it is now in low
5175 // resources safemode
5176 return !safeMode.isManual() && !safeMode.areResourcesLow()
5177 && safeMode.isOn();
5178 }
5179
5180 /**
5181 * Check if replication queues are to be populated
5182 * @return true when node is HAState.Active and not in the very first safemode
5183 */
5184 @Override
5185 public boolean isPopulatingReplQueues() {
5186 if (!shouldPopulateReplQueues()) {
5187 return false;
5188 }
5189 return initializedReplQueues;
5190 }
5191
5192 private boolean shouldPopulateReplQueues() {
5193 if(haContext == null || haContext.getState() == null)
5194 return false;
5195 return haContext.getState().shouldPopulateReplQueues();
5196 }
5197
5198 @Override
5199 public void incrementSafeBlockCount(int replication) {
5200 // safeMode is volatile, and may be set to null at any time
5201 SafeModeInfo safeMode = this.safeMode;
5202 if (safeMode == null)
5203 return;
5204 safeMode.incrementSafeBlockCount((short)replication);
5205 }
5206
5207 @Override
5208 public void decrementSafeBlockCount(Block b) {
5209 // safeMode is volatile, and may be set to null at any time
5210 SafeModeInfo safeMode = this.safeMode;
5211 if (safeMode == null) // mostly true
5212 return;
5213 BlockInfo storedBlock = getStoredBlock(b);
5214 if (storedBlock.isComplete()) {
5215 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas());
5216 }
5217 }
5218
5219 /**
5220 * Adjust the total number of blocks safe and expected during safe mode.
5221 * If safe mode is not currently on, this is a no-op.
5222 * @param deltaSafe the change in number of safe blocks
5223 * @param deltaTotal the change i nnumber of total blocks expected
5224 */
5225 @Override
5226 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) {
5227 // safeMode is volatile, and may be set to null at any time
5228 SafeModeInfo safeMode = this.safeMode;
5229 if (safeMode == null)
5230 return;
5231 safeMode.adjustBlockTotals(deltaSafe, deltaTotal);
5232 }
5233
5234 /**
5235 * Set the total number of blocks in the system.
5236 */
5237 public void setBlockTotal() {
5238 // safeMode is volatile, and may be set to null at any time
5239 SafeModeInfo safeMode = this.safeMode;
5240 if (safeMode == null)
5241 return;
5242 safeMode.setBlockTotal((int)getCompleteBlocksTotal());
5243 }
5244
5245 /**
5246 * Get the total number of blocks in the system.
5247 */
5248 @Override // FSNamesystemMBean
5249 @Metric
5250 public long getBlocksTotal() {
5251 return blockManager.getTotalBlocks();
5252 }
5253
5254 /**
5255 * Get the total number of COMPLETE blocks in the system.
5256 * For safe mode only complete blocks are counted.
5257 */
5258 private long getCompleteBlocksTotal() {
5259 // Calculate number of blocks under construction
5260 long numUCBlocks = 0;
5261 readLock();
5262 try {
5263 for (Lease lease : leaseManager.getSortedLeases()) {
5264 for (String path : lease.getPaths()) {
5265 final INodeFile cons;
5266 try {
5267 cons = dir.getINode(path).asFile();
5268 Preconditions.checkState(cons.isUnderConstruction());
5269 } catch (UnresolvedLinkException e) {
5270 throw new AssertionError("Lease files should reside on this FS");
5271 }
5272 BlockInfo[] blocks = cons.getBlocks();
5273 if(blocks == null)
5274 continue;
5275 for(BlockInfo b : blocks) {
5276 if(!b.isComplete())
5277 numUCBlocks++;
5278 }
5279 }
5280 }
5281 LOG.info("Number of blocks under construction: " + numUCBlocks);
5282 return getBlocksTotal() - numUCBlocks;
5283 } finally {
5284 readUnlock();
5285 }
5286 }
5287
  /**
   * Enter safe mode. If resourcesLow is false, then we assume it is manual
   * @throws IOException
   */
  void enterSafeMode(boolean resourcesLow) throws IOException {
    writeLock();
    try {
      // Stop the secret manager, since rolling the master key would
      // try to write to the edit log
      stopSecretManager();

      // Ensure that any concurrent operations have been fully synced
      // before entering safe mode. This ensures that the FSImage
      // is entirely stable on disk as soon as we're in safe mode.
      boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
      // Before Editlog is in OpenForWrite mode, editLogStream will be null.
      // So logSyncAll can only be called when the Editlog is open for write.
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      if (!isInSafeMode()) {
        // Not currently in safe mode: enter it now for the given reason.
        safeMode = new SafeModeInfo(resourcesLow);
        return;
      }
      // Already in safe mode: record the (possibly new) reason on the
      // existing state object instead of replacing it.
      if (resourcesLow) {
        safeMode.setResourcesLow();
      } else {
        safeMode.setManual();
      }
      // Sync again so edits made while marking the transition are durable.
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      NameNode.stateChangeLog.info("STATE* Safe mode is ON"
          + safeMode.getTurnOffTip());
    } finally {
      writeUnlock();
    }
  }
5326
5327 /**
5328 * Leave safe mode.
5329 * @throws IOException
5330 */
5331 void leaveSafeMode() {
5332 writeLock();
5333 try {
5334 if (!isInSafeMode()) {
5335 NameNode.stateChangeLog.info("STATE* Safe mode is already OFF");
5336 return;
5337 }
5338 safeMode.leave();
5339 } finally {
5340 writeUnlock();
5341 }
5342 }
5343
5344 String getSafeModeTip() {
5345 readLock();
5346 try {
5347 if (!isInSafeMode()) {
5348 return "";
5349 }
5350 return safeMode.getTurnOffTip();
5351 } finally {
5352 readUnlock();
5353 }
5354 }
5355
5356 CheckpointSignature rollEditLog() throws IOException {
5357 checkSuperuserPrivilege();
5358 checkOperation(OperationCategory.JOURNAL);
5359 writeLock();
5360 try {
5361 checkOperation(OperationCategory.JOURNAL);
5362 checkNameNodeSafeMode("Log not rolled");
5363 if (Server.isRpcInvocation()) {
5364 LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
5365 }
5366 return getFSImage().rollEditLog();
5367 } finally {
5368 writeUnlock();
5369 }
5370 }
5371
  /**
   * Start a checkpoint on behalf of a backup/checkpoint node. Retries of the
   * same RPC are answered from the retry cache rather than starting a second
   * checkpoint.
   *
   * @param backupNode registration of the node performing the checkpoint
   * @param activeNamenode registration of this NameNode
   * @return the command the backup node should execute
   * @throws IOException if in safe mode or the image operation fails
   */
  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      // A previous attempt of this exact RPC already succeeded; replay it.
      return (NamenodeCommand) cacheEntry.getPayload();
    }
    writeLock();
    NamenodeCommand cmd = null;
    try {
      checkOperation(OperationCategory.CHECKPOINT);
      checkNameNodeSafeMode("Checkpoint not started");

      LOG.info("Start checkpoint for " + backupNode.getAddress());
      cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
      getEditLog().logSync();
      return cmd;
    } finally {
      writeUnlock();
      // Record outcome (success iff cmd != null) for future RPC retries.
      RetryCache.setState(cacheEntry, cmd != null, cmd);
    }
  }
5395
  /**
   * Forward an incremental (received/deleted) block report from a datanode
   * to the block manager, under the namesystem write lock.
   *
   * @param nodeID the reporting datanode
   * @param poolId block pool id of the report (unused here; the namesystem
   *               serves a single block pool)
   * @param srdb   the received/deleted block list for one storage
   */
  public void processIncrementalBlockReport(final DatanodeID nodeID,
      final String poolId, final StorageReceivedDeletedBlocks srdb)
      throws IOException {
    writeLock();
    try {
      blockManager.processIncrementalBlockReport(nodeID, srdb);
    } finally {
      writeUnlock();
    }
  }
5406
  /**
   * Complete a checkpoint previously started with startCheckpoint. Retries
   * of the same RPC are short-circuited through the retry cache.
   *
   * @param registration registration of the checkpointing node
   * @param sig the checkpoint signature to validate against
   * @throws IOException if in safe mode or the signature does not match
   */
  void endCheckpoint(NamenodeRegistration registration,
      CheckpointSignature sig) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    // Only the read lock is needed: ending a checkpoint does not mutate the
    // namespace itself.
    readLock();
    try {
      checkOperation(OperationCategory.CHECKPOINT);

      checkNameNodeSafeMode("Checkpoint not ended");
      LOG.info("End checkpoint for " + registration.getAddress());
      getFSImage().endCheckpoint(sig);
      success = true;
    } finally {
      readUnlock();
      // Record the outcome for future retries of this RPC.
      RetryCache.setState(cacheEntry, success);
    }
  }
5428
5429 PermissionStatus createFsOwnerPermissions(FsPermission permission) {
5430 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
5431 }
5432
  /**
   * Verify that the current user owns the inode at {@code path}.
   * (doCheckOwner=true; no ancestor/parent/path/subtree access checks.)
   */
  private void checkOwner(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, true, null, null, null, null);
  }

  /**
   * Verify {@code access} on the inode at {@code path} itself
   * (the {@code access} slot of checkPermission).
   */
  private void checkPathAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, access, null);
  }

  /**
   * Verify {@code access} on the parent of {@code path}
   * (the {@code parentAccess} slot of checkPermission).
   */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, access, null, null);
  }

  /**
   * Verify {@code access} on an ancestor of {@code path}
   * (the {@code ancestorAccess} slot of checkPermission).
   */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, access, null, null, null);
  }

  /**
   * Verify traverse permission along {@code path} only; all specific
   * access slots are left null.
   */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, null, null);
  }
5460
5461 @Override
5462 public void checkSuperuserPrivilege()
5463 throws AccessControlException {
5464 if (isPermissionEnabled) {
5465 FSPermissionChecker pc = getPermissionChecker();
5466 pc.checkSuperuserPrivilege();
5467 }
5468 }
5469
  /**
   * Check whether current user have permissions to access the path. For more
   * details of the parameters, see
   * {@link FSPermissionChecker#checkPermission()}.
   */
  private void checkPermission(FSPermissionChecker pc,
      String path, boolean doCheckOwner, FsAction ancestorAccess,
      FsAction parentAccess, FsAction access, FsAction subAccess)
      throws AccessControlException, UnresolvedLinkException {
    // Convenience overload: resolve symlinks (resolveLink=true) by default.
    checkPermission(pc, path, doCheckOwner, ancestorAccess,
        parentAccess, access, subAccess, true);
  }
5482
5483 /**
5484 * Check whether current user have permissions to access the path. For more
5485 * details of the parameters, see
5486 * {@link FSPermissionChecker#checkPermission()}.
5487 */
5488 private void checkPermission(FSPermissionChecker pc,
5489 String path, boolean doCheckOwner, FsAction ancestorAccess,
5490 FsAction parentAccess, FsAction access, FsAction subAccess,
5491 boolean resolveLink)
5492 throws AccessControlException, UnresolvedLinkException {
5493 if (!pc.isSuperUser()) {
5494 dir.waitForReady();
5495 readLock();
5496 try {
5497 pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess,
5498 parentAccess, access, subAccess, resolveLink);
5499 } finally {
5500 readUnlock();
5501 }
5502 }
5503 }
5504
5505 /**
5506 * Check to see if we have exceeded the limit on the number
5507 * of inodes.
5508 */
5509 void checkFsObjectLimit() throws IOException {
5510 if (maxFsObjects != 0 &&
5511 maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
5512 throw new IOException("Exceeded the configured number of objects " +
5513 maxFsObjects + " in the filesystem.");
5514 }
5515 }
5516
  /**
   * Get the total number of objects in the system.
   */
  @Override // FSNamesystemMBean
  public long getMaxObjects() {
    return maxFsObjects;
  }

  /** Total number of inodes, read under the namesystem read lock. */
  @Override // FSNamesystemMBean
  @Metric
  public long getFilesTotal() {
    readLock();
    try {
      return this.dir.totalInodes();
    } finally {
      readUnlock();
    }
  }

  /** Blocks with replications currently pending on datanodes. */
  @Override // FSNamesystemMBean
  @Metric
  public long getPendingReplicationBlocks() {
    return blockManager.getPendingReplicationBlocksCount();
  }

  /** Blocks with fewer live replicas than their target replication. */
  @Override // FSNamesystemMBean
  @Metric
  public long getUnderReplicatedBlocks() {
    return blockManager.getUnderReplicatedBlocksCount();
  }

  /** Returns number of blocks with corrupt replicas */
  @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
  public long getCorruptReplicaBlocks() {
    return blockManager.getCorruptReplicaBlocksCount();
  }

  /** Block replications currently scheduled onto datanodes. */
  @Override // FSNamesystemMBean
  @Metric
  public long getScheduledReplicationBlocks() {
    return blockManager.getScheduledReplicationBlocksCount();
  }

  /** Blocks queued for deletion on datanodes. */
  @Override
  @Metric
  public long getPendingDeletionBlocks() {
    return blockManager.getPendingDeletionBlocksCount();
  }

  /** Block replicas in excess of the target replication. */
  @Metric
  public long getExcessBlocks() {
    return blockManager.getExcessBlocksCount();
  }

  // HA-only metric
  @Metric
  public long getPostponedMisreplicatedBlocks() {
    return blockManager.getPostponedMisreplicatedBlocksCount();
  }

  // HA-only metric
  @Metric
  public int getPendingDataNodeMessageCount() {
    return blockManager.getPendingDataNodeMessageCount();
  }

  // HA-only metric: current HA state (e.g. active/standby) as a string.
  @Metric
  public String getHAState() {
    return haContext.getState().toString();
  }

  // HA-only metric: 0 unless this NN is in standby state with a tailer.
  @Metric
  public long getMillisSinceLastLoadedEdits() {
    if (isInStandbyState() && editLogTailer != null) {
      return now() - editLogTailer.getLastLoadTimestamp();
    } else {
      return 0;
    }
  }

  /** Capacity of the block manager's blocks map. */
  @Metric
  public int getBlockCapacity() {
    return blockManager.getCapacity();
  }

  /** Coarse filesystem state string exposed over JMX. */
  @Override // FSNamesystemMBean
  public String getFSState() {
    return isInSafeMode() ? "safeMode" : "Operational";
  }

  // JMX registration handles; cleared again in shutdown().
  private ObjectName mbeanName;
  private ObjectName mxbeanName;
5611
5612 /**
5613 * Register the FSNamesystem MBean using the name
5614 * "hadoop:service=NameNode,name=FSNamesystemState"
5615 */
5616 private void registerMBean() {
5617 // We can only implement one MXBean interface, so we keep the old one.
5618 try {
5619 StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
5620 mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
5621 } catch (NotCompliantMBeanException e) {
5622 throw new RuntimeException("Bad MBean setup", e);
5623 }
5624
5625 LOG.info("Registered FSNamesystemState MBean");
5626 }
5627
5628 /**
5629 * shutdown FSNamesystem
5630 */
5631 void shutdown() {
5632 if (mbeanName != null) {
5633 MBeans.unregister(mbeanName);
5634 mbeanName = null;
5635 }
5636 if (mxbeanName != null) {
5637 MBeans.unregister(mxbeanName);
5638 mxbeanName = null;
5639 }
5640 if (dir != null) {
5641 dir.shutdown();
5642 }
5643 if (blockManager != null) {
5644 blockManager.shutdown();
5645 }
5646 }
5647
5648
  /** Number of datanodes currently considered live. */
  @Override // FSNamesystemMBean
  public int getNumLiveDataNodes() {
    return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
  }

  /** Number of datanodes currently considered dead. */
  @Override // FSNamesystemMBean
  public int getNumDeadDataNodes() {
    return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
  }

  /** Number of live datanodes that have completed decommissioning. */
  @Override // FSNamesystemMBean
  public int getNumDecomLiveDataNodes() {
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    // fetchDatanodes(live, dead, removeDecommissioningNode)
    getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
    int liveDecommissioned = 0;
    for (DatanodeDescriptor node : live) {
      liveDecommissioned += node.isDecommissioned() ? 1 : 0;
    }
    return liveDecommissioned;
  }

  /** Number of dead datanodes that had completed decommissioning. */
  @Override // FSNamesystemMBean
  public int getNumDecomDeadDataNodes() {
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
    int deadDecommissioned = 0;
    for (DatanodeDescriptor node : dead) {
      deadDecommissioned += node.isDecommissioned() ? 1 : 0;
    }
    return deadDecommissioned;
  }

  /** Number of datanodes currently in the process of decommissioning. */
  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }

  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes",
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }
5693
  /**
   * Sets the current generation stamp for legacy blocks
   */
  void setGenerationStampV1(long stamp) {
    generationStampV1.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for legacy blocks
   */
  long getGenerationStampV1() {
    return generationStampV1.getCurrentValue();
  }

  /**
   * Sets the current generation stamp for this filesystem
   */
  void setGenerationStampV2(long stamp) {
    generationStampV2.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for this filesystem
   */
  long getGenerationStampV2() {
    return generationStampV2.getCurrentValue();
  }

  /**
   * Upgrades the generation stamp for the filesystem
   * by reserving a sufficient range for all existing blocks.
   * Should be invoked only during the first upgrade to
   * sequential block IDs.
   */
  long upgradeGenerationStampToV2() {
    // Must only run once: the V2 stamp must still be at its initial value.
    Preconditions.checkState(generationStampV2.getCurrentValue() ==
        GenerationStamp.LAST_RESERVED_STAMP);

    // Reserve a fixed range above the current V1 stamp for legacy blocks.
    generationStampV2.skipTo(
        generationStampV1.getCurrentValue() +
        HdfsConstants.RESERVED_GENERATION_STAMPS_V1);

    // Everything below this limit is a legacy (randomly ID'd) block.
    generationStampV1Limit = generationStampV2.getCurrentValue();
    return generationStampV2.getCurrentValue();
  }

  /**
   * Sets the generation stamp that delineates random and sequentially
   * allocated block IDs.
   * @param stamp
   */
  void setGenerationStampV1Limit(long stamp) {
    // May only be set once, from its unset (grandfather) value.
    Preconditions.checkState(generationStampV1Limit ==
        GenerationStamp.GRANDFATHER_GENERATION_STAMP);
    generationStampV1Limit = stamp;
  }

  /**
   * Gets the value of the generation stamp that delineates sequential
   * and random block IDs.
   */
  long getGenerationStampAtblockIdSwitch() {
    return generationStampV1Limit;
  }

  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }

  /**
   * Sets the maximum allocated block ID for this filesystem. This is
   * the basis for allocating new block IDs.
   */
  void setLastAllocatedBlockId(long blockId) {
    blockIdGenerator.skipTo(blockId);
  }

  /**
   * Gets the maximum sequentially allocated block ID for this filesystem
   */
  long getLastAllocatedBlockId() {
    return blockIdGenerator.getCurrentValue();
  }
5778
5779 /**
5780 * Increments, logs and then returns the stamp
5781 */
5782 long nextGenerationStamp(boolean legacyBlock)
5783 throws IOException, SafeModeException {
5784 assert hasWriteLock();
5785 checkNameNodeSafeMode("Cannot get next generation stamp");
5786
5787 long gs;
5788 if (legacyBlock) {
5789 gs = getNextGenerationStampV1();
5790 getEditLog().logGenerationStampV1(gs);
5791 } else {
5792 gs = getNextGenerationStampV2();
5793 getEditLog().logGenerationStampV2(gs);
5794 }
5795
5796 // NB: callers sync the log
5797 return gs;
5798 }
5799
  /**
   * Allocate the next V1 (legacy) generation stamp.
   * @throws OutOfV1GenerationStampsException if the reserved V1 range is
   *         exhausted
   */
  @VisibleForTesting
  long getNextGenerationStampV1() throws IOException {
    long genStampV1 = generationStampV1.nextValue();

    if (genStampV1 >= generationStampV1Limit) {
      // We ran out of generation stamps for legacy blocks. In practice, it
      // is extremely unlikely as we reserved 1T v1 generation stamps. The
      // result is that we can no longer append to the legacy blocks that
      // were created before the upgrade to sequential block IDs.
      throw new OutOfV1GenerationStampsException();
    }

    return genStampV1;
  }

  /** Allocate the next V2 generation stamp (unbounded sequence). */
  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }

  /** First V2 stamp; stamps below this belong to legacy blocks. */
  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }

  /**
   * Determine whether the block ID was randomly generated (legacy) or
   * sequentially generated. The generation stamp value is used to
   * make the distinction.
   * @param block
   * @return true if the block ID was randomly generated, false otherwise.
   */
  boolean isLegacyBlock(Block block) {
    return block.getGenerationStamp() < getGenerationStampV1Limit();
  }
5834
5835 /**
5836 * Increments, logs and then returns the block ID
5837 */
5838 private long nextBlockId() throws IOException {
5839 assert hasWriteLock();
5840 checkNameNodeSafeMode("Cannot get next block ID");
5841 final long blockId = blockIdGenerator.nextValue();
5842 getEditLog().logAllocateBlockId(blockId);
5843 // NB: callers sync the log
5844 return blockId;
5845 }
5846
  /**
   * Validate that {@code block} is under construction and leased by
   * {@code clientName}, returning the file it belongs to.
   *
   * @param block the block being recovered/appended to
   * @param clientName the client claiming the lease
   * @return the under-construction file containing the block
   * @throws IOException if in safe mode, the block is unknown or not under
   *         construction, or its file is missing
   * @throws LeaseExpiredException if the client does not hold the lease
   */
  private INodeFile checkUCBlock(ExtendedBlock block,
      String clientName) throws IOException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get a new generation stamp and an "
        + "access token for block " + block);

    // check stored block state
    BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
    if (storedBlock == null ||
        storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
      throw new IOException(block +
          " does not exist or is not under Construction" + storedBlock);
    }

    // check file inode
    final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
    if (file == null || !file.isUnderConstruction()) {
      throw new IOException("The file " + storedBlock +
          " belonged to does not exist or it is not under construction.");
    }

    // check lease: only the current lease holder may act on the block
    if (clientName == null
        || !clientName.equals(file.getFileUnderConstructionFeature()
            .getClientName())) {
      throw new LeaseExpiredException("Lease mismatch: " + block +
          " is accessed by a non lease holder " + clientName);
    }

    return file;
  }
5878
5879 /**
5880 * Client is reporting some bad block locations.
5881 */
5882 void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
5883 checkOperation(OperationCategory.WRITE);
5884 NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
5885 writeLock();
5886 try {
5887 checkOperation(OperationCategory.WRITE);
5888 for (int i = 0; i < blocks.length; i++) {
5889 ExtendedBlock blk = blocks[i].getBlock();
5890 DatanodeInfo[] nodes = blocks[i].getLocations();
5891 String[] storageIDs = blocks[i].getStorageIDs();
5892 for (int j = 0; j < nodes.length; j++) {
5893 blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
5894 storageIDs == null ? null: storageIDs[j],
5895 "client machine reported it");
5896 }
5897 }
5898 } finally {
5899 writeUnlock();
5900 }
5901 }
5902
5903 /**
5904 * Get a new generation stamp together with an access token for
5905 * a block under construction
5906 *
5907 * This method is called for recovering a failed pipeline or setting up
5908 * a pipeline to append to a block.
5909 *
5910 * @param block a block
5911 * @param clientName the name of a client
5912 * @return a located block with a new generation stamp and an access token
5913 * @throws IOException if any error occurs
5914 */
5915 LocatedBlock updateBlockForPipeline(ExtendedBlock block,
5916 String clientName) throws IOException {
5917 LocatedBlock locatedBlock;
5918 checkOperation(OperationCategory.WRITE);
5919 writeLock();
5920 try {
5921 checkOperation(OperationCategory.WRITE);
5922
5923 // check vadility of parameters
5924 checkUCBlock(block, clientName);
5925
5926 // get a new generation stamp and an access token
5927 block.setGenerationStamp(
5928 nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
5929 locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
5930 blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
5931 } finally {
5932 writeUnlock();
5933 }
5934 // Ensure we record the new generation stamp
5935 getEditLog().logSync();
5936 return locatedBlock;
5937 }
5938
  /**
   * Update a pipeline for a block under construction
   *
   * @param clientName the name of the client
   * @param oldBlock and old block
   * @param newBlock a new block with a new generation stamp and length
   * @param newNodes datanodes in the pipeline
   * @param newStorageIDs storage IDs matching {@code newNodes}
   * @throws IOException if any error occurs
   */
  void updatePipeline(String clientName, ExtendedBlock oldBlock,
      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    // Retries of the same RPC are answered from the retry cache.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    LOG.info("updatePipeline(block=" + oldBlock
             + ", newGenerationStamp=" + newBlock.getGenerationStamp()
             + ", newLength=" + newBlock.getNumBytes()
             + ", newNodes=" + Arrays.asList(newNodes)
             + ", clientName=" + clientName
             + ")");
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Pipeline not updated");
      // Only the generation stamp and length may change; the ID must match.
      assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
        + oldBlock + " has different block identifier";
      updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
          newStorageIDs, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      // Record the outcome for future retries of this RPC.
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();
    LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
  }
5979
  /** @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[], String[]) */
  private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock,
      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
      boolean logRetryCache)
      throws IOException {
    assert hasWriteLock();
    // Check the validity of the block and the lease holder name.
    final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
    final BlockInfoUnderConstruction blockinfo
        = (BlockInfoUnderConstruction)pendingFile.getLastBlock();

    // check new GS & length: this is not expected — the new state must have
    // a strictly newer generation stamp and must not shrink the block.
    if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
        newBlock.getNumBytes() < blockinfo.getNumBytes()) {
      String msg = "Update " + oldBlock + " (len = " +
        blockinfo.getNumBytes() + ") to an older state: " + newBlock +
        " (len = " + newBlock.getNumBytes() +")";
      LOG.warn(msg);
      throw new IOException(msg);
    }

    // Update old block with the new generation stamp and new length
    blockinfo.setNumBytes(newBlock.getNumBytes());
    blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());

    // find the DatanodeDescriptor objects for the new pipeline members
    final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
        .getDatanodeStorageInfos(newNodes, newStorageIDs);
    blockinfo.setExpectedLocations(storages);

    // Persist the updated block list for the file to the edit log.
    String src = pendingFile.getFullPathName();
    dir.persistBlocks(src, pendingFile, logRetryCache);
  }
6013
6014 // rename was successful. If any part of the renamed subtree had
6015 // files that were being written to, update with new filename.
6016 void unprotectedChangeLease(String src, String dst) {
6017 assert hasWriteLock();
6018 leaseManager.changeLease(src, dst);
6019 }
6020
6021 /**
6022 * @return all the under-construction files in the lease map
6023 */
6024 Map<String, INodeFile> getFilesUnderConstruction() {
6025 synchronized (leaseManager) {
6026 return leaseManager.getINodesUnderConstruction();
6027 }
6028 }
6029
6030 /**
6031 * Register a Backup name-node, verifying that it belongs
6032 * to the correct namespace, and adding it to the set of
6033 * active journals if necessary.
6034 *
6035 * @param bnReg registration of the new BackupNode
6036 * @param nnReg registration of this NameNode
6037 * @throws IOException if the namespace IDs do not match
6038 */
6039 void registerBackupNode(NamenodeRegistration bnReg,
6040 NamenodeRegistration nnReg) throws IOException {
6041 writeLock();
6042 try {
6043 if(getFSImage().getStorage().getNamespaceID()
6044 != bnReg.getNamespaceID())
6045 throw new IOException("Incompatible namespaceIDs: "
6046 + " Namenode namespaceID = "
6047 + getFSImage().getStorage().getNamespaceID() + "; "
6048 + bnReg.getRole() +
6049 " node namespaceID = " + bnReg.getNamespaceID());
6050 if (bnReg.getRole() == NamenodeRole.BACKUP) {
6051 getFSImage().getEditLog().registerBackupNode(
6052 bnReg, nnReg);
6053 }
6054 } finally {
6055 writeUnlock();
6056 }
6057 }
6058
6059 /**
6060 * Release (unregister) backup node.
6061 * <p>
6062 * Find and remove the backup stream corresponding to the node.
6063 * @param registration
6064 * @throws IOException
6065 */
6066 void releaseBackupNode(NamenodeRegistration registration)
6067 throws IOException {
6068 checkOperation(OperationCategory.WRITE);
6069 writeLock();
6070 try {
6071 checkOperation(OperationCategory.WRITE);
6072 if(getFSImage().getStorage().getNamespaceID()
6073 != registration.getNamespaceID())
6074 throw new IOException("Incompatible namespaceIDs: "
6075 + " Namenode namespaceID = "
6076 + getFSImage().getStorage().getNamespaceID() + "; "
6077 + registration.getRole() +
6078 " node namespaceID = " + registration.getNamespaceID());
6079 getEditLog().releaseBackupStream(registration);
6080 } finally {
6081 writeUnlock();
6082 }
6083 }
6084
  /**
   * Immutable pairing of a corrupt block with the full path of the file
   * that owns it, as returned by {@code listCorruptFileBlocks}.
   */
  static class CorruptFileBlockInfo {
    // Full path of the file containing the corrupt block.
    final String path;
    // The corrupt block itself.
    final Block block;

    public CorruptFileBlockInfo(String p, Block b) {
      path = p;
      block = b;
    }

    /** @return "blockName&lt;TAB&gt;path", the line format consumed by fsck output. */
    @Override
    public String toString() {
      return block.getBlockName() + "\t" + path;
    }
  }
6099 /**
6100 * @param path Restrict corrupt files to this portion of namespace.
6101 * @param startBlockAfter Support for continuation; the set of files we return
6102 * back is ordered by blockid; startBlockAfter tells where to start from
6103 * @return a list in which each entry describes a corrupt file/block
6104 * @throws AccessControlException
6105 * @throws IOException
6106 */
6107 Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
6108 String[] cookieTab) throws IOException {
6109 checkSuperuserPrivilege();
6110 checkOperation(OperationCategory.READ);
6111 readLock();
6112 try {
6113 checkOperation(OperationCategory.READ);
6114 if (!isPopulatingReplQueues()) {
6115 throw new IOException("Cannot run listCorruptFileBlocks because " +
6116 "replication queues have not been initialized.");
6117 }
6118 // print a limited # of corrupt files per call
6119 int count = 0;
6120 ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();
6121
6122 final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();
6123
6124 if (cookieTab == null) {
6125 cookieTab = new String[] { null };
6126 }
6127 int skip = getIntCookie(cookieTab[0]);
6128 for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
6129 blkIterator.next();
6130 }
6131
6132 while (blkIterator.hasNext()) {
6133 Block blk = blkIterator.next();
6134 final INode inode = (INode)blockManager.getBlockCollection(blk);
6135 skip++;
6136 if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
6137 String src = FSDirectory.getFullPathName(inode);
6138 if (src.startsWith(path)){
6139 corruptFiles.add(new CorruptFileBlockInfo(src, blk));
6140 count++;
6141 if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
6142 break;
6143 }
6144 }
6145 }
6146 cookieTab[0] = String.valueOf(skip);
6147 LOG.info("list corrupt file blocks returned: " + count);
6148 return corruptFiles;
6149 } finally {
6150 readUnlock();
6151 }
6152 }
6153
6154 /**
6155 * Convert string cookie to integer.
6156 */
6157 private static int getIntCookie(String cookie){
6158 int c;
6159 if(cookie == null){
6160 c = 0;
6161 } else {
6162 try{
6163 c = Integer.parseInt(cookie);
6164 }catch (NumberFormatException e) {
6165 c = 0;
6166 }
6167 }
6168 c = Math.max(0, c);
6169 return c;
6170 }
6171
6172 /**
6173 * Create delegation token secret manager
6174 */
6175 private DelegationTokenSecretManager createDelegationTokenSecretManager(
6176 Configuration conf) {
6177 return new DelegationTokenSecretManager(conf.getLong(
6178 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
6179 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
6180 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
6181 DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
6182 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
6183 DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
6184 DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
6185 conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
6186 DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
6187 this);
6188 }
6189
6190 /**
6191 * Returns the DelegationTokenSecretManager instance in the namesystem.
6192 * @return delegation token secret manager object
6193 */
6194 DelegationTokenSecretManager getDelegationTokenSecretManager() {
6195 return dtSecretManager;
6196 }
6197
6198 /**
6199 * @param renewer
6200 * @return Token<DelegationTokenIdentifier>
6201 * @throws IOException
6202 */
6203 Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
6204 throws IOException {
6205 Token<DelegationTokenIdentifier> token;
6206 checkOperation(OperationCategory.WRITE);
6207 writeLock();
6208 try {
6209 checkOperation(OperationCategory.WRITE);
6210 checkNameNodeSafeMode("Cannot issue delegation token");
6211 if (!isAllowedDelegationTokenOp()) {
6212 throw new IOException(
6213 "Delegation Token can be issued only with kerberos or web authentication");
6214 }
6215 if (dtSecretManager == null || !dtSecretManager.isRunning()) {
6216 LOG.warn("trying to get DT with no secret manager running");
6217 return null;
6218 }
6219
6220 UserGroupInformation ugi = getRemoteUser();
6221 String user = ugi.getUserName();
6222 Text owner = new Text(user);
6223 Text realUser = null;
6224 if (ugi.getRealUser() != null) {
6225 realUser = new Text(ugi.getRealUser().getUserName());
6226 }
6227 DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
6228 renewer, realUser);
6229 token = new Token<DelegationTokenIdentifier>(
6230 dtId, dtSecretManager);
6231 long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
6232 getEditLog().logGetDelegationToken(dtId, expiryTime);
6233 } finally {
6234 writeUnlock();
6235 }
6236 getEditLog().logSync();
6237 return token;
6238 }
6239
6240 /**
6241 *
6242 * @param token
6243 * @return New expiryTime of the token
6244 * @throws InvalidToken
6245 * @throws IOException
6246 */
6247 long renewDelegationToken(Token<DelegationTokenIdentifier> token)
6248 throws InvalidToken, IOException {
6249 long expiryTime;
6250 checkOperation(OperationCategory.WRITE);
6251 writeLock();
6252 try {
6253 checkOperation(OperationCategory.WRITE);
6254
6255 checkNameNodeSafeMode("Cannot renew delegation token");
6256 if (!isAllowedDelegationTokenOp()) {
6257 throw new IOException(
6258 "Delegation Token can be renewed only with kerberos or web authentication");
6259 }
6260 String renewer = getRemoteUser().getShortUserName();
6261 expiryTime = dtSecretManager.renewToken(token, renewer);
6262 DelegationTokenIdentifier id = new DelegationTokenIdentifier();
6263 ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
6264 DataInputStream in = new DataInputStream(buf);
6265 id.readFields(in);
6266 getEditLog().logRenewDelegationToken(id, expiryTime);
6267 } finally {
6268 writeUnlock();
6269 }
6270 getEditLog().logSync();
6271 return expiryTime;
6272 }
6273
6274 /**
6275 *
6276 * @param token
6277 * @throws IOException
6278 */
6279 void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
6280 throws IOException {
6281 checkOperation(OperationCategory.WRITE);
6282 writeLock();
6283 try {
6284 checkOperation(OperationCategory.WRITE);
6285
6286 checkNameNodeSafeMode("Cannot cancel delegation token");
6287 String canceller = getRemoteUser().getUserName();
6288 DelegationTokenIdentifier id = dtSecretManager
6289 .cancelToken(token, canceller);
6290 getEditLog().logCancelDelegationToken(id);
6291 } finally {
6292 writeUnlock();
6293 }
6294 getEditLog().logSync();
6295 }
6296
  /** Snapshot the delegation-token secret manager state for fsimage saving. */
  SecretManagerState saveSecretManagerState() {
    return dtSecretManager.saveSecretManagerState();
  }

  /**
   * Load the secret manager state from a legacy-format fsimage stream.
   * @param in load the state of secret manager from input stream
   */
  void loadSecretManagerStateCompat(DataInput in) throws IOException {
    dtSecretManager.loadSecretManagerStateCompat(in);
  }

  /**
   * Load the secret manager state from protobuf-based fsimage sections.
   *
   * @param s the secret manager section header
   * @param keys persisted delegation keys
   * @param tokens persisted delegation tokens
   */
  void loadSecretManagerState(SecretManagerSection s,
      List<SecretManagerSection.DelegationKey> keys,
      List<SecretManagerSection.PersistToken> tokens) throws IOException {
    dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
  }
6313
6314 /**
6315 * Log the updateMasterKey operation to edit logs
6316 *
6317 * @param key new delegation key.
6318 */
6319 public void logUpdateMasterKey(DelegationKey key) {
6320
6321 assert !isInSafeMode() :
6322 "this should never be called while in safemode, since we stop " +
6323 "the DT manager before entering safemode!";
6324 // No need to hold FSN lock since we don't access any internal
6325 // structures, and this is stopped before the FSN shuts itself
6326 // down, etc.
6327 getEditLog().logUpdateMasterKey(key);
6328 getEditLog().logSync();
6329 }
6330
6331 /**
6332 * Log the cancellation of expired tokens to edit logs
6333 *
6334 * @param id token identifier to cancel
6335 */
6336 public void logExpireDelegationToken(DelegationTokenIdentifier id) {
6337 assert !isInSafeMode() :
6338 "this should never be called while in safemode, since we stop " +
6339 "the DT manager before entering safemode!";
6340 // No need to hold FSN lock since we don't access any internal
6341 // structures, and this is stopped before the FSN shuts itself
6342 // down, etc.
6343 getEditLog().logCancelDelegationToken(id);
6344 }
6345
  /**
   * Record a lease reassignment (e.g. during lease recovery) in the edit
   * log. Caller must hold the FSN write lock; callers are responsible for
   * syncing the log.
   */
  private void logReassignLease(String leaseHolder, String src,
      String newHolder) {
    assert hasWriteLock();
    getEditLog().logReassignLease(leaseHolder, src, newHolder);
  }
6351
6352 /**
6353 *
6354 * @return true if delegation token operation is allowed
6355 */
6356 private boolean isAllowedDelegationTokenOp() throws IOException {
6357 AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
6358 if (UserGroupInformation.isSecurityEnabled()
6359 && (authMethod != AuthenticationMethod.KERBEROS)
6360 && (authMethod != AuthenticationMethod.KERBEROS_SSL)
6361 && (authMethod != AuthenticationMethod.CERTIFICATE)) {
6362 return false;
6363 }
6364 return true;
6365 }
6366
6367 /**
6368 * Returns authentication method used to establish the connection
6369 * @return AuthenticationMethod used to establish connection
6370 * @throws IOException
6371 */
6372 private AuthenticationMethod getConnectionAuthenticationMethod()
6373 throws IOException {
6374 UserGroupInformation ugi = getRemoteUser();
6375 AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
6376 if (authMethod == AuthenticationMethod.PROXY) {
6377 authMethod = ugi.getRealUser().getAuthenticationMethod();
6378 }
6379 return authMethod;
6380 }
6381
6382 /**
6383 * Client invoked methods are invoked over RPC and will be in
6384 * RPC call context even if the client exits.
6385 */
6386 private boolean isExternalInvocation() {
6387 return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
6388 }
6389
6390 private static InetAddress getRemoteIp() {
6391 InetAddress ip = Server.getRemoteIp();
6392 if (ip != null) {
6393 return ip;
6394 }
6395 return NamenodeWebHdfsMethods.getRemoteIp();
6396 }
6397
  // optimize ugi lookup for RPC operations to avoid a trip through
  // UGI.getCurrentUser which is synch'ed
  /** @return the UGI of the current remote caller, via the NameNode helper. */
  private static UserGroupInformation getRemoteUser() throws IOException {
    return NameNode.getRemoteUser();
  }
6403
6404 /**
6405 * Log fsck event in the audit log
6406 */
6407 void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
6408 if (isAuditEnabled()) {
6409 logAuditEvent(true, getRemoteUser(),
6410 remoteAddress,
6411 "fsck", src, null, null);
6412 }
6413 }
6414 /**
6415 * Register NameNodeMXBean
6416 */
6417 private void registerMXBean() {
6418 mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
6419 }
6420
6421 /**
6422 * Class representing Namenode information for JMX interfaces
6423 */
6424 @Override // NameNodeMXBean
6425 public String getVersion() {
6426 return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
6427 }
6428
6429 @Override // NameNodeMXBean
6430 public long getUsed() {
6431 return this.getCapacityUsed();
6432 }
6433
6434 @Override // NameNodeMXBean
6435 public long getFree() {
6436 return this.getCapacityRemaining();
6437 }
6438
6439 @Override // NameNodeMXBean
6440 public long getTotal() {
6441 return this.getCapacityTotal();
6442 }
6443
6444 @Override // NameNodeMXBean
6445 public String getSafemode() {
6446 if (!this.isInSafeMode())
6447 return "";
6448 return "Safe mode is ON. " + this.getSafeModeTip();
6449 }
6450
6451 @Override // NameNodeMXBean
6452 public boolean isUpgradeFinalized() {
6453 return this.getFSImage().isUpgradeFinalized();
6454 }
6455
6456 @Override // NameNodeMXBean
6457 public long getNonDfsUsedSpace() {
6458 return datanodeStatistics.getCapacityUsedNonDFS();
6459 }
6460
6461 @Override // NameNodeMXBean
6462 public float getPercentUsed() {
6463 return datanodeStatistics.getCapacityUsedPercent();
6464 }
6465
6466 @Override // NameNodeMXBean
6467 public long getBlockPoolUsedSpace() {
6468 return datanodeStatistics.getBlockPoolUsed();
6469 }
6470
6471 @Override // NameNodeMXBean
6472 public float getPercentBlockPoolUsed() {
6473 return datanodeStatistics.getPercentBlockPoolUsed();
6474 }
6475
6476 @Override // NameNodeMXBean
6477 public float getPercentRemaining() {
6478 return datanodeStatistics.getCapacityRemainingPercent();
6479 }
6480
6481 @Override // NameNodeMXBean
6482 public long getCacheCapacity() {
6483 return datanodeStatistics.getCacheCapacity();
6484 }
6485
6486 @Override // NameNodeMXBean
6487 public long getCacheUsed() {
6488 return datanodeStatistics.getCacheUsed();
6489 }
6490
6491 @Override // NameNodeMXBean
6492 public long getTotalBlocks() {
6493 return getBlocksTotal();
6494 }
6495
6496 @Override // NameNodeMXBean
6497 @Metric
6498 public long getTotalFiles() {
6499 return getFilesTotal();
6500 }
6501
6502 @Override // NameNodeMXBean
6503 public long getNumberOfMissingBlocks() {
6504 return getMissingBlocksCount();
6505 }
6506
6507 @Override // NameNodeMXBean
6508 public int getThreads() {
6509 return ManagementFactory.getThreadMXBean().getThreadCount();
6510 }
6511
6512 /**
6513 * Returned information is a JSON representation of map with host name as the
6514 * key and value is a map of live node attribute keys to its values
6515 */
6516 @Override // NameNodeMXBean
6517 public String getLiveNodes() {
6518 final Map<String, Map<String,Object>> info =
6519 new HashMap<String, Map<String,Object>>();
6520 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
6521 blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
6522 for (DatanodeDescriptor node : live) {
6523 Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6524 .put("infoAddr", node.getInfoAddr())
6525 .put("infoSecureAddr", node.getInfoSecureAddr())
6526 .put("xferaddr", node.getXferAddr())
6527 .put("lastContact", getLastContact(node))
6528 .put("usedSpace", getDfsUsed(node))
6529 .put("adminState", node.getAdminState().toString())
6530 .put("nonDfsUsedSpace", node.getNonDfsUsed())
6531 .put("capacity", node.getCapacity())
6532 .put("numBlocks", node.numBlocks())
6533 .put("version", node.getSoftwareVersion())
6534 .put("used", node.getDfsUsed())
6535 .put("remaining", node.getRemaining())
6536 .put("blockScheduled", node.getBlocksScheduled())
6537 .put("blockPoolUsed", node.getBlockPoolUsed())
6538 .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
6539 .put("volfails", node.getVolumeFailures())
6540 .build();
6541
6542 info.put(node.getHostName(), innerinfo);
6543 }
6544 return JSON.toString(info);
6545 }
6546
6547 /**
6548 * Returned information is a JSON representation of map with host name as the
6549 * key and value is a map of dead node attribute keys to its values
6550 */
6551 @Override // NameNodeMXBean
6552 public String getDeadNodes() {
6553 final Map<String, Map<String, Object>> info =
6554 new HashMap<String, Map<String, Object>>();
6555 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
6556 blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
6557 for (DatanodeDescriptor node : dead) {
6558 Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
6559 .put("lastContact", getLastContact(node))
6560 .put("decommissioned", node.isDecommissioned())
6561 .put("xferaddr", node.getXferAddr())
6562 .build();
6563 info.put(node.getHostName(), innerinfo);
6564 }
6565 return JSON.toString(info);
6566 }
6567
6568 /**
6569 * Returned information is a JSON representation of map with host name as the
6570 * key and value is a map of decomisioning node attribute keys to its values
6571 */
6572 @Override // NameNodeMXBean
6573 public String getDecomNodes() {
6574 final Map<String, Map<String, Object>> info =
6575 new HashMap<String, Map<String, Object>>();
6576 final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
6577 ).getDecommissioningNodes();
6578 for (DatanodeDescriptor node : decomNodeList) {
6579 Map<String, Object> innerinfo = ImmutableMap
6580 .<String, Object> builder()
6581 .put("xferaddr", node.getXferAddr())
6582 .put("underReplicatedBlocks",
6583 node.decommissioningStatus.getUnderReplicatedBlocks())
6584 .put("decommissionOnlyReplicas",
6585 node.decommissioningStatus.getDecommissionOnlyReplicas())
6586 .put("underReplicateInOpenFiles",
6587 node.decommissioningStatus.getUnderReplicatedInOpenFiles())
6588 .build();
6589 info.put(node.getHostName(), innerinfo);
6590 }
6591 return JSON.toString(info);
6592 }
6593
  /** @return seconds since the given node's last heartbeat. */
  private long getLastContact(DatanodeDescriptor alivenode) {
    return (Time.now() - alivenode.getLastUpdate())/1000;
  }

  /** @return DFS space used on the given node, in bytes. */
  private long getDfsUsed(DatanodeDescriptor alivenode) {
    return alivenode.getDfsUsed();
  }
6601
  /** @return the cluster ID recorded in the fsimage storage. */
  @Override // NameNodeMXBean
  public String getClusterId() {
    return dir.fsImage.getStorage().getClusterID();
  }

  /** @return the ID of the block pool this namesystem serves. */
  @Override // NameNodeMXBean
  public String getBlockPoolId() {
    return blockPoolId;
  }
6611
6612 @Override // NameNodeMXBean
6613 public String getNameDirStatuses() {
6614 Map<String, Map<File, StorageDirType>> statusMap =
6615 new HashMap<String, Map<File, StorageDirType>>();
6616
6617 Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
6618 for (Iterator<StorageDirectory> it
6619 = getFSImage().getStorage().dirIterator(); it.hasNext();) {
6620 StorageDirectory st = it.next();
6621 activeDirs.put(st.getRoot(), st.getStorageDirType());
6622 }
6623 statusMap.put("active", activeDirs);
6624
6625 List<Storage.StorageDirectory> removedStorageDirs
6626 = getFSImage().getStorage().getRemovedStorageDirs();
6627 Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
6628 for (StorageDirectory st : removedStorageDirs) {
6629 failedDirs.put(st.getRoot(), st.getStorageDirType());
6630 }
6631 statusMap.put("failed", failedDirs);
6632
6633 return JSON.toString(statusMap);
6634 }
6635
  /**
   * Report min/median/max/stdDev of per-datanode DFS usage percentages as
   * JSON. All values are formatted as "NN.NN%"; zeros are reported when
   * there are no live nodes.
   */
  @Override // NameNodeMXBean
  public String getNodeUsage() {
    float median = 0;
    float max = 0;
    float min = 0;
    float dev = 0;

    final Map<String, Map<String,Object>> info =
      new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);

    if (live.size() > 0) {
      float totalDfsUsed = 0;
      float[] usages = new float[live.size()];
      int i = 0;
      for (DatanodeDescriptor dn : live) {
        usages[i++] = dn.getDfsUsedPercent();
        totalDfsUsed += dn.getDfsUsedPercent();
      }
      // After this division totalDfsUsed holds the MEAN usage percentage,
      // used below as the center for the standard deviation.
      totalDfsUsed /= live.size();
      Arrays.sort(usages);
      // For an even count this picks the upper of the two middle values.
      median = usages[usages.length / 2];
      max = usages[usages.length - 1];
      min = usages[0];

      // Population standard deviation around the mean.
      for (i = 0; i < usages.length; i++) {
        dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
      }
      dev = (float) Math.sqrt(dev / usages.length);
    }

    final Map<String, Object> innerInfo = new HashMap<String, Object>();
    innerInfo.put("min", StringUtils.format("%.2f%%", min));
    innerInfo.put("median", StringUtils.format("%.2f%%", median));
    innerInfo.put("max", StringUtils.format("%.2f%%", max));
    innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
    info.put("nodeUsage", innerInfo);

    return JSON.toString(info);
  }
6677
  /**
   * Report the status of every edit-log journal as a JSON list; each entry
   * carries the journal manager, whether it is required/disabled, and a
   * human-readable stream state.
   */
  @Override // NameNodeMXBean
  public String getNameJournalStatus() {
    List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
    FSEditLog log = getFSImage().getEditLog();
    if (log != null) {
      // Whether this NN currently writes edits (active) or only reads them.
      boolean openForWrite = log.isOpenForWrite();
      for (JournalAndStream jas : log.getJournals()) {
        final Map<String, String> jasMap = new HashMap<String, String>();
        String manager = jas.getManager().toString();

        jasMap.put("required", String.valueOf(jas.isRequired()));
        jasMap.put("disabled", String.valueOf(jas.isDisabled()));
        jasMap.put("manager", manager);

        if (jas.isDisabled()) {
          jasMap.put("stream", "Failed");
        } else if (openForWrite) {
          EditLogOutputStream elos = jas.getCurrentStream();
          if (elos != null) {
            jasMap.put("stream", elos.generateReport());
          } else {
            jasMap.put("stream", "not currently writing");
          }
        } else {
          jasMap.put("stream", "open for read");
        }
        jasList.add(jasMap);
      }
    }
    return JSON.toString(jasList);
  }
6709
  /**
   * Report the last applied/written and most recent checkpoint transaction
   * IDs as a JSON map.
   */
  @Override // NameNodeMXBean
  public String getJournalTransactionInfo() {
    Map<String, String> txnIdMap = new HashMap<String, String>();
    txnIdMap.put("LastAppliedOrWrittenTxId",
        Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
    txnIdMap.put("MostRecentCheckpointTxId",
        Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
    return JSON.toString(txnIdMap);
  }
6719
  /** @return the NameNode start time, as the Date's default string form. */
  @Override // NameNodeMXBean
  public String getNNStarted() {
    return getStartTime().toString();
  }

  /** @return build date, user and branch of this software build. */
  @Override // NameNodeMXBean
  public String getCompileInfo() {
    return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
        " from " + VersionInfo.getBranch();
  }
6730
  /** @return the block manager. */
  public BlockManager getBlockManager() {
    return blockManager;
  }
  /** @return the FSDirectory. */
  public FSDirectory getFSDirectory() {
    return dir;
  }
  /** @return the cache manager. */
  public CacheManager getCacheManager() {
    return cacheManager;
  }
6743
6744 @Override // NameNodeMXBean
6745 public String getCorruptFiles() {
6746 List<String> list = new ArrayList<String>();
6747 Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
6748 try {
6749 corruptFileBlocks = listCorruptFileBlocks("/", null);
6750 int corruptFileCount = corruptFileBlocks.size();
6751 if (corruptFileCount != 0) {
6752 for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
6753 list.add(c.toString());
6754 }
6755 }
6756 } catch (IOException e) {
6757 LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
6758 }
6759 return JSON.toString(list);
6760 }
6761
  /** @return number of distinct datanode software versions in the cluster. */
  @Override //NameNodeMXBean
  public int getDistinctVersionCount() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
      .size();
  }

  /** @return map of datanode software version to the number of nodes running it. */
  @Override //NameNodeMXBean
  public Map<String, Integer> getDistinctVersions() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
  }

  /** @return the software version of this NameNode. */
  @Override //NameNodeMXBean
  public String getSoftwareVersion() {
    return VersionInfo.getVersion();
  }
6777
6778 /**
6779 * Verifies that the given identifier and password are valid and match.
6780 * @param identifier Token identifier.
6781 * @param password Password in the token.
6782 */
6783 public synchronized void verifyToken(DelegationTokenIdentifier identifier,
6784 byte[] password) throws InvalidToken, RetriableException {
6785 try {
6786 getDelegationTokenSecretManager().verifyToken(identifier, password);
6787 } catch (InvalidToken it) {
6788 if (inTransitionToActive()) {
6789 throw new RetriableException(it);
6790 }
6791 throw it;
6792 }
6793 }
6794
6795 @Override
6796 public boolean isGenStampInFuture(Block block) {
6797 if (isLegacyBlock(block)) {
6798 return block.getGenerationStamp() > getGenerationStampV1();
6799 } else {
6800 return block.getGenerationStamp() > getGenerationStampV2();
6801 }
6802 }
6803
  /** Test-only accessor for the edit log tailer (standby side). */
  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }

  /** Test-only setter to inject a fake edit log tailer. */
  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }

  /** Test-only setter to replace the coarse FSN lock. */
  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }

  /** Test-only accessor for the coarse FSN lock. */
  @VisibleForTesting
  public ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }

  /** Test-only accessor for the long-read lock. */
  @VisibleForTesting
  public ReentrantLock getLongReadLockForTests() {
    return fsLock.longReadLock;
  }

  /** Test-only accessor for the safe-mode state object (may be null). */
  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }

  /** Test-only setter to inject a fake NN resource checker. */
  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }
6838
  /** Delegates to the datanode manager's stale-node write-avoidance policy. */
  @Override
  public boolean isAvoidingStaleDataNodesForWrite() {
    return this.blockManager.getDatanodeManager()
        .shouldAvoidStaleDataNodesForWrite();
  }

  /** @return live datanodes minus those being decommissioned. */
  @Override // FSClusterStats
  public int getNumDatanodesInService() {
    return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
  }

  /** @return the snapshot manager. */
  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }
6853
  /**
   * Allow snapshot on a directory (superuser only); marks the directory
   * snapshottable, logs the operation, and audits external callers.
   */
  void allowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed in between.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.setSnapshottable(path, true);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logAllowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN lock to shorten lock hold time.
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "allowSnapshot", path, null, null);
    }
  }
6879
  /**
   * Disallow snapshot on a directory (superuser only); resets the
   * snapshottable flag, logs the operation, and audits external callers.
   */
  void disallowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed in between.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot disallow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.resetSnapshottable(path);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logDisallowSnapshot(path);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the FSN lock to shorten lock hold time.
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "disallowSnapshot", path, null, null);
    }
  }
6905
6906 /**
6907 * Create a snapshot
6908 * @param snapshotRoot The directory path where the snapshot is taken
6909 * @param snapshotName The name of the snapshot
6910 */
6911 String createSnapshot(String snapshotRoot, String snapshotName)
6912 throws SafeModeException, IOException {
6913 checkOperation(OperationCategory.WRITE);
6914 final FSPermissionChecker pc = getPermissionChecker();
6915 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
6916 null);
6917 if (cacheEntry != null && cacheEntry.isSuccess()) {
6918 return (String) cacheEntry.getPayload();
6919 }
6920 writeLock();
6921 String snapshotPath = null;
6922 try {
6923 checkOperation(OperationCategory.WRITE);
6924 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
6925 if (isPermissionEnabled) {
6926 checkOwner(pc, snapshotRoot);
6927 }
6928
6929 if (snapshotName == null || snapshotName.isEmpty()) {
6930 snapshotName = Snapshot.generateDefaultSnapshotName();
6931 }
6932 if(snapshotName != null){
6933 if (!DFSUtil.isValidNameForComponent(snapshotName)) {
6934 throw new InvalidPathException("Invalid snapshot name: "
6935 + snapshotName);
6936 }
6937 }
6938 dir.verifySnapshotName(snapshotName, snapshotRoot);
6939 dir.writeLock();
6940 try {
6941 snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName);
6942 } finally {
6943 dir.writeUnlock();
6944 }
6945 getEditLog().logCreateSnapshot(snapshotRoot, snapshotName,
6946 cacheEntry != null);
6947 } finally {
6948 writeUnlock();
6949 RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath);
6950 }
6951 getEditLog().logSync();
6952
6953 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6954 logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null);
6955 }
6956 return snapshotPath;
6957 }
6958
6959 /**
6960 * Rename a snapshot
6961 * @param path The directory path where the snapshot was taken
6962 * @param snapshotOldName Old snapshot name
6963 * @param snapshotNewName New snapshot name
6964 * @throws SafeModeException
6965 * @throws IOException
6966 */
6967 void renameSnapshot(String path, String snapshotOldName,
6968 String snapshotNewName) throws SafeModeException, IOException {
6969 checkOperation(OperationCategory.WRITE);
6970 final FSPermissionChecker pc = getPermissionChecker();
6971 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
6972 if (cacheEntry != null && cacheEntry.isSuccess()) {
6973 return; // Return previous response
6974 }
6975 writeLock();
6976 boolean success = false;
6977 try {
6978 checkOperation(OperationCategory.WRITE);
6979 checkNameNodeSafeMode("Cannot rename snapshot for " + path);
6980 if (isPermissionEnabled) {
6981 checkOwner(pc, path);
6982 }
6983 dir.verifySnapshotName(snapshotNewName, path);
6984
6985 snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
6986 getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
6987 cacheEntry != null);
6988 success = true;
6989 } finally {
6990 writeUnlock();
6991 RetryCache.setState(cacheEntry, success);
6992 }
6993 getEditLog().logSync();
6994
6995 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
6996 String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
6997 String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
6998 logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
6999 }
7000 }
7001
7002 /**
7003 * Get the list of snapshottable directories that are owned
7004 * by the current user. Return all the snapshottable directories if the
7005 * current user is a super user.
7006 * @return The list of all the current snapshottable directories
7007 * @throws IOException
7008 */
7009 public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
7010 throws IOException {
7011 SnapshottableDirectoryStatus[] status = null;
7012 checkOperation(OperationCategory.READ);
7013 final FSPermissionChecker checker = getPermissionChecker();
7014 readLock();
7015 try {
7016 checkOperation(OperationCategory.READ);
7017 final String user = checker.isSuperUser()? null : checker.getUser();
7018 status = snapshotManager.getSnapshottableDirListing(user);
7019 } finally {
7020 readUnlock();
7021 }
7022 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7023 logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
7024 }
7025 return status;
7026 }
7027
7028 /**
7029 * Get the difference between two snapshots (or between a snapshot and the
7030 * current status) of a snapshottable directory.
7031 *
7032 * @param path The full path of the snapshottable directory.
7033 * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
7034 * or empty string indicates the current tree.
7035 * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
7036 * empty string indicates the current tree.
7037 * @return A report about the difference between {@code fromSnapshot} and
7038 * {@code toSnapshot}. Modified/deleted/created/renamed files and
7039 * directories belonging to the snapshottable directories are listed
7040 * and labeled as M/-/+/R respectively.
7041 * @throws IOException
7042 */
7043 SnapshotDiffReport getSnapshotDiffReport(String path,
7044 String fromSnapshot, String toSnapshot) throws IOException {
7045 SnapshotDiffInfo diffs = null;
7046 checkOperation(OperationCategory.READ);
7047 final FSPermissionChecker pc = getPermissionChecker();
7048 readLock();
7049 try {
7050 checkOperation(OperationCategory.READ);
7051 if (isPermissionEnabled) {
7052 checkSubtreeReadPermission(pc, path, fromSnapshot);
7053 checkSubtreeReadPermission(pc, path, toSnapshot);
7054 }
7055 diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
7056 } finally {
7057 readUnlock();
7058 }
7059
7060 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7061 logAuditEvent(true, "computeSnapshotDiff", null, null, null);
7062 }
7063 return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
7064 path, fromSnapshot, toSnapshot,
7065 Collections.<DiffReportEntry> emptyList());
7066 }
7067
7068 private void checkSubtreeReadPermission(final FSPermissionChecker pc,
7069 final String snapshottablePath, final String snapshot)
7070 throws AccessControlException, UnresolvedLinkException {
7071 final String fromPath = snapshot == null?
7072 snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
7073 checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
7074 }
7075
7076 /**
7077 * Delete a snapshot of a snapshottable directory
7078 * @param snapshotRoot The snapshottable directory
7079 * @param snapshotName The name of the to-be-deleted snapshot
7080 * @throws SafeModeException
7081 * @throws IOException
7082 */
7083 void deleteSnapshot(String snapshotRoot, String snapshotName)
7084 throws SafeModeException, IOException {
7085 checkOperation(OperationCategory.WRITE);
7086 final FSPermissionChecker pc = getPermissionChecker();
7087
7088 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7089 if (cacheEntry != null && cacheEntry.isSuccess()) {
7090 return; // Return previous response
7091 }
7092 boolean success = false;
7093 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
7094 writeLock();
7095 try {
7096 checkOperation(OperationCategory.WRITE);
7097 checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
7098 if (isPermissionEnabled) {
7099 checkOwner(pc, snapshotRoot);
7100 }
7101
7102 List<INode> removedINodes = new ChunkedArrayList<INode>();
7103 dir.writeLock();
7104 try {
7105 snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
7106 collectedBlocks, removedINodes);
7107 dir.removeFromInodeMap(removedINodes);
7108 } finally {
7109 dir.writeUnlock();
7110 }
7111 removedINodes.clear();
7112 getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
7113 cacheEntry != null);
7114 success = true;
7115 } finally {
7116 writeUnlock();
7117 RetryCache.setState(cacheEntry, success);
7118 }
7119 getEditLog().logSync();
7120
7121 removeBlocks(collectedBlocks);
7122 collectedBlocks.clear();
7123
7124 if (auditLog.isInfoEnabled() && isExternalInvocation()) {
7125 String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
7126 logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
7127 }
7128 }
7129
7130 /**
7131 * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
7132 * @param toRemove the list of INodeDirectorySnapshottable to be removed
7133 */
7134 void removeSnapshottableDirs(List<INodeDirectorySnapshottable> toRemove) {
7135 if (snapshotManager != null) {
7136 snapshotManager.removeSnapshottable(toRemove);
7137 }
7138 }
7139
7140 RollingUpgradeInfo queryRollingUpgrade() throws IOException {
7141 checkSuperuserPrivilege();
7142 checkOperation(OperationCategory.READ);
7143 readLock();
7144 try {
7145 if (rollingUpgradeInfo != null) {
7146 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
7147 rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
7148 }
7149 return rollingUpgradeInfo;
7150 } finally {
7151 readUnlock();
7152 }
7153 }
7154
  /**
   * Start a rolling upgrade. Superuser-only. The non-HA path requires the
   * NameNode to be in safe mode (it saves an extra rollback checkpoint);
   * the HA path requires it NOT to be in safe mode.
   * @return info describing the newly started rolling upgrade
   */
  RollingUpgradeInfo startRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      long startTime = now();
      if (!haEnabled) { // for non-HA, we require NN to be in safemode
        startRollingUpgradeInternalForNonHA(startTime);
      } else { // for HA, NN cannot be in safemode
        checkNameNodeSafeMode("Failed to start rolling upgrade");
        startRollingUpgradeInternal(startTime);
      }

      // rollingUpgradeInfo was just set by the internal start method above.
      getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "startRollingUpgrade", null, null, null);
    }
    return rollingUpgradeInfo;
  }
7184
7185 /**
7186 * Update internal state to indicate that a rolling upgrade is in progress.
7187 * @param startTime
7188 */
7189 void startRollingUpgradeInternal(long startTime)
7190 throws IOException {
7191 checkRollingUpgrade("start rolling upgrade");
7192 getFSImage().checkUpgrade(this);
7193 setRollingUpgradeInfo(false, startTime);
7194 }
7195
7196 /**
7197 * Update internal state to indicate that a rolling upgrade is in progress for
7198 * non-HA setup. This requires the namesystem is in SafeMode and after doing a
7199 * checkpoint for rollback the namesystem will quit the safemode automatically
7200 */
7201 private void startRollingUpgradeInternalForNonHA(long startTime)
7202 throws IOException {
7203 Preconditions.checkState(!haEnabled);
7204 if (!isInSafeMode()) {
7205 throw new IOException("Safe mode should be turned ON "
7206 + "in order to create namespace image.");
7207 }
7208 checkRollingUpgrade("start rolling upgrade");
7209 getFSImage().checkUpgrade(this);
7210 // in non-HA setup, we do an extra ckpt to generate a rollback image
7211 getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
7212 LOG.info("Successfully saved namespace for preparing rolling upgrade.");
7213
7214 // leave SafeMode automatically
7215 setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
7216 setRollingUpgradeInfo(true, startTime);
7217 }
7218
7219 void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
7220 rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
7221 createdRollbackImages, startTime, 0L);
7222 }
7223
7224 public void setCreatedRollbackImages(boolean created) {
7225 if (rollingUpgradeInfo != null) {
7226 rollingUpgradeInfo.setCreatedRollbackImages(created);
7227 }
7228 }
7229
  /** @return the in-progress rolling upgrade info, or null when none. */
  public RollingUpgradeInfo getRollingUpgradeInfo() {
    return rollingUpgradeInfo;
  }
7233
  /** @return whether a rollback fsimage still needs to be created. */
  public boolean isNeedRollbackFsImage() {
    return needRollbackFsImage;
  }
7237
  /** Set whether a rollback fsimage still needs to be created. */
  public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
    this.needRollbackFsImage = needRollbackFsImage;
  }
7241
7242 @Override // NameNodeMXBean
7243 public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
7244 readLock();
7245 try {
7246 RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
7247 if (upgradeInfo != null) {
7248 return new RollingUpgradeInfo.Bean(upgradeInfo);
7249 }
7250 return null;
7251 } finally {
7252 readUnlock();
7253 }
7254 }
7255
  /** @return true iff a rolling upgrade is currently in progress. */
  public boolean isRollingUpgrade() {
    return rollingUpgradeInfo != null;
  }
7260
7261 void checkRollingUpgrade(String action) throws RollingUpgradeException {
7262 if (isRollingUpgrade()) {
7263 throw new RollingUpgradeException("Failed to " + action
7264 + " since a rolling upgrade is already in progress."
7265 + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
7266 }
7267 }
7268
  /**
   * Finish the rolling upgrade: log the finalization, save the namespace,
   * and rename the rollback checkpoint to a regular image. Superuser-only;
   * fails in safe mode.
   * @return info describing the finalized upgrade
   */
  RollingUpgradeInfo finalizeRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    final RollingUpgradeInfo returnInfo;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Failed to finalize rolling upgrade");

      returnInfo = finalizeRollingUpgradeInternal(now());
      getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
      getFSImage().saveNamespace(this);
      // Re-label the rollback checkpoint as an ordinary fsimage.
      getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
    } finally {
      writeUnlock();
    }

    // getEditLog().logSync() is not needed since it does saveNamespace

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
    }
    return returnInfo;
  }
7294
7295 RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
7296 throws RollingUpgradeException {
7297 if (!isRollingUpgrade()) {
7298 throw new RollingUpgradeException(
7299 "Failed to finalize rolling upgrade since there is no rolling upgrade in progress.");
7300 }
7301
7302 final long startTime = rollingUpgradeInfo.getStartTime();
7303 rollingUpgradeInfo = null;
7304 return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
7305 }
7306
  /**
   * Add a new cache directive.
   * @param directive the directive to add; must not already carry an ID
   * @param flags if FORCE is absent, wait for a cache rescan first
   * @return the ID assigned to the new directive
   * @throws IOException on safe mode, a pre-set ID, or manager failure
   */
  long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // A retried RPC replays the previously assigned ID from the cache.
    CacheEntryWithPayload cacheEntry =
        RetryCache.waitForCompletion(retryCache, null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (Long) cacheEntry.getPayload();
    }
    boolean success = false;
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();
    }
    writeLock();
    Long result = null;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache directive", safeMode);
      }
      if (directive.getId() != null) {
        // IDs are assigned by the manager; callers may not pick their own.
        throw new IOException("addDirective: you cannot specify an ID " +
            "for this operation.");
      }
      CacheDirectiveInfo effectiveDirective =
          cacheManager.addDirective(directive, pc, flags);
      getEditLog().logAddCacheDirectiveInfo(effectiveDirective,
          cacheEntry != null);
      result = effectiveDirective.getId();
      success = true;
    } finally {
      writeUnlock();
      // Sync only when the directive was actually logged.
      if (success) {
        getEditLog().logSync();
      }
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCacheDirective", null, null, null);
      }
      RetryCache.setState(cacheEntry, success, result);
    }
    return result;
  }
7351
7352 void modifyCacheDirective(CacheDirectiveInfo directive,
7353 EnumSet<CacheFlag> flags) throws IOException {
7354 checkOperation(OperationCategory.WRITE);
7355 final FSPermissionChecker pc = isPermissionEnabled ?
7356 getPermissionChecker() : null;
7357 boolean success = false;
7358 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7359 if (cacheEntry != null && cacheEntry.isSuccess()) {
7360 return;
7361 }
7362 if (!flags.contains(CacheFlag.FORCE)) {
7363 cacheManager.waitForRescanIfNeeded();
7364 }
7365 writeLock();
7366 try {
7367 checkOperation(OperationCategory.WRITE);
7368 if (isInSafeMode()) {
7369 throw new SafeModeException(
7370 "Cannot add cache directive", safeMode);
7371 }
7372 cacheManager.modifyDirective(directive, pc, flags);
7373 getEditLog().logModifyCacheDirectiveInfo(directive,
7374 cacheEntry != null);
7375 success = true;
7376 } finally {
7377 writeUnlock();
7378 if (success) {
7379 getEditLog().logSync();
7380 }
7381 if (isAuditEnabled() && isExternalInvocation()) {
7382 logAuditEvent(success, "modifyCacheDirective", null, null, null);
7383 }
7384 RetryCache.setState(cacheEntry, success);
7385 }
7386 }
7387
  /**
   * Remove a cache directive by ID.
   * @param id the ID of the directive to remove
   * @throws IOException on safe mode or manager failure
   */
  void removeCacheDirective(Long id) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // A retried RPC that already succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return;
    }
    boolean success = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache directives", safeMode);
      }
      cacheManager.removeDirective(id, pc);
      getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "removeCacheDirective", null, null,
            null);
      }
      RetryCache.setState(cacheEntry, success);
    }
    // NOTE(review): logSync runs even when removal failed, unlike
    // add/modifyCacheDirective which sync only on success — confirm intended.
    getEditLog().logSync();
  }
7417
7418 BatchedListEntries<CacheDirectiveEntry> listCacheDirectives(
7419 long startId, CacheDirectiveInfo filter) throws IOException {
7420 checkOperation(OperationCategory.READ);
7421 final FSPermissionChecker pc = isPermissionEnabled ?
7422 getPermissionChecker() : null;
7423 BatchedListEntries<CacheDirectiveEntry> results;
7424 cacheManager.waitForRescanIfNeeded();
7425 readLock();
7426 boolean success = false;
7427 try {
7428 checkOperation(OperationCategory.READ);
7429 results =
7430 cacheManager.listCacheDirectives(startId, filter, pc);
7431 success = true;
7432 } finally {
7433 readUnlock();
7434 if (isAuditEnabled() && isExternalInvocation()) {
7435 logAuditEvent(success, "listCacheDirectives", null, null,
7436 null);
7437 }
7438 }
7439 return results;
7440 }
7441
  /**
   * Create a new cache pool. Requires superuser privilege when permissions
   * are enabled.
   * @param req the pool to create
   * @throws IOException on safe mode, missing privilege, or manager failure
   */
  public void addCachePool(CachePoolInfo req) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
        getPermissionChecker() : null;
    // A retried RPC that already succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot add cache pool " + req.getPoolName(), safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      CachePoolInfo info = cacheManager.addCachePool(req);
      getEditLog().logAddCachePool(info, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "addCachePool", req.getPoolName(), null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }

    getEditLog().logSync();
  }
7474
7475 public void modifyCachePool(CachePoolInfo req) throws IOException {
7476 checkOperation(OperationCategory.WRITE);
7477 final FSPermissionChecker pc =
7478 isPermissionEnabled ? getPermissionChecker() : null;
7479 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
7480 if (cacheEntry != null && cacheEntry.isSuccess()) {
7481 return; // Return previous response
7482 }
7483 writeLock();
7484 boolean success = false;
7485 try {
7486 checkOperation(OperationCategory.WRITE);
7487 if (isInSafeMode()) {
7488 throw new SafeModeException(
7489 "Cannot modify cache pool " + req.getPoolName(), safeMode);
7490 }
7491 if (pc != null) {
7492 pc.checkSuperuserPrivilege();
7493 }
7494 cacheManager.modifyCachePool(req);
7495 getEditLog().logModifyCachePool(req, cacheEntry != null);
7496 success = true;
7497 } finally {
7498 writeUnlock();
7499 if (isAuditEnabled() && isExternalInvocation()) {
7500 logAuditEvent(success, "modifyCachePool", req.getPoolName(), null, null);
7501 }
7502 RetryCache.setState(cacheEntry, success);
7503 }
7504
7505 getEditLog().logSync();
7506 }
7507
  /**
   * Remove a cache pool by name. Requires superuser privilege when
   * permissions are enabled.
   * @param cachePoolName the name of the pool to remove
   * @throws IOException on safe mode, missing privilege, or manager failure
   */
  public void removeCachePool(String cachePoolName) throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc =
        isPermissionEnabled ? getPermissionChecker() : null;
    // A retried RPC that already succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      if (isInSafeMode()) {
        throw new SafeModeException(
            "Cannot remove cache pool " + cachePoolName, safeMode);
      }
      if (pc != null) {
        pc.checkSuperuserPrivilege();
      }
      cacheManager.removeCachePool(cachePoolName);
      getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(success, "removeCachePool", cachePoolName, null, null);
      }
      RetryCache.setState(cacheEntry, success);
    }

    getEditLog().logSync();
  }
7540
7541 public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey)
7542 throws IOException {
7543 final FSPermissionChecker pc =
7544 isPermissionEnabled ? getPermissionChecker() : null;
7545 BatchedListEntries<CachePoolEntry> results;
7546 checkOperation(OperationCategory.READ);
7547 boolean success = false;
7548 cacheManager.waitForRescanIfNeeded();
7549 readLock();
7550 try {
7551 checkOperation(OperationCategory.READ);
7552 results = cacheManager.listCachePools(pc, prevKey);
7553 success = true;
7554 } finally {
7555 readUnlock();
7556 if (isAuditEnabled() && isExternalInvocation()) {
7557 logAuditEvent(success, "listCachePools", null, null, null);
7558 }
7559 }
7560 return results;
7561 }
7562
7563 void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
7564 aclConfigFlag.checkForApiCall();
7565 HdfsFileStatus resultingStat = null;
7566 FSPermissionChecker pc = getPermissionChecker();
7567 checkOperation(OperationCategory.WRITE);
7568 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7569 writeLock();
7570 try {
7571 checkOperation(OperationCategory.WRITE);
7572 checkNameNodeSafeMode("Cannot modify ACL entries on " + src);
7573 src = FSDirectory.resolvePath(src, pathComponents, dir);
7574 checkOwner(pc, src);
7575 dir.modifyAclEntries(src, aclSpec);
7576 resultingStat = getAuditFileInfo(src, false);
7577 } finally {
7578 writeUnlock();
7579 }
7580 getEditLog().logSync();
7581 logAuditEvent(true, "modifyAclEntries", src, null, resultingStat);
7582 }
7583
7584 void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
7585 aclConfigFlag.checkForApiCall();
7586 HdfsFileStatus resultingStat = null;
7587 FSPermissionChecker pc = getPermissionChecker();
7588 checkOperation(OperationCategory.WRITE);
7589 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7590 writeLock();
7591 try {
7592 checkOperation(OperationCategory.WRITE);
7593 checkNameNodeSafeMode("Cannot remove ACL entries on " + src);
7594 src = FSDirectory.resolvePath(src, pathComponents, dir);
7595 checkOwner(pc, src);
7596 dir.removeAclEntries(src, aclSpec);
7597 resultingStat = getAuditFileInfo(src, false);
7598 } finally {
7599 writeUnlock();
7600 }
7601 getEditLog().logSync();
7602 logAuditEvent(true, "removeAclEntries", src, null, resultingStat);
7603 }
7604
7605 void removeDefaultAcl(String src) throws IOException {
7606 aclConfigFlag.checkForApiCall();
7607 HdfsFileStatus resultingStat = null;
7608 FSPermissionChecker pc = getPermissionChecker();
7609 checkOperation(OperationCategory.WRITE);
7610 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7611 writeLock();
7612 try {
7613 checkOperation(OperationCategory.WRITE);
7614 checkNameNodeSafeMode("Cannot remove default ACL entries on " + src);
7615 src = FSDirectory.resolvePath(src, pathComponents, dir);
7616 checkOwner(pc, src);
7617 dir.removeDefaultAcl(src);
7618 resultingStat = getAuditFileInfo(src, false);
7619 } finally {
7620 writeUnlock();
7621 }
7622 getEditLog().logSync();
7623 logAuditEvent(true, "removeDefaultAcl", src, null, resultingStat);
7624 }
7625
7626 void removeAcl(String src) throws IOException {
7627 aclConfigFlag.checkForApiCall();
7628 HdfsFileStatus resultingStat = null;
7629 FSPermissionChecker pc = getPermissionChecker();
7630 checkOperation(OperationCategory.WRITE);
7631 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7632 writeLock();
7633 try {
7634 checkOperation(OperationCategory.WRITE);
7635 checkNameNodeSafeMode("Cannot remove ACL on " + src);
7636 src = FSDirectory.resolvePath(src, pathComponents, dir);
7637 checkOwner(pc, src);
7638 dir.removeAcl(src);
7639 resultingStat = getAuditFileInfo(src, false);
7640 } finally {
7641 writeUnlock();
7642 }
7643 getEditLog().logSync();
7644 logAuditEvent(true, "removeAcl", src, null, resultingStat);
7645 }
7646
7647 void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
7648 aclConfigFlag.checkForApiCall();
7649 HdfsFileStatus resultingStat = null;
7650 FSPermissionChecker pc = getPermissionChecker();
7651 checkOperation(OperationCategory.WRITE);
7652 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
7653 writeLock();
7654 try {
7655 checkOperation(OperationCategory.WRITE);
7656 checkNameNodeSafeMode("Cannot set ACL on " + src);
7657 src = FSDirectory.resolvePath(src, pathComponents, dir);
7658 checkOwner(pc, src);
7659 dir.setAcl(src, aclSpec);
7660 resultingStat = getAuditFileInfo(src, false);
7661 } finally {
7662 writeUnlock();
7663 }
7664 getEditLog().logSync();
7665 logAuditEvent(true, "setAcl", src, null, resultingStat);
7666 }
7667
7668 AclStatus getAclStatus(String src) throws IOException {
7669 aclConfigFlag.checkForApiCall();
7670 FSPermissionChecker pc = getPermissionChecker();
7671 checkOperation(OperationCategory.READ);
7672 readLock();
7673 try {
7674 checkOperation(OperationCategory.READ);
7675 if (isPermissionEnabled) {
7676 checkPermission(pc, src, false, null, null, null, null);
7677 }
7678 return dir.getAclStatus(src);
7679 } finally {
7680 readUnlock();
7681 }
7682 }
7683
7684 /**
7685 * Default AuditLogger implementation; used when no access logger is
7686 * defined in the config file. It can also be explicitly listed in the
7687 * config file.
7688 */
7689 private static class DefaultAuditLogger extends HdfsAuditLogger {
7690
7691 private boolean logTokenTrackingId;
7692
7693 @Override
7694 public void initialize(Configuration conf) {
7695 logTokenTrackingId = conf.getBoolean(
7696 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
7697 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT);
7698 }
7699
7700 @Override
7701 public void logAuditEvent(boolean succeeded, String userName,
7702 InetAddress addr, String cmd, String src, String dst,
7703 FileStatus status, UserGroupInformation ugi,
7704 DelegationTokenSecretManager dtSecretManager) {
7705 if (auditLog.isInfoEnabled()) {
7706 final StringBuilder sb = auditBuffer.get();
7707 sb.setLength(0);
7708 sb.append("allowed=").append(succeeded).append("\t");
7709 sb.append("ugi=").append(userName).append("\t");
7710 sb.append("ip=").append(addr).append("\t");
7711 sb.append("cmd=").append(cmd).append("\t");
7712 sb.append("src=").append(src).append("\t");
7713 sb.append("dst=").append(dst).append("\t");
7714 if (null == status) {
7715 sb.append("perm=null");
7716 } else {
7717 sb.append("perm=");
7718 sb.append(status.getOwner()).append(":");
7719 sb.append(status.getGroup()).append(":");
7720 sb.append(status.getPermission());
7721 }
7722 if (logTokenTrackingId) {
7723 sb.append("\t").append("trackingId=");
7724 String trackingId = null;
7725 if (ugi != null && dtSecretManager != null
7726 && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) {
7727 for (TokenIdentifier tid: ugi.getTokenIdentifiers()) {
7728 if (tid instanceof DelegationTokenIdentifier) {
7729 DelegationTokenIdentifier dtid =
7730 (DelegationTokenIdentifier)tid;
7731 trackingId = dtSecretManager.getTokenTrackingId(dtid);
7732 break;
7733 }
7734 }
7735 }
7736 sb.append(trackingId);
7737 }
7738 logAuditMessage(sb.toString());
7739 }
7740 }
7741
7742 public void logAuditMessage(String message) {
7743 auditLog.info(message);
7744 }
7745 }
7746
7747 private static void enableAsyncAuditLog() {
7748 if (!(auditLog instanceof Log4JLogger)) {
7749 LOG.warn("Log4j is required to enable async auditlog");
7750 return;
7751 }
7752 Logger logger = ((Log4JLogger)auditLog).getLogger();
7753 @SuppressWarnings("unchecked")
7754 List<Appender> appenders = Collections.list(logger.getAllAppenders());
7755 // failsafe against trying to async it more than once
7756 if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
7757 AsyncAppender asyncAppender = new AsyncAppender();
7758 // change logger to have an async appender containing all the
7759 // previously configured appenders
7760 for (Appender appender : appenders) {
7761 logger.removeAppender(appender);
7762 asyncAppender.addAppender(appender);
7763 }
7764 logger.addAppender(asyncAppender);
7765 }
7766 }
7767
7768 }
7769