001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import com.google.common.annotations.VisibleForTesting;
021 import com.google.common.base.Joiner;
022 import com.google.common.base.Preconditions;
023 import com.google.common.collect.Lists;
024
025 import org.apache.commons.logging.Log;
026 import org.apache.commons.logging.LogFactory;
027 import org.apache.hadoop.HadoopIllegalArgumentException;
028 import org.apache.hadoop.classification.InterfaceAudience;
029 import org.apache.hadoop.conf.Configuration;
030 import org.apache.hadoop.fs.FileSystem;
031 import org.apache.hadoop.fs.Trash;
032 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
033 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
034 import org.apache.hadoop.ha.HAServiceStatus;
035 import org.apache.hadoop.ha.HealthCheckFailedException;
036 import org.apache.hadoop.ha.ServiceFailedException;
037 import org.apache.hadoop.hdfs.DFSConfigKeys;
038 import org.apache.hadoop.hdfs.DFSUtil;
039 import org.apache.hadoop.hdfs.HAUtil;
040 import org.apache.hadoop.hdfs.HdfsConfiguration;
041 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
042 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
043 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
044 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
045 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046 import org.apache.hadoop.hdfs.server.namenode.ha.*;
047 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
048 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
049 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
050 import org.apache.hadoop.hdfs.server.protocol.*;
051 import org.apache.hadoop.ipc.Server;
052 import org.apache.hadoop.ipc.StandbyException;
053 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
054 import org.apache.hadoop.metrics2.util.MBeans;
055 import org.apache.hadoop.net.NetUtils;
056 import org.apache.hadoop.security.AccessControlException;
057 import org.apache.hadoop.security.RefreshUserMappingsProtocol;
058 import org.apache.hadoop.security.SecurityUtil;
059 import org.apache.hadoop.security.UserGroupInformation;
060 import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
061 import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
062 import org.apache.hadoop.tools.GetUserMappingsProtocol;
063 import org.apache.hadoop.util.ExitUtil.ExitException;
064 import org.apache.hadoop.util.JvmPauseMonitor;
065 import org.apache.hadoop.util.ServicePlugin;
066 import org.apache.hadoop.util.StringUtils;
067
068 import javax.management.ObjectName;
069
070 import java.io.IOException;
071 import java.io.PrintStream;
072 import java.net.InetSocketAddress;
073 import java.net.URI;
074 import java.security.PrivilegedExceptionAction;
075 import java.util.ArrayList;
076 import java.util.Arrays;
077 import java.util.Collection;
078 import java.util.List;
079
080 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
081 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
082 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
083 import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
084 import static org.apache.hadoop.util.ExitUtil.terminate;
085 import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
086
087 /**********************************************************
088 * NameNode serves as both directory namespace manager and
089 * "inode table" for the Hadoop DFS. There is a single NameNode
090 * running in any DFS deployment. (Well, except when there
091 * is a second backup/failover NameNode, or when using federated NameNodes.)
092 *
093 * The NameNode controls two critical tables:
094 * 1) filename->blocksequence (namespace)
095 * 2) block->machinelist ("inodes")
096 *
097 * The first table is stored on disk and is very precious.
098 * The second table is rebuilt every time the NameNode comes up.
099 *
100 * 'NameNode' refers to both this class as well as the 'NameNode server'.
101 * The 'FSNamesystem' class actually performs most of the filesystem
102 * management. The majority of the 'NameNode' class itself is concerned
103 * with exposing the IPC interface and the HTTP server to the outside world,
104 * plus some configuration management.
105 *
106 * NameNode implements the
107 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
108 * allows clients to ask for DFS services.
109 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
110 * direct use by authors of DFS client code. End-users should instead use the
111 * {@link org.apache.hadoop.fs.FileSystem} class.
112 *
113 * NameNode also implements the
114 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
115 * used by DataNodes that actually store DFS data blocks. These
116 * methods are invoked repeatedly and automatically by all the
117 * DataNodes in a DFS deployment.
118 *
119 * NameNode also implements the
120 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
121 * used by secondary namenodes or rebalancing processes to get partial
122 * NameNode state, for example partial blocksMap etc.
123 **********************************************************/
124 @InterfaceAudience.Private
125 public class NameNode implements NameNodeStatusMXBean {
// Ensure HDFS configuration resources (hdfs-default.xml / hdfs-site.xml) are
// registered before any Configuration object is used by this class.
static{
HdfsConfiguration.init();
}

/**
* Categories of operations supported by the namenode.
*/
public static enum OperationCategory {
/** Operations that are state agnostic */
UNCHECKED,
/** Read operation that does not change the namespace state */
READ,
/** Write operation that changes the namespace state */
WRITE,
/** Operations related to checkpointing */
CHECKPOINT,
/** Operations related to {@link JournalProtocol} */
JOURNAL
}
145
146 /**
147 * HDFS configuration can have three types of parameters:
148 * <ol>
149 * <li>Parameters that are common for all the name services in the cluster.</li>
150 * <li>Parameters that are specific to a name service. These keys are suffixed
151 * with nameserviceId in the configuration. For example,
152 * "dfs.namenode.rpc-address.nameservice1".</li>
153 * <li>Parameters that are specific to a single name node. These keys are suffixed
154 * with nameserviceId and namenodeId in the configuration. for example,
155 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
156 * </ol>
157 *
158 * In the latter cases, operators may specify the configuration without
159 * any suffix, with a nameservice suffix, or with a nameservice and namenode
160 * suffix. The more specific suffix will take precedence.
161 *
162 * These keys are specific to a given namenode, and thus may be configured
163 * globally, for a nameservice, or for a specific namenode within a nameservice.
164 */
165 public static final String[] NAMENODE_SPECIFIC_KEYS = {
166 DFS_NAMENODE_RPC_ADDRESS_KEY,
167 DFS_NAMENODE_RPC_BIND_HOST_KEY,
168 DFS_NAMENODE_NAME_DIR_KEY,
169 DFS_NAMENODE_EDITS_DIR_KEY,
170 DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
171 DFS_NAMENODE_CHECKPOINT_DIR_KEY,
172 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
173 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
174 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
175 DFS_NAMENODE_HTTP_ADDRESS_KEY,
176 DFS_NAMENODE_HTTPS_ADDRESS_KEY,
177 DFS_NAMENODE_KEYTAB_FILE_KEY,
178 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
179 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
180 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
181 DFS_NAMENODE_BACKUP_ADDRESS_KEY,
182 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
183 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
184 DFS_NAMENODE_USER_NAME_KEY,
185 DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
186 DFS_HA_FENCE_METHODS_KEY,
187 DFS_HA_ZKFC_PORT_KEY,
188 DFS_HA_FENCE_METHODS_KEY
189 };
190
191 /**
192 * @see #NAMENODE_SPECIFIC_KEYS
193 * These keys are specific to a nameservice, but may not be overridden
194 * for a specific namenode.
195 */
196 public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
197 DFS_HA_AUTO_FAILOVER_ENABLED_KEY
198 };
199
/** Usage text printed when the NameNode is started with invalid arguments. */
private static final String USAGE = "Usage: java NameNode ["
+ StartupOption.BACKUP.getName() + "] | ["
+ StartupOption.CHECKPOINT.getName() + "] | ["
+ StartupOption.FORMAT.getName() + " ["
+ StartupOption.CLUSTERID.getName() + " cid ] ["
+ StartupOption.FORCE.getName() + "] ["
+ StartupOption.NONINTERACTIVE.getName() + "] ] | ["
+ StartupOption.UPGRADE.getName() +
" [" + StartupOption.CLUSTERID.getName() + " cid]" +
" [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | ["
+ StartupOption.ROLLBACK.getName() + "] | ["
+ StartupOption.ROLLINGUPGRADE.getName() + " <"
+ RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|"
+ RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | ["
+ StartupOption.FINALIZE.getName() + "] | ["
+ StartupOption.IMPORT.getName() + "] | ["
+ StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
+ StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
+ StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
+ " ] ]";
220
221 public long getProtocolVersion(String protocol,
222 long clientVersion) throws IOException {
223 if (protocol.equals(ClientProtocol.class.getName())) {
224 return ClientProtocol.versionID;
225 } else if (protocol.equals(DatanodeProtocol.class.getName())){
226 return DatanodeProtocol.versionID;
227 } else if (protocol.equals(NamenodeProtocol.class.getName())){
228 return NamenodeProtocol.versionID;
229 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
230 return RefreshAuthorizationPolicyProtocol.versionID;
231 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
232 return RefreshUserMappingsProtocol.versionID;
233 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
234 return RefreshCallQueueProtocol.versionID;
235 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
236 return GetUserMappingsProtocol.versionID;
237 } else {
238 throw new IOException("Unknown protocol to name node: " + protocol);
239 }
240 }
241
// Default RPC port used when fs.defaultFS / an address string omits the port.
public static final int DEFAULT_PORT = 8020;
public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
// Dedicated loggers for namespace and block state transitions.
public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
// Shared singleton HA state objects; per-NameNode state lives in 'state'.
public static final HAState ACTIVE_STATE = new ActiveState();
public static final HAState STANDBY_STATE = new StandbyState();

protected FSNamesystem namesystem;
protected final Configuration conf;
protected final NamenodeRole role;
// volatile: read by status/health queries without holding the haContext lock.
private volatile HAState state;
private final boolean haEnabled;
private final HAContext haContext;
protected final boolean allowStaleStandbyReads;


/** httpServer */
protected NameNodeHttpServer httpServer;
// Background thread that periodically empties user trash; null when disabled.
private Thread emptier;
/** only used for testing purposes */
protected boolean stopRequested = false;
/** Registration information of this name-node */
protected NamenodeRegistration nodeRegistration;
/** Activated plug-ins. */
private List<ServicePlugin> plugins;

private NameNodeRpcServer rpcServer;

// Monitors JVM pauses (e.g. GC) and logs/records them via metrics.
private JvmPauseMonitor pauseMonitor;
// JMX registration handle for the NameNodeStatus MXBean; unregistered on stop().
private ObjectName nameNodeStatusBeanName;
/**
* The service name of the delegation token issued by the namenode. It is
* the name service id in HA mode, or the rpc address in non-HA mode.
*/
private String tokenServiceName;

/** Format a new filesystem. Destroys any filesystem that may already
* exist at this location. **/
public static void format(Configuration conf) throws IOException {
format(conf, true, true);
}

// NameNode metrics; initialized via initMetrics() during initialize().
static NameNodeMetrics metrics;
private static final StartupProgress startupProgress = new StartupProgress();
/** Return the {@link FSNamesystem} object.
* @return {@link FSNamesystem} object.
*/
public FSNamesystem getNamesystem() {
return namesystem;
}

/** @return the RPC server facade exposing all NameNode protocols. */
public NamenodeProtocols getRpcServer() {
return rpcServer;
}

// Creates and installs the static NameNodeMetrics instance for this role.
static void initMetrics(Configuration conf, NamenodeRole role) {
metrics = NameNodeMetrics.create(conf, role);
}

/** @return the metrics instance created by {@link #initMetrics}. */
public static NameNodeMetrics getNameNodeMetrics() {
return metrics;
}

/**
* Returns object used for reporting namenode startup progress.
*
* @return StartupProgress for reporting namenode startup progress
*/
public static StartupProgress getStartupProgress() {
return startupProgress;
}

/**
* Return the service name of the issued delegation token.
*
* @return The name service id in HA-mode, or the rpc address in non-HA mode
*/
public String getTokenServiceName() { return tokenServiceName; }

/** Parse a "host[:port]" string, defaulting the port to {@link #DEFAULT_PORT}. */
public static InetSocketAddress getAddress(String address) {
return NetUtils.createSocketAddr(address, DEFAULT_PORT);
}
324
325 /**
326 * Set the configuration property for the service rpc address
327 * to address
328 */
329 public static void setServiceAddress(Configuration conf,
330 String address) {
331 LOG.info("Setting ADDRESS " + address);
332 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
333 }
334
335 /**
336 * Fetches the address for services to use when connecting to namenode
337 * based on the value of fallback returns null if the special
338 * address is not specified or returns the default namenode address
339 * to be used by both clients and services.
340 * Services here are datanodes, backup node, any non client connection
341 */
342 public static InetSocketAddress getServiceAddress(Configuration conf,
343 boolean fallback) {
344 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
345 if (addr == null || addr.isEmpty()) {
346 return fallback ? getAddress(conf) : null;
347 }
348 return getAddress(addr);
349 }
350
/** @return the namenode RPC address derived from fs.defaultFS. */
public static InetSocketAddress getAddress(Configuration conf) {
URI filesystemURI = FileSystem.getDefaultUri(conf);
return getAddress(filesystemURI);
}
355
356
357 /**
358 * TODO:FEDERATION
359 * @param filesystemURI
360 * @return address of file system
361 */
362 public static InetSocketAddress getAddress(URI filesystemURI) {
363 String authority = filesystemURI.getAuthority();
364 if (authority == null) {
365 throw new IllegalArgumentException(String.format(
366 "Invalid URI for NameNode address (check %s): %s has no authority.",
367 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
368 }
369 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
370 filesystemURI.getScheme())) {
371 throw new IllegalArgumentException(String.format(
372 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
373 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
374 HdfsConstants.HDFS_URI_SCHEME));
375 }
376 return getAddress(authority);
377 }
378
379 public static URI getUri(InetSocketAddress namenode) {
380 int port = namenode.getPort();
381 String portString = port == DEFAULT_PORT ? "" : (":"+port);
382 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
383 + namenode.getHostName()+portString);
384 }
385
386 //
387 // Common NameNode methods implementation for the active name-node role.
388 //
389 public NamenodeRole getRole() {
390 return role;
391 }
392
393 boolean isRole(NamenodeRole that) {
394 return role.equals(that);
395 }
396
397 /**
398 * Given a configuration get the address of the service rpc server
399 * If the service rpc is not configured returns null
400 */
401 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
402 return NameNode.getServiceAddress(conf, false);
403 }
404
405 protected InetSocketAddress getRpcServerAddress(Configuration conf) {
406 return getAddress(conf);
407 }
408
409 /** Given a configuration get the bind host of the service rpc server
410 * If the bind host is not configured returns null.
411 */
412 protected String getServiceRpcServerBindHost(Configuration conf) {
413 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
414 if (addr == null || addr.isEmpty()) {
415 return null;
416 }
417 return addr;
418 }
419
420 /** Given a configuration get the bind host of the client rpc server
421 * If the bind host is not configured returns null.
422 */
423 protected String getRpcServerBindHost(Configuration conf) {
424 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
425 if (addr == null || addr.isEmpty()) {
426 return null;
427 }
428 return addr;
429 }
430
431 /**
432 * Modifies the configuration passed to contain the service rpc address setting
433 */
434 protected void setRpcServiceServerAddress(Configuration conf,
435 InetSocketAddress serviceRPCAddress) {
436 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
437 }
438
439 protected void setRpcServerAddress(Configuration conf,
440 InetSocketAddress rpcAddress) {
441 FileSystem.setDefaultUri(conf, getUri(rpcAddress));
442 }
443
444 protected InetSocketAddress getHttpServerAddress(Configuration conf) {
445 return getHttpAddress(conf);
446 }
447
448 /** @return the NameNode HTTP address. */
449 public static InetSocketAddress getHttpAddress(Configuration conf) {
450 return NetUtils.createSocketAddr(
451 conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
452 }
453
/** Loads the namespace image/edits from disk; overridable extension point. */
protected void loadNamesystem(Configuration conf) throws IOException {
this.namesystem = FSNamesystem.loadFromDisk(conf);
}

/** @return the registration record last built by {@link #setRegistration}. */
NamenodeRegistration getRegistration() {
return nodeRegistration;
}

// Builds the registration record from the live RPC/HTTP addresses and
// current storage info; requires rpcServer and httpServer to be started.
NamenodeRegistration setRegistration() {
nodeRegistration = new NamenodeRegistration(
NetUtils.getHostPortString(rpcServer.getRpcAddress()),
NetUtils.getHostPortString(getHttpAddress()),
getFSImage().getStorage(), getRole());
return nodeRegistration;
}
469
470 /* optimize ugi lookup for RPC operations to avoid a trip through
471 * UGI.getCurrentUser which is synch'ed
472 */
473 public static UserGroupInformation getRemoteUser() throws IOException {
474 UserGroupInformation ugi = Server.getRemoteUser();
475 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
476 }
477
478
479 /**
480 * Login as the configured user for the NameNode.
481 */
482 void loginAsNameNodeUser(Configuration conf) throws IOException {
483 InetSocketAddress socAddr = getRpcServerAddress(conf);
484 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
485 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
486 }
487
488 /**
489 * Initialize name-node.
490 *
491 * @param conf the configuration
492 */
493 protected void initialize(Configuration conf) throws IOException {
494 if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
495 String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
496 if (intervals != null) {
497 conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
498 intervals);
499 }
500 }
501
502 UserGroupInformation.setConfiguration(conf);
503 loginAsNameNodeUser(conf);
504
505 NameNode.initMetrics(conf, this.getRole());
506 StartupProgressMetrics.register(startupProgress);
507
508 if (NamenodeRole.NAMENODE == role) {
509 startHttpServer(conf);
510 }
511 loadNamesystem(conf);
512
513 rpcServer = createRpcServer(conf);
514 final String nsId = getNameServiceId(conf);
515 tokenServiceName = HAUtil.isHAEnabled(conf, nsId) ? nsId : NetUtils
516 .getHostPortString(rpcServer.getRpcAddress());
517 if (NamenodeRole.NAMENODE == role) {
518 httpServer.setNameNodeAddress(getNameNodeAddress());
519 httpServer.setFSImage(getFSImage());
520 }
521
522 pauseMonitor = new JvmPauseMonitor(conf);
523 pauseMonitor.start();
524
525 startCommonServices(conf);
526 }
527
528 /**
529 * Create the RPC server implementation. Used as an extension point for the
530 * BackupNode.
531 */
532 protected NameNodeRpcServer createRpcServer(Configuration conf)
533 throws IOException {
534 return new NameNodeRpcServer(conf, this);
535 }
536
/** Start the services common to active and standby states.
* Non-NAMENODE roles start their HTTP server here (the NAMENODE role
* already started it in initialize()). Plugin failures are logged but do
* not abort startup.
*/
private void startCommonServices(Configuration conf) throws IOException {
namesystem.startCommonServices(conf, haContext);
registerNNSMXBean();
if (NamenodeRole.NAMENODE != role) {
startHttpServer(conf);
httpServer.setNameNodeAddress(getNameNodeAddress());
httpServer.setFSImage(getFSImage());
}
rpcServer.start();
plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
ServicePlugin.class);
for (ServicePlugin p: plugins) {
try {
p.start(this);
} catch (Throwable t) {
// Best-effort: a broken plugin must not prevent NameNode startup.
LOG.warn("ServicePlugin " + p + " could not be started", t);
}
}
LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
if (rpcServer.getServiceRpcAddress() != null) {
LOG.info(getRole() + " service RPC up at: "
+ rpcServer.getServiceRpcAddress());
}
}
562
// Stops all common services; every step is null-guarded so this is safe to
// call from a partially-failed initialize(). Plugin stop failures are
// logged and ignored so one plugin cannot block shutdown of the rest.
private void stopCommonServices() {
if(rpcServer != null) rpcServer.stop();
if(namesystem != null) namesystem.close();
if (pauseMonitor != null) pauseMonitor.stop();
if (plugins != null) {
for (ServicePlugin p : plugins) {
try {
p.stop();
} catch (Throwable t) {
LOG.warn("ServicePlugin " + p + " could not be stopped", t);
}
}
}
stopHttpServer();
}
578
/** Starts the daemon thread that periodically purges expired trash.
* A zero interval means trash is disabled (no thread started); a negative
* interval is a configuration error.
*/
private void startTrashEmptier(final Configuration conf) throws IOException {
long trashInterval =
conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
if (trashInterval == 0) {
return;
} else if (trashInterval < 0) {
throw new IOException("Cannot start trash emptier with negative interval."
+ " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
}

// This may be called from the transitionToActive code path, in which
// case the current user is the administrator, not the NN. The trash
// emptier needs to run as the NN. See HDFS-3972.
FileSystem fs = SecurityUtil.doAsLoginUser(
new PrivilegedExceptionAction<FileSystem>() {
@Override
public FileSystem run() throws IOException {
return FileSystem.get(conf);
}
});
this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
this.emptier.setDaemon(true);
this.emptier.start();
}
603
604 private void stopTrashEmptier() {
605 if (this.emptier != null) {
606 emptier.interrupt();
607 emptier = null;
608 }
609 }
610
// Creates and starts the HTTP server and wires it to startup progress.
private void startHttpServer(final Configuration conf) throws IOException {
httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
httpServer.start();
httpServer.setStartupProgress(startupProgress);
}
616
617 private void stopHttpServer() {
618 try {
619 if (httpServer != null) httpServer.stop();
620 } catch (Exception e) {
621 LOG.error("Exception while stopping httpserver", e);
622 }
623 }
624
625 /**
626 * Start NameNode.
627 * <p>
628 * The name-node can be started with one of the following startup options:
629 * <ul>
630 * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
631 * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
632 * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
633 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
634 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
635 * upgrade and create a snapshot of the current file system state</li>
636 * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
637 * metadata</li>
638 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
639 * cluster back to the previous state</li>
640 * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
641 * previous upgrade</li>
642 * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
643 * </ul>
644 * The option is passed via configuration field:
645 * <tt>dfs.namenode.startup</tt>
646 *
647 * The conf will be modified to reflect the actual ports on which
648 * the NameNode is up and running if the user passes the port as
649 * <code>zero</code> in the conf.
650 *
651 * @param conf confirguration
652 * @throws IOException
653 */
654 public NameNode(Configuration conf) throws IOException {
655 this(conf, NamenodeRole.NAMENODE);
656 }
657
/**
* Constructs a NameNode for the given role. Resolves the nameservice and
* namenode ids, applies the corresponding suffixed configuration keys,
* initializes all services, then enters the initial HA state under the
* haContext write lock. Any failure triggers a full stop() before the
* exception is rethrown so no half-started services leak.
*/
protected NameNode(Configuration conf, NamenodeRole role)
throws IOException {
this.conf = conf;
this.role = role;
String nsId = getNameServiceId(conf);
String namenodeId = HAUtil.getNameNodeId(conf, nsId);
this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
state = createHAState(getStartupOption(conf));
this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
this.haContext = createHAContext();
try {
initializeGenericKeys(conf, nsId, namenodeId);
initialize(conf);
try {
haContext.writeLock();
state.prepareToEnterState(haContext);
state.enterState(haContext);
} finally {
haContext.writeUnlock();
}
} catch (IOException e) {
this.stop();
throw e;
} catch (HadoopIllegalArgumentException e) {
this.stop();
throw e;
}
}
686
687 protected HAState createHAState(StartupOption startOpt) {
688 if (!haEnabled || startOpt == StartupOption.UPGRADE) {
689 return ACTIVE_STATE;
690 } else {
691 return STANDBY_STATE;
692 }
693 }
694
/** Factory for the HA context; extension point for subclasses. */
protected HAContext createHAContext() {
return new NameNodeHAContext();
}

/**
* Wait for service to finish.
* (Normally, it runs forever.)
*/
public void join() {
try {
rpcServer.join();
} catch (InterruptedException ie) {
LOG.info("Caught interrupted exception ", ie);
}
}
710
711 /**
712 * Stop all NameNode threads and wait for all to finish.
713 */
714 public void stop() {
715 synchronized(this) {
716 if (stopRequested)
717 return;
718 stopRequested = true;
719 }
720 try {
721 if (state != null) {
722 state.exitState(haContext);
723 }
724 } catch (ServiceFailedException e) {
725 LOG.warn("Encountered exception while exiting state ", e);
726 } finally {
727 stopCommonServices();
728 if (metrics != null) {
729 metrics.shutdown();
730 }
731 if (namesystem != null) {
732 namesystem.shutdown();
733 }
734 if (nameNodeStatusBeanName != null) {
735 MBeans.unregister(nameNodeStatusBeanName);
736 nameNodeStatusBeanName = null;
737 }
738 }
739 }
740
741 synchronized boolean isStopRequested() {
742 return stopRequested;
743 }
744
745 /**
746 * Is the cluster currently in safe mode?
747 */
748 public boolean isInSafeMode() {
749 return namesystem.isInSafeMode();
750 }
751
752 /** get FSImage */
753 @VisibleForTesting
754 public FSImage getFSImage() {
755 return namesystem.dir.fsImage;
756 }
757
758 /**
759 * @return NameNode RPC address
760 */
761 public InetSocketAddress getNameNodeAddress() {
762 return rpcServer.getRpcAddress();
763 }
764
765 /**
766 * @return NameNode RPC address in "host:port" string form
767 */
768 public String getNameNodeAddressHostPortString() {
769 return NetUtils.getHostPortString(rpcServer.getRpcAddress());
770 }
771
772 /**
773 * @return NameNode service RPC address if configured, the
774 * NameNode RPC address otherwise
775 */
776 public InetSocketAddress getServiceRpcAddress() {
777 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
778 return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
779 }
780
781 /**
782 * @return NameNode HTTP address, used by the Web UI, image transfer,
783 * and HTTP-based file system clients like Hftp and WebHDFS
784 */
785 public InetSocketAddress getHttpAddress() {
786 return httpServer.getHttpAddress();
787 }
788
789 /**
790 * @return NameNode HTTPS address, used by the Web UI, image transfer,
791 * and HTTP-based file system clients like Hftp and WebHDFS
792 */
793 public InetSocketAddress getHttpsAddress() {
794 return httpServer.getHttpsAddress();
795 }
796
797 /**
798 * Verify that configured directories exist, then
799 * Interactively confirm that formatting is desired
800 * for each existing directory and format them.
801 *
802 * @param conf
803 * @param force
804 * @return true if formatting was aborted, false otherwise
805 * @throws IOException
806 */
807 private static boolean format(Configuration conf, boolean force,
808 boolean isInteractive) throws IOException {
809 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
810 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
811 initializeGenericKeys(conf, nsId, namenodeId);
812 checkAllowFormat(conf);
813
814 if (UserGroupInformation.isSecurityEnabled()) {
815 InetSocketAddress socAddr = getAddress(conf);
816 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
817 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
818 }
819
820 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
821 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
822 List<URI> dirsToPrompt = new ArrayList<URI>();
823 dirsToPrompt.addAll(nameDirsToFormat);
824 dirsToPrompt.addAll(sharedDirs);
825 List<URI> editDirsToFormat =
826 FSNamesystem.getNamespaceEditsDirs(conf);
827
828 // if clusterID is not provided - see if you can find the current one
829 String clusterId = StartupOption.FORMAT.getClusterId();
830 if(clusterId == null || clusterId.equals("")) {
831 //Generate a new cluster id
832 clusterId = NNStorage.newClusterID();
833 }
834 System.out.println("Formatting using clusterid: " + clusterId);
835
836 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
837 try {
838 FSNamesystem fsn = new FSNamesystem(conf, fsImage);
839 fsImage.getEditLog().initJournalsForWrite();
840
841 if (!fsImage.confirmFormat(force, isInteractive)) {
842 return true; // aborted
843 }
844
845 fsImage.format(fsn, clusterId);
846 } catch (IOException ioe) {
847 LOG.warn("Encountered exception during format: ", ioe);
848 fsImage.close();
849 throw ioe;
850 }
851 return false;
852 }
853
854 public static void checkAllowFormat(Configuration conf) throws IOException {
855 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
856 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
857 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
858 + " is set to false for this filesystem, so it "
859 + "cannot be formatted. You will need to set "
860 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
861 + "to true in order to format this filesystem");
862 }
863 }
864
/** Initialize shared edits with force=true and non-interactive prompting. */
@VisibleForTesting
public static boolean initializeSharedEdits(Configuration conf) throws IOException {
return initializeSharedEdits(conf, true);
}

/** Initialize shared edits, non-interactively, with the given force flag. */
@VisibleForTesting
public static boolean initializeSharedEdits(Configuration conf,
boolean force) throws IOException {
return initializeSharedEdits(conf, force, false);
}
875
876 /**
877 * Clone the supplied configuration but remove the shared edits dirs.
878 *
879 * @param conf Supplies the original configuration.
880 * @return Cloned configuration without the shared edit dirs.
881 * @throws IOException on failure to generate the configuration.
882 */
883 private static Configuration getConfigurationWithoutSharedEdits(
884 Configuration conf)
885 throws IOException {
886 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
887 String editsDirsString = Joiner.on(",").join(editsDirs);
888
889 Configuration confWithoutShared = new Configuration(conf);
890 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
891 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
892 editsDirsString);
893 return confWithoutShared;
894 }
895
896 /**
897 * Format a new shared edits dir and copy in enough edit log segments so that
898 * the standby NN can start up.
899 *
900 * @param conf configuration
901 * @param force format regardless of whether or not the shared edits dir exists
902 * @param interactive prompt the user when a dir exists
903 * @return true if the command aborts, false otherwise
904 */
905 private static boolean initializeSharedEdits(Configuration conf,
906 boolean force, boolean interactive) throws IOException {
907 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
908 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
909 initializeGenericKeys(conf, nsId, namenodeId);
910
911 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
912 LOG.fatal("No shared edits directory configured for namespace " +
913 nsId + " namenode " + namenodeId);
914 return false;
915 }
916
917 if (UserGroupInformation.isSecurityEnabled()) {
918 InetSocketAddress socAddr = getAddress(conf);
919 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
920 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
921 }
922
923 NNStorage existingStorage = null;
924 FSImage sharedEditsImage = null;
925 try {
926 FSNamesystem fsns =
927 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
928
929 existingStorage = fsns.getFSImage().getStorage();
930 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
931
932 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
933
934 sharedEditsImage = new FSImage(conf,
935 Lists.<URI>newArrayList(),
936 sharedEditsDirs);
937 sharedEditsImage.getEditLog().initJournalsForWrite();
938
939 if (!sharedEditsImage.confirmFormat(force, interactive)) {
940 return true; // abort
941 }
942
943 NNStorage newSharedStorage = sharedEditsImage.getStorage();
944 // Call Storage.format instead of FSImage.format here, since we don't
945 // actually want to save a checkpoint - just prime the dirs with
946 // the existing namespace info
947 newSharedStorage.format(nsInfo);
948 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
949
950 // Need to make sure the edit log segments are in good shape to initialize
951 // the shared edits dir.
952 fsns.getFSImage().getEditLog().close();
953 fsns.getFSImage().getEditLog().initJournalsForWrite();
954 fsns.getFSImage().getEditLog().recoverUnclosedStreams();
955
956 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
957 conf);
958 } catch (IOException ioe) {
959 LOG.error("Could not initialize shared edits dir", ioe);
960 return true; // aborted
961 } finally {
962 if (sharedEditsImage != null) {
963 try {
964 sharedEditsImage.close();
965 } catch (IOException ioe) {
966 LOG.warn("Could not close sharedEditsImage", ioe);
967 }
968 }
969 // Have to unlock storage explicitly for the case when we're running in a
970 // unit test, which runs in the same JVM as NNs.
971 if (existingStorage != null) {
972 try {
973 existingStorage.unlockAll();
974 } catch (IOException ioe) {
975 LOG.warn("Could not unlock storage directories", ioe);
976 return true; // aborted
977 }
978 }
979 }
980 return false; // did not abort
981 }
982
  /**
   * Copy all edit log segments after the most recent checkpoint from the
   * local edit log into the freshly-formatted shared edits directory,
   * re-creating segment boundaries as END_LOG_SEGMENT ops are encountered.
   *
   * @param fsns the namesystem whose local edit log is the copy source
   * @param sharedEditsDirs the shared edits dirs to copy into (must be
   *     non-empty)
   * @param newSharedStorage storage backing the new shared edits dirs
   * @param conf configuration used to construct the destination edit log
   * @throws IOException if reading the source streams or writing the shared
   *     log fails
   */
  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();

    FSEditLog sourceEditLog = fsns.getFSImage().editLog;

    // Everything after the last checkpoint transaction must be copied.
    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();

    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        // Tracks whether a destination segment is currently open; a new
        // segment is started lazily at the first op of each source segment.
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          // An END_LOG_SEGMENT op closes the destination segment so the
          // shared log mirrors the source's segment boundaries.
          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        // Source stream ended mid-segment: close out the partial segment.
        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
1044
1045 @VisibleForTesting
1046 public static boolean doRollback(Configuration conf,
1047 boolean isConfirmationNeeded) throws IOException {
1048 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1049 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1050 initializeGenericKeys(conf, nsId, namenodeId);
1051
1052 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1053 System.err.print(
1054 "\"rollBack\" will remove the current state of the file system,\n"
1055 + "returning you to the state prior to initiating your recent.\n"
1056 + "upgrade. This action is permanent and cannot be undone. If you\n"
1057 + "are performing a rollback in an HA environment, you should be\n"
1058 + "certain that no NameNode process is running on any host.");
1059 if (isConfirmationNeeded) {
1060 if (!confirmPrompt("Roll back file system state?")) {
1061 System.err.println("Rollback aborted.");
1062 return true;
1063 }
1064 }
1065 nsys.dir.fsImage.doRollback(nsys);
1066 return false;
1067 }
1068
  /** Print the NameNode command-line usage message, followed by a blank line. */
  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }
1072
1073 @VisibleForTesting
1074 static StartupOption parseArguments(String args[]) {
1075 int argsLen = (args == null) ? 0 : args.length;
1076 StartupOption startOpt = StartupOption.REGULAR;
1077 for(int i=0; i < argsLen; i++) {
1078 String cmd = args[i];
1079 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
1080 startOpt = StartupOption.FORMAT;
1081 for (i = i + 1; i < argsLen; i++) {
1082 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1083 i++;
1084 if (i >= argsLen) {
1085 // if no cluster id specified, return null
1086 LOG.fatal("Must specify a valid cluster ID after the "
1087 + StartupOption.CLUSTERID.getName() + " flag");
1088 return null;
1089 }
1090 String clusterId = args[i];
1091 // Make sure an id is specified and not another flag
1092 if (clusterId.isEmpty() ||
1093 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
1094 clusterId.equalsIgnoreCase(
1095 StartupOption.NONINTERACTIVE.getName())) {
1096 LOG.fatal("Must specify a valid cluster ID after the "
1097 + StartupOption.CLUSTERID.getName() + " flag");
1098 return null;
1099 }
1100 startOpt.setClusterId(clusterId);
1101 }
1102
1103 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
1104 startOpt.setForceFormat(true);
1105 }
1106
1107 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
1108 startOpt.setInteractiveFormat(false);
1109 }
1110 }
1111 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
1112 startOpt = StartupOption.GENCLUSTERID;
1113 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
1114 startOpt = StartupOption.REGULAR;
1115 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
1116 startOpt = StartupOption.BACKUP;
1117 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
1118 startOpt = StartupOption.CHECKPOINT;
1119 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
1120 startOpt = StartupOption.UPGRADE;
1121 /* Can be followed by CLUSTERID with a required parameter or
1122 * RENAMERESERVED with an optional parameter
1123 */
1124 while (i + 1 < argsLen) {
1125 String flag = args[i + 1];
1126 if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1127 if (i + 2 < argsLen) {
1128 i += 2;
1129 startOpt.setClusterId(args[i]);
1130 } else {
1131 LOG.fatal("Must specify a valid cluster ID after the "
1132 + StartupOption.CLUSTERID.getName() + " flag");
1133 return null;
1134 }
1135 } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
1136 .getName())) {
1137 if (i + 2 < argsLen) {
1138 FSImageFormat.setRenameReservedPairs(args[i + 2]);
1139 i += 2;
1140 } else {
1141 FSImageFormat.useDefaultRenameReservedPairs();
1142 i += 1;
1143 }
1144 } else {
1145 LOG.fatal("Unknown upgrade flag " + flag);
1146 return null;
1147 }
1148 }
1149 } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
1150 startOpt = StartupOption.ROLLINGUPGRADE;
1151 ++i;
1152 startOpt.setRollingUpgradeStartupOption(args[i]);
1153 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
1154 startOpt = StartupOption.ROLLBACK;
1155 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
1156 startOpt = StartupOption.FINALIZE;
1157 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
1158 startOpt = StartupOption.IMPORT;
1159 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
1160 startOpt = StartupOption.BOOTSTRAPSTANDBY;
1161 return startOpt;
1162 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
1163 startOpt = StartupOption.INITIALIZESHAREDEDITS;
1164 for (i = i + 1 ; i < argsLen; i++) {
1165 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
1166 startOpt.setInteractiveFormat(false);
1167 } else if (StartupOption.FORCE.getName().equals(args[i])) {
1168 startOpt.setForceFormat(true);
1169 } else {
1170 LOG.fatal("Invalid argument: " + args[i]);
1171 return null;
1172 }
1173 }
1174 return startOpt;
1175 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
1176 if (startOpt != StartupOption.REGULAR) {
1177 throw new RuntimeException("Can't combine -recover with " +
1178 "other startup options.");
1179 }
1180 startOpt = StartupOption.RECOVER;
1181 while (++i < argsLen) {
1182 if (args[i].equalsIgnoreCase(
1183 StartupOption.FORCE.getName())) {
1184 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
1185 } else {
1186 throw new RuntimeException("Error parsing recovery options: " +
1187 "can't understand option \"" + args[i] + "\"");
1188 }
1189 }
1190 } else {
1191 return null;
1192 }
1193 }
1194 return startOpt;
1195 }
1196
  /** Record the chosen startup option in the configuration by enum name. */
  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }
1200
  /**
   * Read the startup option back from the configuration, defaulting to
   * REGULAR. Throws IllegalArgumentException if the stored value does not
   * name a StartupOption constant.
   */
  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
                                          StartupOption.REGULAR.toString()));
  }
1205
1206 private static void doRecovery(StartupOption startOpt, Configuration conf)
1207 throws IOException {
1208 String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1209 String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1210 initializeGenericKeys(conf, nsId, namenodeId);
1211 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
1212 if (!confirmPrompt("You have selected Metadata Recovery mode. " +
1213 "This mode is intended to recover lost metadata on a corrupt " +
1214 "filesystem. Metadata recovery mode often permanently deletes " +
1215 "data from your HDFS filesystem. Please back up your edit log " +
1216 "and fsimage before trying this!\n\n" +
1217 "Are you ready to proceed? (Y/N)\n")) {
1218 System.err.println("Recovery aborted at user request.\n");
1219 return;
1220 }
1221 }
1222 MetaRecoveryContext.LOG.info("starting recovery...");
1223 UserGroupInformation.setConfiguration(conf);
1224 NameNode.initMetrics(conf, startOpt.toNodeRole());
1225 FSNamesystem fsn = null;
1226 try {
1227 fsn = FSNamesystem.loadFromDisk(conf);
1228 fsn.getFSImage().saveNamespace(fsn);
1229 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
1230 } catch (IOException e) {
1231 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
1232 throw e;
1233 } catch (RuntimeException e) {
1234 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
1235 throw e;
1236 } finally {
1237 if (fsn != null)
1238 fsn.close();
1239 }
1240 }
1241
  /**
   * Create (or, for administrative startup options, run and exit) a NameNode
   * from command-line arguments. Several cases call terminate() and never
   * return normally; the returns after those calls only silence javac.
   *
   * @param argv command-line arguments
   * @param conf configuration to use; a fresh HdfsConfiguration when null
   * @return a running NameNode/BackupNode, or null when the invocation was an
   *     administrative command or the arguments were invalid
   * @throws IOException on failure to start the requested service
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      // Invalid arguments: show usage and bail out.
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        // The id goes to stdout so it can be captured by scripts.
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        // Hand the remaining args (after the flag itself) to the tool.
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        // Both roles are served by BackupNode; the role selects the behavior.
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        // Normal startup (REGULAR, UPGRADE, ROLLINGUPGRADE, IMPORT, ...).
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
1308
1309 /**
1310 * In federation configuration is set for a set of
1311 * namenode and secondary namenode/backup/checkpointer, which are
1312 * grouped under a logical nameservice ID. The configuration keys specific
1313 * to them have suffix set to configured nameserviceId.
1314 *
1315 * This method copies the value from specific key of format key.nameserviceId
1316 * to key, to set up the generic configuration. Once this is done, only
1317 * generic version of the configuration is read in rest of the code, for
1318 * backward compatibility and simpler code changes.
1319 *
1320 * @param conf
1321 * Configuration object to lookup specific key and to set the value
1322 * to the key passed. Note the conf object is modified
1323 * @param nameserviceId name service Id (to distinguish federated NNs)
1324 * @param namenodeId the namenode ID (to distinguish HA NNs)
1325 * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1326 */
1327 public static void initializeGenericKeys(Configuration conf,
1328 String nameserviceId, String namenodeId) {
1329 if ((nameserviceId != null && !nameserviceId.isEmpty()) ||
1330 (namenodeId != null && !namenodeId.isEmpty())) {
1331 if (nameserviceId != null) {
1332 conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1333 }
1334 if (namenodeId != null) {
1335 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1336 }
1337
1338 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1339 NAMENODE_SPECIFIC_KEYS);
1340 DFSUtil.setGenericConf(conf, nameserviceId, null,
1341 NAMESERVICE_SPECIFIC_KEYS);
1342 }
1343
1344 // If the RPC address is set use it to (re-)configure the default FS
1345 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1346 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1347 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1348 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1349 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1350 }
1351 }
1352
1353 /**
1354 * Get the name service Id for the node
1355 * @return name service Id or null if federation is not configured
1356 */
1357 protected String getNameServiceId(Configuration conf) {
1358 return DFSUtil.getNamenodeNameServiceId(conf);
1359 }
1360
1361 /**
1362 */
1363 public static void main(String argv[]) throws Exception {
1364 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1365 System.exit(0);
1366 }
1367
1368 try {
1369 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1370 NameNode namenode = createNameNode(argv, null);
1371 if (namenode != null) {
1372 namenode.join();
1373 }
1374 } catch (Throwable e) {
1375 LOG.fatal("Exception in namenode join", e);
1376 terminate(1, e);
1377 }
1378 }
1379
1380 synchronized void monitorHealth()
1381 throws HealthCheckFailedException, AccessControlException {
1382 namesystem.checkSuperuserPrivilege();
1383 if (!haEnabled) {
1384 return; // no-op, if HA is not enabled
1385 }
1386 getNamesystem().checkAvailableResources();
1387 if (!getNamesystem().nameNodeHasResourcesAvailable()) {
1388 throw new HealthCheckFailedException(
1389 "The NameNode has no resources available");
1390 }
1391 }
1392
1393 synchronized void transitionToActive()
1394 throws ServiceFailedException, AccessControlException {
1395 namesystem.checkSuperuserPrivilege();
1396 if (!haEnabled) {
1397 throw new ServiceFailedException("HA for namenode is not enabled");
1398 }
1399 state.setState(haContext, ACTIVE_STATE);
1400 }
1401
1402 synchronized void transitionToStandby()
1403 throws ServiceFailedException, AccessControlException {
1404 namesystem.checkSuperuserPrivilege();
1405 if (!haEnabled) {
1406 throw new ServiceFailedException("HA for namenode is not enabled");
1407 }
1408 state.setState(haContext, STANDBY_STATE);
1409 }
1410
1411 synchronized HAServiceStatus getServiceStatus()
1412 throws ServiceFailedException, AccessControlException {
1413 namesystem.checkSuperuserPrivilege();
1414 if (!haEnabled) {
1415 throw new ServiceFailedException("HA for namenode is not enabled");
1416 }
1417 if (state == null) {
1418 return new HAServiceStatus(HAServiceState.INITIALIZING);
1419 }
1420 HAServiceState retState = state.getServiceState();
1421 HAServiceStatus ret = new HAServiceStatus(retState);
1422 if (retState == HAServiceState.STANDBY) {
1423 String safemodeTip = namesystem.getSafeModeTip();
1424 if (!safemodeTip.isEmpty()) {
1425 ret.setNotReadyToBecomeActive(
1426 "The NameNode is in safemode. " +
1427 safemodeTip);
1428 } else {
1429 ret.setReadyToBecomeActive();
1430 }
1431 } else if (retState == HAServiceState.ACTIVE) {
1432 ret.setReadyToBecomeActive();
1433 } else {
1434 ret.setNotReadyToBecomeActive("State is " + state);
1435 }
1436 return ret;
1437 }
1438
1439 synchronized HAServiceState getServiceState() {
1440 if (state == null) {
1441 return HAServiceState.INITIALIZING;
1442 }
1443 return state.getServiceState();
1444 }
1445
1446 /**
1447 * Register NameNodeStatusMXBean
1448 */
1449 private void registerNNSMXBean() {
1450 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
1451 }
1452
1453 @Override // NameNodeStatusMXBean
1454 public String getNNRole() {
1455 String roleStr = "";
1456 NamenodeRole role = getRole();
1457 if (null != role) {
1458 roleStr = role.toString();
1459 }
1460 return roleStr;
1461 }
1462
1463 @Override // NameNodeStatusMXBean
1464 public String getState() {
1465 String servStateStr = "";
1466 HAServiceState servState = getServiceState();
1467 if (null != servState) {
1468 servStateStr = servState.toString();
1469 }
1470 return servStateStr;
1471 }
1472
  /** @return the NameNode's RPC address as a host:port string. */
  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }
1477
  /** @return whether Hadoop security (Kerberos) is enabled for this process. */
  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }
1482
1483 /**
1484 * Shutdown the NN immediately in an ungraceful way. Used when it would be
1485 * unsafe for the NN to continue operating, e.g. during a failed HA state
1486 * transition.
1487 *
1488 * @param t exception which warrants the shutdown. Printed to the NN log
1489 * before exit.
1490 * @throws ExitException thrown only for testing.
1491 */
1492 protected synchronized void doImmediateShutdown(Throwable t)
1493 throws ExitException {
1494 String message = "Error encountered requiring NN shutdown. " +
1495 "Shutting down immediately.";
1496 try {
1497 LOG.fatal(message, t);
1498 } catch (Throwable ignored) {
1499 // This is unlikely to happen, but there's nothing we can do if it does.
1500 }
1501 terminate(1, t);
1502 }
1503
1504 /**
1505 * Class used to expose {@link NameNode} as context to {@link HAState}
1506 */
1507 protected class NameNodeHAContext implements HAContext {
1508 @Override
1509 public void setState(HAState s) {
1510 state = s;
1511 }
1512
1513 @Override
1514 public HAState getState() {
1515 return state;
1516 }
1517
1518 @Override
1519 public void startActiveServices() throws IOException {
1520 try {
1521 namesystem.startActiveServices();
1522 startTrashEmptier(conf);
1523 } catch (Throwable t) {
1524 doImmediateShutdown(t);
1525 }
1526 }
1527
1528 @Override
1529 public void stopActiveServices() throws IOException {
1530 try {
1531 if (namesystem != null) {
1532 namesystem.stopActiveServices();
1533 }
1534 stopTrashEmptier();
1535 } catch (Throwable t) {
1536 doImmediateShutdown(t);
1537 }
1538 }
1539
1540 @Override
1541 public void startStandbyServices() throws IOException {
1542 try {
1543 namesystem.startStandbyServices(conf);
1544 } catch (Throwable t) {
1545 doImmediateShutdown(t);
1546 }
1547 }
1548
1549 @Override
1550 public void prepareToStopStandbyServices() throws ServiceFailedException {
1551 try {
1552 namesystem.prepareToStopStandbyServices();
1553 } catch (Throwable t) {
1554 doImmediateShutdown(t);
1555 }
1556 }
1557
1558 @Override
1559 public void stopStandbyServices() throws IOException {
1560 try {
1561 if (namesystem != null) {
1562 namesystem.stopStandbyServices();
1563 }
1564 } catch (Throwable t) {
1565 doImmediateShutdown(t);
1566 }
1567 }
1568
1569 @Override
1570 public void writeLock() {
1571 namesystem.writeLock();
1572 }
1573
1574 @Override
1575 public void writeUnlock() {
1576 namesystem.writeUnlock();
1577 }
1578
1579 /** Check if an operation of given category is allowed */
1580 @Override
1581 public void checkOperation(final OperationCategory op)
1582 throws StandbyException {
1583 state.checkOperation(haContext, op);
1584 }
1585
1586 @Override
1587 public boolean allowStaleReads() {
1588 return allowStaleStandbyReads;
1589 }
1590
1591 }
1592
1593 public boolean isStandbyState() {
1594 return (state.equals(STANDBY_STATE));
1595 }
1596
1597 /**
1598 * Check that a request to change this node's HA state is valid.
1599 * In particular, verifies that, if auto failover is enabled, non-forced
1600 * requests from the HAAdmin CLI are rejected, and vice versa.
1601 *
1602 * @param req the request to check
1603 * @throws AccessControlException if the request is disallowed
1604 */
1605 void checkHaStateChange(StateChangeRequestInfo req)
1606 throws AccessControlException {
1607 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1608 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1609 switch (req.getSource()) {
1610 case REQUEST_BY_USER:
1611 if (autoHaEnabled) {
1612 throw new AccessControlException(
1613 "Manual HA control for this NameNode is disallowed, because " +
1614 "automatic HA is enabled.");
1615 }
1616 break;
1617 case REQUEST_BY_USER_FORCED:
1618 if (autoHaEnabled) {
1619 LOG.warn("Allowing manual HA control from " +
1620 Server.getRemoteAddress() +
1621 " even though automatic HA is enabled, because the user " +
1622 "specified the force flag");
1623 }
1624 break;
1625 case REQUEST_BY_ZKFC:
1626 if (!autoHaEnabled) {
1627 throw new AccessControlException(
1628 "Request from ZK failover controller at " +
1629 Server.getRemoteAddress() + " denied since automatic HA " +
1630 "is not enabled");
1631 }
1632 break;
1633 }
1634 }
1635 }