001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import com.google.common.annotations.VisibleForTesting;
021    import com.google.common.base.Joiner;
022    import com.google.common.base.Preconditions;
023    import com.google.common.collect.Lists;
024    
025    import org.apache.commons.logging.Log;
026    import org.apache.commons.logging.LogFactory;
027    import org.apache.hadoop.HadoopIllegalArgumentException;
028    import org.apache.hadoop.classification.InterfaceAudience;
029    import org.apache.hadoop.conf.Configuration;
030    import org.apache.hadoop.fs.FileSystem;
031    import org.apache.hadoop.fs.Trash;
032    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
033    import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
034    import org.apache.hadoop.ha.HAServiceStatus;
035    import org.apache.hadoop.ha.HealthCheckFailedException;
036    import org.apache.hadoop.ha.ServiceFailedException;
037    import org.apache.hadoop.hdfs.DFSConfigKeys;
038    import org.apache.hadoop.hdfs.DFSUtil;
039    import org.apache.hadoop.hdfs.HAUtil;
040    import org.apache.hadoop.hdfs.HdfsConfiguration;
041    import org.apache.hadoop.hdfs.protocol.ClientProtocol;
042    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
043    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
044    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
045    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046    import org.apache.hadoop.hdfs.server.namenode.ha.*;
047    import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
048    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
049    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
050    import org.apache.hadoop.hdfs.server.protocol.*;
051    import org.apache.hadoop.ipc.Server;
052    import org.apache.hadoop.ipc.StandbyException;
053    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
054    import org.apache.hadoop.metrics2.util.MBeans;
055    import org.apache.hadoop.net.NetUtils;
056    import org.apache.hadoop.security.AccessControlException;
057    import org.apache.hadoop.security.RefreshUserMappingsProtocol;
058    import org.apache.hadoop.security.SecurityUtil;
059    import org.apache.hadoop.security.UserGroupInformation;
060    import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
061    import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
062    import org.apache.hadoop.tools.GetUserMappingsProtocol;
063    import org.apache.hadoop.util.ExitUtil.ExitException;
064    import org.apache.hadoop.util.JvmPauseMonitor;
065    import org.apache.hadoop.util.ServicePlugin;
066    import org.apache.hadoop.util.StringUtils;
067    
068    import javax.management.ObjectName;
069    
070    import java.io.IOException;
071    import java.io.PrintStream;
072    import java.net.InetSocketAddress;
073    import java.net.URI;
074    import java.security.PrivilegedExceptionAction;
075    import java.util.ArrayList;
076    import java.util.Arrays;
077    import java.util.Collection;
078    import java.util.List;
079    
080    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
081    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
082    import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
083    import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
084    import static org.apache.hadoop.util.ExitUtil.terminate;
085    import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
086    
087    /**********************************************************
088     * NameNode serves as both directory namespace manager and
089     * "inode table" for the Hadoop DFS.  There is a single NameNode
090     * running in any DFS deployment.  (Well, except when there
091     * is a second backup/failover NameNode, or when using federated NameNodes.)
092     *
093     * The NameNode controls two critical tables:
094     *   1)  filename->blocksequence (namespace)
095     *   2)  block->machinelist ("inodes")
096     *
097     * The first table is stored on disk and is very precious.
098     * The second table is rebuilt every time the NameNode comes up.
099     *
100     * 'NameNode' refers to both this class as well as the 'NameNode server'.
101     * The 'FSNamesystem' class actually performs most of the filesystem
102     * management.  The majority of the 'NameNode' class itself is concerned
103     * with exposing the IPC interface and the HTTP server to the outside world,
104     * plus some configuration management.
105     *
106     * NameNode implements the
107     * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
108     * allows clients to ask for DFS services.
109     * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
110     * direct use by authors of DFS client code.  End-users should instead use the
111     * {@link org.apache.hadoop.fs.FileSystem} class.
112     *
113     * NameNode also implements the
114     * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
115     * used by DataNodes that actually store DFS data blocks.  These
116     * methods are invoked repeatedly and automatically by all the
117     * DataNodes in a DFS deployment.
118     *
119     * NameNode also implements the
120     * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
121     * used by secondary namenodes or rebalancing processes to get partial
122     * NameNode state, for example partial blocksMap etc.
123     **********************************************************/
124    @InterfaceAudience.Private
125    public class NameNode implements NameNodeStatusMXBean {
  // Class-load hook: make sure hdfs-default.xml / hdfs-site.xml resources are
  // registered with the Configuration machinery before any NameNode code runs.
  static{
    HdfsConfiguration.init();
  }
129    
  /**
   * Categories of operations supported by the namenode.
   *
   * NOTE(review): presumably used to classify RPC operations (e.g. for
   * HA-state permission checks) — confirm against NameNodeRpcServer usage.
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }
145      
146      /**
147       * HDFS configuration can have three types of parameters:
148       * <ol>
149       * <li>Parameters that are common for all the name services in the cluster.</li>
150       * <li>Parameters that are specific to a name service. These keys are suffixed
151       * with nameserviceId in the configuration. For example,
152       * "dfs.namenode.rpc-address.nameservice1".</li>
153       * <li>Parameters that are specific to a single name node. These keys are suffixed
154       * with nameserviceId and namenodeId in the configuration. for example,
155       * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
156       * </ol>
157       * 
158       * In the latter cases, operators may specify the configuration without
159       * any suffix, with a nameservice suffix, or with a nameservice and namenode
160       * suffix. The more specific suffix will take precedence.
161       * 
162       * These keys are specific to a given namenode, and thus may be configured
163       * globally, for a nameservice, or for a specific namenode within a nameservice.
164       */
165      public static final String[] NAMENODE_SPECIFIC_KEYS = {
166        DFS_NAMENODE_RPC_ADDRESS_KEY,
167        DFS_NAMENODE_RPC_BIND_HOST_KEY,
168        DFS_NAMENODE_NAME_DIR_KEY,
169        DFS_NAMENODE_EDITS_DIR_KEY,
170        DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
171        DFS_NAMENODE_CHECKPOINT_DIR_KEY,
172        DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
173        DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
174        DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
175        DFS_NAMENODE_HTTP_ADDRESS_KEY,
176        DFS_NAMENODE_HTTPS_ADDRESS_KEY,
177        DFS_NAMENODE_KEYTAB_FILE_KEY,
178        DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
179        DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
180        DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
181        DFS_NAMENODE_BACKUP_ADDRESS_KEY,
182        DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
183        DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
184        DFS_NAMENODE_USER_NAME_KEY,
185        DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
186        DFS_HA_FENCE_METHODS_KEY,
187        DFS_HA_ZKFC_PORT_KEY,
188        DFS_HA_FENCE_METHODS_KEY
189      };
190      
  /**
   * @see #NAMENODE_SPECIFIC_KEYS
   * These keys are specific to a nameservice, but may not be overridden
   * for a specific namenode.
   */
  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
  };
199      
200      private static final String USAGE = "Usage: java NameNode ["
201          + StartupOption.BACKUP.getName() + "] | ["
202          + StartupOption.CHECKPOINT.getName() + "] | ["
203          + StartupOption.FORMAT.getName() + " ["
204          + StartupOption.CLUSTERID.getName() + " cid ] ["
205          + StartupOption.FORCE.getName() + "] ["
206          + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
207          + StartupOption.UPGRADE.getName() + 
208            " [" + StartupOption.CLUSTERID.getName() + " cid]" +
209            " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | ["
210          + StartupOption.ROLLBACK.getName() + "] | ["
211          + StartupOption.ROLLINGUPGRADE.getName() + " <"
212          + RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|"
213          + RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | ["
214          + StartupOption.FINALIZE.getName() + "] | ["
215          + StartupOption.IMPORT.getName() + "] | ["
216          + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
217          + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
218          + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
219          + " ] ]";
220      
221      public long getProtocolVersion(String protocol, 
222                                     long clientVersion) throws IOException {
223        if (protocol.equals(ClientProtocol.class.getName())) {
224          return ClientProtocol.versionID; 
225        } else if (protocol.equals(DatanodeProtocol.class.getName())){
226          return DatanodeProtocol.versionID;
227        } else if (protocol.equals(NamenodeProtocol.class.getName())){
228          return NamenodeProtocol.versionID;
229        } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
230          return RefreshAuthorizationPolicyProtocol.versionID;
231        } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
232          return RefreshUserMappingsProtocol.versionID;
233        } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
234          return RefreshCallQueueProtocol.versionID;
235        } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
236          return GetUserMappingsProtocol.versionID;
237        } else {
238          throw new IOException("Unknown protocol to name node: " + protocol);
239        }
240      }
241        
  /** Default RPC port, used when an address string omits the port. */
  public static final int DEFAULT_PORT = 8020;
  /** General NameNode log. */
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  /** Log dedicated to namespace state-change messages. */
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  /** Log dedicated to block state-change messages. */
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  /** Shared singleton instances of the two HA states. */
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();
  
  /** The namesystem this node serves; assigned by loadNamesystem(). */
  protected FSNamesystem namesystem; 
  protected final Configuration conf;
  /** Role this instance was started as (see NamenodeRole). */
  protected final NamenodeRole role;
  /** Current HA state; volatile so transitions are visible across threads. */
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected final boolean allowStaleStandbyReads;

  
  /** httpServer */
  protected NameNodeHttpServer httpServer;
  /** Trash emptier daemon thread; started by startTrashEmptier(). */
  private Thread emptier;
  /** only used for testing purposes  */
  protected boolean stopRequested = false;
  /** Registration information of this name-node  */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;
  
  private NameNodeRpcServer rpcServer;

  /** JVM pause monitor; created and started in initialize(). */
  private JvmPauseMonitor pauseMonitor;
  /** MXBean name, retained so it can be unregistered in stop(). */
  private ObjectName nameNodeStatusBeanName;
  /**
   * The service name of the delegation token issued by the namenode. It is
   * the name service id in HA mode, or the rpc address in non-HA mode.
   */
  private String tokenServiceName;
277      
  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.  **/
  public static void format(Configuration conf) throws IOException {
    // Delegates to the three-argument overload with both boolean flags true.
    format(conf, true, true);
  }
283    
  /** Process-wide metrics instance; created by initMetrics(). */
  static NameNodeMetrics metrics;
  /** Startup progress, registered with metrics and the HTTP server. */
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  /** @return the RPC server as the combined NamenodeProtocols view. */
  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }
296      
  /** Create the process-wide metrics instance for the given role. */
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  /** @return the namenode metrics, or null before initMetrics() runs. */
  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   * 
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }
313    
  /**
   * Return the service name of the issued delegation token.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() { return tokenServiceName; }

  /** Parse a "host[:port]" string into a socket address, defaulting the
   *  port to {@link #DEFAULT_PORT} when it is absent. */
  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }
324      
  /**
   * Set the configuration property for the service rpc address
   * to address
   *
   * @param conf configuration to modify
   * @param address "host:port" string to record as the service RPC address
   */
  public static void setServiceAddress(Configuration conf,
                                           String address) {
    LOG.info("Setting ADDRESS " + address);
    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
  }
334      
335      /**
336       * Fetches the address for services to use when connecting to namenode
337       * based on the value of fallback returns null if the special
338       * address is not specified or returns the default namenode address
339       * to be used by both clients and services.
340       * Services here are datanodes, backup node, any non client connection
341       */
342      public static InetSocketAddress getServiceAddress(Configuration conf,
343                                                            boolean fallback) {
344        String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
345        if (addr == null || addr.isEmpty()) {
346          return fallback ? getAddress(conf) : null;
347        }
348        return getAddress(addr);
349      }
350    
  /** @return the namenode RPC address derived from the default FS URI. */
  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }
355    
356    
357      /**
358       * TODO:FEDERATION
359       * @param filesystemURI
360       * @return address of file system
361       */
362      public static InetSocketAddress getAddress(URI filesystemURI) {
363        String authority = filesystemURI.getAuthority();
364        if (authority == null) {
365          throw new IllegalArgumentException(String.format(
366              "Invalid URI for NameNode address (check %s): %s has no authority.",
367              FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
368        }
369        if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
370            filesystemURI.getScheme())) {
371          throw new IllegalArgumentException(String.format(
372              "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
373              FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
374              HdfsConstants.HDFS_URI_SCHEME));
375        }
376        return getAddress(authority);
377      }
378    
379      public static URI getUri(InetSocketAddress namenode) {
380        int port = namenode.getPort();
381        String portString = port == DEFAULT_PORT ? "" : (":"+port);
382        return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 
383            + namenode.getHostName()+portString);
384      }
385    
  //
  // Common NameNode methods implementation for the active name-node role.
  //
  /** @return the role this namenode was started as. */
  public NamenodeRole getRole() {
    return role;
  }

  /** @return true if this namenode is running as the given role. */
  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }
396    
  /**
   * Given a configuration get the address of the service rpc server
   * If the service rpc is not configured returns null
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    // fallback=false: never substitute the client RPC address here.
    return NameNode.getServiceAddress(conf, false);
  }

  /** @return the client RPC address, taken from the default FS URI. */
  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }
408      
409      /** Given a configuration get the bind host of the service rpc server
410       *  If the bind host is not configured returns null.
411       */
412      protected String getServiceRpcServerBindHost(Configuration conf) {
413        String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
414        if (addr == null || addr.isEmpty()) {
415          return null;
416        }
417        return addr;
418      }
419    
420      /** Given a configuration get the bind host of the client rpc server
421       *  If the bind host is not configured returns null.
422       */
423      protected String getRpcServerBindHost(Configuration conf) {
424        String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
425        if (addr == null || addr.isEmpty()) {
426          return null;
427        }
428        return addr;
429      }
430       
  /**
   * Modifies the configuration passed to contain the service rpc address setting
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  /** Records the actual client RPC address back into the configuration as
   *  the default filesystem URI. */
  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }
443    
  /** @return the address the HTTP server should use (overridable hook). */
  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }

  /** @return the NameNode HTTP address. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return  NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }
453    
  /** Load the namesystem from disk via {@code FSNamesystem.loadFromDisk}. */
  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  /** @return this node's registration, as built by {@link #setRegistration}. */
  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }
461    
  /** Build and cache this node's registration from the live RPC and HTTP
   *  addresses, the storage info, and the role. */
  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }
469    
470      /* optimize ugi lookup for RPC operations to avoid a trip through
471       * UGI.getCurrentUser which is synch'ed
472       */
473      public static UserGroupInformation getRemoteUser() throws IOException {
474        UserGroupInformation ugi = Server.getRemoteUser();
475        return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
476      }
477    
478    
  /**
   * Login as the configured user for the NameNode.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    // NOTE(review): the RPC host name is handed to SecurityUtil.login,
    // presumably for principal host substitution — confirm.
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
  }
487      
  /**
   * Initialize name-node.
   * 
   * @param conf the configuration
   */
  protected void initialize(Configuration conf) throws IOException {
    // Propagate the DFS percentile-interval setting to the UGI metrics key
    // when the latter has not been set explicitly.
    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
      if (intervals != null) {
        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
          intervals);
      }
    }

    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());
    StartupProgressMetrics.register(startupProgress);

    // The NAMENODE role starts the HTTP server here, before loading the
    // namesystem; other roles start it later in startCommonServices().
    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
    }
    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    // Token service name: name service id under HA, rpc address otherwise
    // (see the tokenServiceName field doc).
    final String nsId = getNameServiceId(conf);
    tokenServiceName = HAUtil.isHAEnabled(conf, nsId) ? nsId : NetUtils
            .getHostPortString(rpcServer.getRpcAddress());
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    
    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();

    startCommonServices(conf);
  }
527      
  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }
536    
  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    // Roles other than NAMENODE did not start the HTTP server in
    // initialize(), so bring it up here.
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    // Start any configured service plugins; a plugin failure is logged but
    // does not abort namenode startup.
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }
562      
  /** Stop the RPC server, namesystem, pause monitor, plugins, and HTTP server. */
  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          // Best-effort: keep stopping the remaining plugins.
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }   
    stopHttpServer();
  }
578      
  /**
   * Start the trash emptier daemon when fs.trash.interval is positive.
   * A zero interval disables trash; a negative one is rejected.
   */
  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }
    
    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
603      
604      private void stopTrashEmptier() {
605        if (this.emptier != null) {
606          emptier.interrupt();
607          emptier = null;
608        }
609      }
610      
  /** Create and start the HTTP server, wiring in the startup progress. */
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }
  
  /** Stop the HTTP server; failures are logged rather than propagated. */
  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }
624    
  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul> 
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster  
   * upgrade and create a snapshot of the current file system state</li> 
   * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the  
   *            cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 
   *            previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via configuration field: 
   * <tt>dfs.namenode.startup</tt>
   * 
   * The conf will be modified to reflect the actual ports on which 
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
   * 
   * @param conf  configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }
657    
658      protected NameNode(Configuration conf, NamenodeRole role) 
659          throws IOException { 
660        this.conf = conf;
661        this.role = role;
662        String nsId = getNameServiceId(conf);
663        String namenodeId = HAUtil.getNameNodeId(conf, nsId);
664        this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
665        state = createHAState(getStartupOption(conf));
666        this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
667        this.haContext = createHAContext();
668        try {
669          initializeGenericKeys(conf, nsId, namenodeId);
670          initialize(conf);
671          try {
672            haContext.writeLock();
673            state.prepareToEnterState(haContext);
674            state.enterState(haContext);
675          } finally {
676            haContext.writeUnlock();
677          }
678        } catch (IOException e) {
679          this.stop();
680          throw e;
681        } catch (HadoopIllegalArgumentException e) {
682          this.stop();
683          throw e;
684        }
685      }
686    
687      protected HAState createHAState(StartupOption startOpt) {
688        if (!haEnabled || startOpt == StartupOption.UPGRADE) {
689          return ACTIVE_STATE;
690        } else {
691          return STANDBY_STATE;
692        }
693      }
694    
695      protected HAContext createHAContext() {
696        return new NameNodeHAContext();
697      }
698    
699      /**
700       * Wait for service to finish.
701       * (Normally, it runs forever.)
702       */
703      public void join() {
704        try {
705          rpcServer.join();
706        } catch (InterruptedException ie) {
707          LOG.info("Caught interrupted exception ", ie);
708        }
709      }
710    
  /**
   * Stop all NameNode threads and wait for all to finish.
   */
  public void stop() {
    // Idempotent: only the first caller proceeds past this point.
    synchronized(this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        // Leave the current HA state cleanly before tearing down services.
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
    }
  }

  /** @return true once {@link #stop()} has been requested. */
  synchronized boolean isStopRequested() {
    return stopRequested;
  }
744    
745      /**
746       * Is the cluster currently in safe mode?
747       */
748      public boolean isInSafeMode() {
749        return namesystem.isInSafeMode();
750      }
751        
752      /** get FSImage */
753      @VisibleForTesting
754      public FSImage getFSImage() {
755        return namesystem.dir.fsImage;
756      }
757    
758      /**
759       * @return NameNode RPC address
760       */
761      public InetSocketAddress getNameNodeAddress() {
762        return rpcServer.getRpcAddress();
763      }
764    
765      /**
766       * @return NameNode RPC address in "host:port" string form
767       */
768      public String getNameNodeAddressHostPortString() {
769        return NetUtils.getHostPortString(rpcServer.getRpcAddress());
770      }
771    
772      /**
773       * @return NameNode service RPC address if configured, the
774       *    NameNode RPC address otherwise
775       */
776      public InetSocketAddress getServiceRpcAddress() {
777        final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
778        return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
779      }
780    
781      /**
782       * @return NameNode HTTP address, used by the Web UI, image transfer,
783       *    and HTTP-based file system clients like Hftp and WebHDFS
784       */
785      public InetSocketAddress getHttpAddress() {
786        return httpServer.getHttpAddress();
787      }
788    
789      /**
790       * @return NameNode HTTPS address, used by the Web UI, image transfer,
791       *    and HTTP-based file system clients like Hftp and WebHDFS
792       */
793      public InetSocketAddress getHttpsAddress() {
794        return httpServer.getHttpsAddress();
795      }
796    
797      /**
798       * Verify that configured directories exist, then
799       * Interactively confirm that formatting is desired 
800       * for each existing directory and format them.
801       * 
802       * @param conf
803       * @param force
804       * @return true if formatting was aborted, false otherwise
805       * @throws IOException
806       */
807      private static boolean format(Configuration conf, boolean force,
808          boolean isInteractive) throws IOException {
809        String nsId = DFSUtil.getNamenodeNameServiceId(conf);
810        String namenodeId = HAUtil.getNameNodeId(conf, nsId);
811        initializeGenericKeys(conf, nsId, namenodeId);
812        checkAllowFormat(conf);
813    
814        if (UserGroupInformation.isSecurityEnabled()) {
815          InetSocketAddress socAddr = getAddress(conf);
816          SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
817              DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
818        }
819        
820        Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
821        List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
822        List<URI> dirsToPrompt = new ArrayList<URI>();
823        dirsToPrompt.addAll(nameDirsToFormat);
824        dirsToPrompt.addAll(sharedDirs);
825        List<URI> editDirsToFormat = 
826                     FSNamesystem.getNamespaceEditsDirs(conf);
827    
828        // if clusterID is not provided - see if you can find the current one
829        String clusterId = StartupOption.FORMAT.getClusterId();
830        if(clusterId == null || clusterId.equals("")) {
831          //Generate a new cluster id
832          clusterId = NNStorage.newClusterID();
833        }
834        System.out.println("Formatting using clusterid: " + clusterId);
835        
836        FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
837        try {
838          FSNamesystem fsn = new FSNamesystem(conf, fsImage);
839          fsImage.getEditLog().initJournalsForWrite();
840    
841          if (!fsImage.confirmFormat(force, isInteractive)) {
842            return true; // aborted
843          }
844    
845          fsImage.format(fsn, clusterId);
846        } catch (IOException ioe) {
847          LOG.warn("Encountered exception during format: ", ioe);
848          fsImage.close();
849          throw ioe;
850        }
851        return false;
852      }
853    
854      public static void checkAllowFormat(Configuration conf) throws IOException {
855        if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 
856            DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
857          throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
858                    + " is set to false for this filesystem, so it "
859                    + "cannot be formatted. You will need to set "
860                    + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
861                    + "to true in order to format this filesystem");
862        }
863      }
864      
865      @VisibleForTesting
866      public static boolean initializeSharedEdits(Configuration conf) throws IOException {
867        return initializeSharedEdits(conf, true);
868      }
869      
870      @VisibleForTesting
871      public static boolean initializeSharedEdits(Configuration conf,
872          boolean force) throws IOException {
873        return initializeSharedEdits(conf, force, false);
874      }
875    
876      /**
877       * Clone the supplied configuration but remove the shared edits dirs.
878       *
879       * @param conf Supplies the original configuration.
880       * @return Cloned configuration without the shared edit dirs.
881       * @throws IOException on failure to generate the configuration.
882       */
883      private static Configuration getConfigurationWithoutSharedEdits(
884          Configuration conf)
885          throws IOException {
886        List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
887        String editsDirsString = Joiner.on(",").join(editsDirs);
888    
889        Configuration confWithoutShared = new Configuration(conf);
890        confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
891        confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
892            editsDirsString);
893        return confWithoutShared;
894      }
895    
896      /**
897       * Format a new shared edits dir and copy in enough edit log segments so that
898       * the standby NN can start up.
899       * 
900       * @param conf configuration
901       * @param force format regardless of whether or not the shared edits dir exists
902       * @param interactive prompt the user when a dir exists
903       * @return true if the command aborts, false otherwise
904       */
905      private static boolean initializeSharedEdits(Configuration conf,
906          boolean force, boolean interactive) throws IOException {
907        String nsId = DFSUtil.getNamenodeNameServiceId(conf);
908        String namenodeId = HAUtil.getNameNodeId(conf, nsId);
909        initializeGenericKeys(conf, nsId, namenodeId);
910        
911        if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
912          LOG.fatal("No shared edits directory configured for namespace " +
913              nsId + " namenode " + namenodeId);
914          return false;
915        }
916    
917        if (UserGroupInformation.isSecurityEnabled()) {
918          InetSocketAddress socAddr = getAddress(conf);
919          SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
920              DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
921        }
922    
923        NNStorage existingStorage = null;
924        FSImage sharedEditsImage = null;
925        try {
926          FSNamesystem fsns =
927              FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
928          
929          existingStorage = fsns.getFSImage().getStorage();
930          NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
931          
932          List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
933          
934          sharedEditsImage = new FSImage(conf,
935              Lists.<URI>newArrayList(),
936              sharedEditsDirs);
937          sharedEditsImage.getEditLog().initJournalsForWrite();
938          
939          if (!sharedEditsImage.confirmFormat(force, interactive)) {
940            return true; // abort
941          }
942          
943          NNStorage newSharedStorage = sharedEditsImage.getStorage();
944          // Call Storage.format instead of FSImage.format here, since we don't
945          // actually want to save a checkpoint - just prime the dirs with
946          // the existing namespace info
947          newSharedStorage.format(nsInfo);
948          sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
949    
950          // Need to make sure the edit log segments are in good shape to initialize
951          // the shared edits dir.
952          fsns.getFSImage().getEditLog().close();
953          fsns.getFSImage().getEditLog().initJournalsForWrite();
954          fsns.getFSImage().getEditLog().recoverUnclosedStreams();
955    
956          copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
957              conf);
958        } catch (IOException ioe) {
959          LOG.error("Could not initialize shared edits dir", ioe);
960          return true; // aborted
961        } finally {
962          if (sharedEditsImage != null) {
963            try {
964              sharedEditsImage.close();
965            }  catch (IOException ioe) {
966              LOG.warn("Could not close sharedEditsImage", ioe);
967            }
968          }
969          // Have to unlock storage explicitly for the case when we're running in a
970          // unit test, which runs in the same JVM as NNs.
971          if (existingStorage != null) {
972            try {
973              existingStorage.unlockAll();
974            } catch (IOException ioe) {
975              LOG.warn("Could not unlock storage directories", ioe);
976              return true; // aborted
977            }
978          }
979        }
980        return false; // did not abort
981      }
982    
  /**
   * Copy every edit log segment after the last checkpoint from the local
   * edit log into the newly formatted shared edits directory, replaying ops
   * one at a time and re-creating segment boundaries as they are seen.
   *
   * @param fsns namesystem whose local edit log is the copy source
   * @param sharedEditsDirs target shared edits dirs (must be non-empty)
   * @param newSharedStorage storage backing the shared dirs
   * @param conf configuration used to build the target edit log
   * @throws IOException if reading or writing edits fails
   */
  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();
    
    FSEditLog sourceEditLog = fsns.getFSImage().editLog;
    
    // Everything up to the last checkpoint is already captured in the image.
    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();
    
    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        // Tracks whether a segment is currently open in the target log.
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          if (!segmentOpen) {
            // First op of a new segment: open the target segment at its txid.
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            // Mirror the source's segment boundary in the target log.
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        if (segmentOpen) {
          // Source stream ended mid-segment; close the target segment cleanly.
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
1044      
1045      @VisibleForTesting
1046      public static boolean doRollback(Configuration conf,
1047          boolean isConfirmationNeeded) throws IOException {
1048        String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1049        String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1050        initializeGenericKeys(conf, nsId, namenodeId);
1051    
1052        FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1053        System.err.print(
1054            "\"rollBack\" will remove the current state of the file system,\n"
1055            + "returning you to the state prior to initiating your recent.\n"
1056            + "upgrade. This action is permanent and cannot be undone. If you\n"
1057            + "are performing a rollback in an HA environment, you should be\n"
1058            + "certain that no NameNode process is running on any host.");
1059        if (isConfirmationNeeded) {
1060          if (!confirmPrompt("Roll back file system state?")) {
1061            System.err.println("Rollback aborted.");
1062            return true;
1063          }
1064        }
1065        nsys.dir.fsImage.doRollback(nsys);
1066        return false;
1067      }
1068    
1069      private static void printUsage(PrintStream out) {
1070        out.println(USAGE + "\n");
1071      }
1072    
1073      @VisibleForTesting
1074      static StartupOption parseArguments(String args[]) {
1075        int argsLen = (args == null) ? 0 : args.length;
1076        StartupOption startOpt = StartupOption.REGULAR;
1077        for(int i=0; i < argsLen; i++) {
1078          String cmd = args[i];
1079          if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
1080            startOpt = StartupOption.FORMAT;
1081            for (i = i + 1; i < argsLen; i++) {
1082              if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1083                i++;
1084                if (i >= argsLen) {
1085                  // if no cluster id specified, return null
1086                  LOG.fatal("Must specify a valid cluster ID after the "
1087                      + StartupOption.CLUSTERID.getName() + " flag");
1088                  return null;
1089                }
1090                String clusterId = args[i];
1091                // Make sure an id is specified and not another flag
1092                if (clusterId.isEmpty() ||
1093                    clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
1094                    clusterId.equalsIgnoreCase(
1095                        StartupOption.NONINTERACTIVE.getName())) {
1096                  LOG.fatal("Must specify a valid cluster ID after the "
1097                      + StartupOption.CLUSTERID.getName() + " flag");
1098                  return null;
1099                }
1100                startOpt.setClusterId(clusterId);
1101              }
1102    
1103              if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
1104                startOpt.setForceFormat(true);
1105              }
1106    
1107              if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
1108                startOpt.setInteractiveFormat(false);
1109              }
1110            }
1111          } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
1112            startOpt = StartupOption.GENCLUSTERID;
1113          } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
1114            startOpt = StartupOption.REGULAR;
1115          } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
1116            startOpt = StartupOption.BACKUP;
1117          } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
1118            startOpt = StartupOption.CHECKPOINT;
1119          } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
1120            startOpt = StartupOption.UPGRADE;
1121            /* Can be followed by CLUSTERID with a required parameter or
1122             * RENAMERESERVED with an optional parameter
1123             */
1124            while (i + 1 < argsLen) {
1125              String flag = args[i + 1];
1126              if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1127                if (i + 2 < argsLen) {
1128                  i += 2;
1129                  startOpt.setClusterId(args[i]);
1130                } else {
1131                  LOG.fatal("Must specify a valid cluster ID after the "
1132                      + StartupOption.CLUSTERID.getName() + " flag");
1133                  return null;
1134                }
1135              } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
1136                  .getName())) {
1137                if (i + 2 < argsLen) {
1138                  FSImageFormat.setRenameReservedPairs(args[i + 2]);
1139                  i += 2;
1140                } else {
1141                  FSImageFormat.useDefaultRenameReservedPairs();
1142                  i += 1;
1143                }
1144              } else {
1145                LOG.fatal("Unknown upgrade flag " + flag);
1146                return null;
1147              }
1148            }
1149          } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
1150            startOpt = StartupOption.ROLLINGUPGRADE;
1151            ++i;
1152            startOpt.setRollingUpgradeStartupOption(args[i]);
1153          } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
1154            startOpt = StartupOption.ROLLBACK;
1155          } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
1156            startOpt = StartupOption.FINALIZE;
1157          } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
1158            startOpt = StartupOption.IMPORT;
1159          } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
1160            startOpt = StartupOption.BOOTSTRAPSTANDBY;
1161            return startOpt;
1162          } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
1163            startOpt = StartupOption.INITIALIZESHAREDEDITS;
1164            for (i = i + 1 ; i < argsLen; i++) {
1165              if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
1166                startOpt.setInteractiveFormat(false);
1167              } else if (StartupOption.FORCE.getName().equals(args[i])) {
1168                startOpt.setForceFormat(true);
1169              } else {
1170                LOG.fatal("Invalid argument: " + args[i]);
1171                return null;
1172              }
1173            }
1174            return startOpt;
1175          } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
1176            if (startOpt != StartupOption.REGULAR) {
1177              throw new RuntimeException("Can't combine -recover with " +
1178                  "other startup options.");
1179            }
1180            startOpt = StartupOption.RECOVER;
1181            while (++i < argsLen) {
1182              if (args[i].equalsIgnoreCase(
1183                    StartupOption.FORCE.getName())) {
1184                startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
1185              } else {
1186                throw new RuntimeException("Error parsing recovery options: " + 
1187                  "can't understand option \"" + args[i] + "\"");
1188              }
1189            }
1190          } else {
1191            return null;
1192          }
1193        }
1194        return startOpt;
1195      }
1196    
1197      private static void setStartupOption(Configuration conf, StartupOption opt) {
1198        conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
1199      }
1200    
1201      static StartupOption getStartupOption(Configuration conf) {
1202        return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
1203                                              StartupOption.REGULAR.toString()));
1204      }
1205    
  /**
   * Run metadata recovery: load the namesystem from disk and save a fresh
   * namespace image, prompting the user first unless forced.
   *
   * @param startOpt the RECOVER startup option (carries the force level)
   * @param conf configuration identifying the storage directories
   * @throws IOException if recovery fails
   */
  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    // Prompt the operator unless recovery was forced on the command line.
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      // Loading from disk applies any recoverable edits; saving the
      // namespace persists the recovered state as a new checkpoint.
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      // Duplicated catch kept separate for pre-Java-7 compatibility.
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }
1241    
  /**
   * Parse the startup option from argv and either run the corresponding
   * administrative command (format, rollback, etc. — these terminate the
   * JVM) or construct and return a running NameNode/BackupNode.
   *
   * @param argv command line arguments
   * @param conf configuration to use; a fresh HdfsConfiguration if null
   * @return a NameNode instance for normal startup modes, or null when the
   *         arguments were invalid (usage already printed)
   * @throws IOException if startup or the administrative command fails
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      // Note: most administrative cases below call terminate() and never
      // return; the trailing returns exist only to satisfy the compiler.
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        // Normal startup path.
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
1308    
1309      /**
1310       * In federation configuration is set for a set of
1311       * namenode and secondary namenode/backup/checkpointer, which are
1312       * grouped under a logical nameservice ID. The configuration keys specific 
1313       * to them have suffix set to configured nameserviceId.
1314       * 
1315       * This method copies the value from specific key of format key.nameserviceId
1316       * to key, to set up the generic configuration. Once this is done, only
1317       * generic version of the configuration is read in rest of the code, for
1318       * backward compatibility and simpler code changes.
1319       * 
1320       * @param conf
1321       *          Configuration object to lookup specific key and to set the value
1322       *          to the key passed. Note the conf object is modified
1323       * @param nameserviceId name service Id (to distinguish federated NNs)
1324       * @param namenodeId the namenode ID (to distinguish HA NNs)
1325       * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1326       */
1327      public static void initializeGenericKeys(Configuration conf,
1328          String nameserviceId, String namenodeId) {
1329        if ((nameserviceId != null && !nameserviceId.isEmpty()) || 
1330            (namenodeId != null && !namenodeId.isEmpty())) {
1331          if (nameserviceId != null) {
1332            conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1333          }
1334          if (namenodeId != null) {
1335            conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1336          }
1337          
1338          DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1339              NAMENODE_SPECIFIC_KEYS);
1340          DFSUtil.setGenericConf(conf, nameserviceId, null,
1341              NAMESERVICE_SPECIFIC_KEYS);
1342        }
1343        
1344        // If the RPC address is set use it to (re-)configure the default FS
1345        if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1346          URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1347              + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1348          conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1349          LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1350        }
1351      }
1352        
1353      /** 
1354       * Get the name service Id for the node
1355       * @return name service Id or null if federation is not configured
1356       */
1357      protected String getNameServiceId(Configuration conf) {
1358        return DFSUtil.getNamenodeNameServiceId(conf);
1359      }
1360      
1361      /**
1362       */
1363      public static void main(String argv[]) throws Exception {
1364        if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1365          System.exit(0);
1366        }
1367    
1368        try {
1369          StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1370          NameNode namenode = createNameNode(argv, null);
1371          if (namenode != null) {
1372            namenode.join();
1373          }
1374        } catch (Throwable e) {
1375          LOG.fatal("Exception in namenode join", e);
1376          terminate(1, e);
1377        }
1378      }
1379    
1380      synchronized void monitorHealth() 
1381          throws HealthCheckFailedException, AccessControlException {
1382        namesystem.checkSuperuserPrivilege();
1383        if (!haEnabled) {
1384          return; // no-op, if HA is not enabled
1385        }
1386        getNamesystem().checkAvailableResources();
1387        if (!getNamesystem().nameNodeHasResourcesAvailable()) {
1388          throw new HealthCheckFailedException(
1389              "The NameNode has no resources available");
1390        }
1391      }
1392      
1393      synchronized void transitionToActive() 
1394          throws ServiceFailedException, AccessControlException {
1395        namesystem.checkSuperuserPrivilege();
1396        if (!haEnabled) {
1397          throw new ServiceFailedException("HA for namenode is not enabled");
1398        }
1399        state.setState(haContext, ACTIVE_STATE);
1400      }
1401      
1402      synchronized void transitionToStandby() 
1403          throws ServiceFailedException, AccessControlException {
1404        namesystem.checkSuperuserPrivilege();
1405        if (!haEnabled) {
1406          throw new ServiceFailedException("HA for namenode is not enabled");
1407        }
1408        state.setState(haContext, STANDBY_STATE);
1409      }
1410    
1411      synchronized HAServiceStatus getServiceStatus()
1412          throws ServiceFailedException, AccessControlException {
1413        namesystem.checkSuperuserPrivilege();
1414        if (!haEnabled) {
1415          throw new ServiceFailedException("HA for namenode is not enabled");
1416        }
1417        if (state == null) {
1418          return new HAServiceStatus(HAServiceState.INITIALIZING);
1419        }
1420        HAServiceState retState = state.getServiceState();
1421        HAServiceStatus ret = new HAServiceStatus(retState);
1422        if (retState == HAServiceState.STANDBY) {
1423          String safemodeTip = namesystem.getSafeModeTip();
1424          if (!safemodeTip.isEmpty()) {
1425            ret.setNotReadyToBecomeActive(
1426                "The NameNode is in safemode. " +
1427                safemodeTip);
1428          } else {
1429            ret.setReadyToBecomeActive();
1430          }
1431        } else if (retState == HAServiceState.ACTIVE) {
1432          ret.setReadyToBecomeActive();
1433        } else {
1434          ret.setNotReadyToBecomeActive("State is " + state);
1435        }
1436        return ret;
1437      }
1438    
1439      synchronized HAServiceState getServiceState() {
1440        if (state == null) {
1441          return HAServiceState.INITIALIZING;
1442        }
1443        return state.getServiceState();
1444      }
1445    
1446      /**
1447       * Register NameNodeStatusMXBean
1448       */
1449      private void registerNNSMXBean() {
1450        nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
1451      }
1452    
1453      @Override // NameNodeStatusMXBean
1454      public String getNNRole() {
1455        String roleStr = "";
1456        NamenodeRole role = getRole();
1457        if (null != role) {
1458          roleStr = role.toString();
1459        }
1460        return roleStr;
1461      }
1462    
1463      @Override // NameNodeStatusMXBean
1464      public String getState() {
1465        String servStateStr = "";
1466        HAServiceState servState = getServiceState();
1467        if (null != servState) {
1468          servStateStr = servState.toString();
1469        }
1470        return servStateStr;
1471      }
1472    
  /**
   * @return the host:port string of this NameNode's RPC address, as exposed
   *         through the NameNodeStatus MXBean.
   */
  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }
1477    
  /**
   * @return true if Hadoop security (Kerberos) is enabled for this cluster,
   *         as reported by {@link UserGroupInformation}.
   */
  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }
1482    
1483      /**
1484       * Shutdown the NN immediately in an ungraceful way. Used when it would be
1485       * unsafe for the NN to continue operating, e.g. during a failed HA state
1486       * transition.
1487       * 
1488       * @param t exception which warrants the shutdown. Printed to the NN log
1489       *          before exit.
1490       * @throws ExitException thrown only for testing.
1491       */
1492      protected synchronized void doImmediateShutdown(Throwable t)
1493          throws ExitException {
1494        String message = "Error encountered requiring NN shutdown. " +
1495            "Shutting down immediately.";
1496        try {
1497          LOG.fatal(message, t);
1498        } catch (Throwable ignored) {
1499          // This is unlikely to happen, but there's nothing we can do if it does.
1500        }
1501        terminate(1, t);
1502      }
1503      
1504      /**
1505       * Class used to expose {@link NameNode} as context to {@link HAState}
1506       */
1507      protected class NameNodeHAContext implements HAContext {
1508        @Override
1509        public void setState(HAState s) {
1510          state = s;
1511        }
1512    
1513        @Override
1514        public HAState getState() {
1515          return state;
1516        }
1517    
1518        @Override
1519        public void startActiveServices() throws IOException {
1520          try {
1521            namesystem.startActiveServices();
1522            startTrashEmptier(conf);
1523          } catch (Throwable t) {
1524            doImmediateShutdown(t);
1525          }
1526        }
1527    
1528        @Override
1529        public void stopActiveServices() throws IOException {
1530          try {
1531            if (namesystem != null) {
1532              namesystem.stopActiveServices();
1533            }
1534            stopTrashEmptier();
1535          } catch (Throwable t) {
1536            doImmediateShutdown(t);
1537          }
1538        }
1539    
1540        @Override
1541        public void startStandbyServices() throws IOException {
1542          try {
1543            namesystem.startStandbyServices(conf);
1544          } catch (Throwable t) {
1545            doImmediateShutdown(t);
1546          }
1547        }
1548    
1549        @Override
1550        public void prepareToStopStandbyServices() throws ServiceFailedException {
1551          try {
1552            namesystem.prepareToStopStandbyServices();
1553          } catch (Throwable t) {
1554            doImmediateShutdown(t);
1555          }
1556        }
1557        
1558        @Override
1559        public void stopStandbyServices() throws IOException {
1560          try {
1561            if (namesystem != null) {
1562              namesystem.stopStandbyServices();
1563            }
1564          } catch (Throwable t) {
1565            doImmediateShutdown(t);
1566          }
1567        }
1568        
1569        @Override
1570        public void writeLock() {
1571          namesystem.writeLock();
1572        }
1573        
1574        @Override
1575        public void writeUnlock() {
1576          namesystem.writeUnlock();
1577        }
1578        
1579        /** Check if an operation of given category is allowed */
1580        @Override
1581        public void checkOperation(final OperationCategory op)
1582            throws StandbyException {
1583          state.checkOperation(haContext, op);
1584        }
1585        
1586        @Override
1587        public boolean allowStaleReads() {
1588          return allowStaleStandbyReads;
1589        }
1590    
1591      }
1592      
1593      public boolean isStandbyState() {
1594        return (state.equals(STANDBY_STATE));
1595      }
1596    
1597      /**
1598       * Check that a request to change this node's HA state is valid.
1599       * In particular, verifies that, if auto failover is enabled, non-forced
1600       * requests from the HAAdmin CLI are rejected, and vice versa.
1601       *
1602       * @param req the request to check
1603       * @throws AccessControlException if the request is disallowed
1604       */
1605      void checkHaStateChange(StateChangeRequestInfo req)
1606          throws AccessControlException {
1607        boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1608            DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1609        switch (req.getSource()) {
1610        case REQUEST_BY_USER:
1611          if (autoHaEnabled) {
1612            throw new AccessControlException(
1613                "Manual HA control for this NameNode is disallowed, because " +
1614                "automatic HA is enabled.");
1615          }
1616          break;
1617        case REQUEST_BY_USER_FORCED:
1618          if (autoHaEnabled) {
1619            LOG.warn("Allowing manual HA control from " +
1620                Server.getRemoteAddress() +
1621                " even though automatic HA is enabled, because the user " +
1622                "specified the force flag");
1623          }
1624          break;
1625        case REQUEST_BY_ZKFC:
1626          if (!autoHaEnabled) {
1627            throw new AccessControlException(
1628                "Request from ZK failover controller at " +
1629                Server.getRemoteAddress() + " denied since automatic HA " +
1630                "is not enabled"); 
1631          }
1632          break;
1633        }
1634      }
1635    }