Revert "YARN-7677. Docker image cannot set HADOOP_CONF_DIR. Contributed by Jim Brennan"
[hadoop.git] / hadoop-yarn-project / hadoop-yarn / hadoop-yarn-server / hadoop-yarn-server-nodemanager / src / main / java / org / apache / hadoop / yarn / server / nodemanager / LinuxContainerExecutor.java
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package org.apache.hadoop.yarn.server.nodemanager;
20
21 import com.google.common.annotations.VisibleForTesting;
22 import com.google.common.base.Optional;
23 import org.slf4j.Logger;
24 import org.slf4j.LoggerFactory;
25 import org.apache.hadoop.conf.Configuration;
26 import org.apache.hadoop.fs.Path;
27 import org.apache.hadoop.security.UserGroupInformation;
28 import org.apache.hadoop.util.ReflectionUtils;
29 import org.apache.hadoop.util.StringUtils;
30 import org.apache.hadoop.yarn.api.ApplicationConstants;
31 import org.apache.hadoop.yarn.api.records.ContainerId;
32 import org.apache.hadoop.yarn.conf.YarnConfiguration;
33 import org.apache.hadoop.yarn.exceptions.ConfigurationException;
34 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
35 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
36 import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch;
37 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
38 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
39 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
40 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
41 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
42 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
43 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DefaultLinuxContainerRuntime;
44 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DelegatingLinuxContainerRuntime;
45 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime;
46 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
47 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerCommandExecutor;
48 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRmCommand;
49 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
50 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
51 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
52 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext;
53 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContext;
54 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext;
55 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
56 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
57 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
58 import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
59 import org.apache.hadoop.yarn.server.nodemanager.executor.LocalizerStartContext;
60 import org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler;
61 import org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler;
62 import org.apache.hadoop.yarn.server.nodemanager.util.LCEResourcesHandler;
63 import java.io.File;
64 import java.io.IOException;
65 import java.net.InetSocketAddress;
66 import java.util.ArrayList;
67 import java.util.Arrays;
68 import java.util.List;
69 import java.util.Map;
70 import java.util.regex.Pattern;
71
72 import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*;
73
74 /**
75 * <p>This class provides {@link Container} execution using a native
76 * {@code container-executor} binary. By using a helper written in native code,
77 * this class is able to do several things that the
78 * {@link DefaultContainerExecutor} cannot, such as execution of applications
79 * as the applications' owners, provide localization that takes advantage of
80 * mapping the application owner to a UID on the execution host, resource
81 * management through Linux CGROUPS, and Docker support.</p>
82 *
83 * <p>If {@code hadoop.security.authentication} is set to {@code simple},
84 * then the
85 * {@code yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users}
86 * property will determine whether the {@code LinuxContainerExecutor} runs
87 * processes as the application owner or as the default user, as set in the
88 * {@code yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user}
89 * property.</p>
90 *
91 * <p>The {@code LinuxContainerExecutor} will manage applications through an
92 * appropriate {@link LinuxContainerRuntime} instance. This class uses a
93 * {@link DelegatingLinuxContainerRuntime} instance, which will delegate calls
94 * to either a {@link DefaultLinuxContainerRuntime} instance or a
95 * {@link DockerLinuxContainerRuntime} instance, depending on the job's
96 * configuration.</p>
97 *
98 * @see LinuxContainerRuntime
99 * @see DelegatingLinuxContainerRuntime
100 * @see DefaultLinuxContainerRuntime
101 * @see DockerLinuxContainerRuntime
102 * @see DockerLinuxContainerRuntime#isDockerContainerRequested
103 */
104 public class LinuxContainerExecutor extends ContainerExecutor {
105
106 private static final Logger LOG =
107 LoggerFactory.getLogger(LinuxContainerExecutor.class);
108
109 private String nonsecureLocalUser;
110 private Pattern nonsecureLocalUserPattern;
111 private LCEResourcesHandler resourcesHandler;
112 private boolean containerSchedPriorityIsSet = false;
113 private int containerSchedPriorityAdjustment = 0;
114 private boolean containerLimitUsers;
115 private ResourceHandler resourceHandlerChain;
116 private LinuxContainerRuntime linuxContainerRuntime;
117
/**
 * Numeric container exit codes known to this executor. Each constant wraps
 * the integer value it stands for.
 */
public enum ExitCode {
  SUCCESS(0),
  INVALID_ARGUMENT_NUMBER(1),
  INVALID_COMMAND_PROVIDED(3),
  INVALID_NM_ROOT_DIRS(5),
  SETUID_OPER_FAILED(6),
  UNABLE_TO_EXECUTE_CONTAINER_SCRIPT(7),
  UNABLE_TO_SIGNAL_CONTAINER(8),
  INVALID_CONTAINER_PID(9),
  OUT_OF_MEMORY(18),
  INITIALIZE_USER_FAILED(20),
  PATH_TO_DELETE_IS_NULL(21),
  INVALID_CONTAINER_EXEC_PERMISSIONS(22),
  INVALID_CONFIG_FILE(24),
  SETSID_OPER_FAILED(25),
  WRITE_PIDFILE_FAILED(26),
  WRITE_CGROUP_FAILED(27),
  TRAFFIC_CONTROL_EXECUTION_FAILED(28),
  DOCKER_RUN_FAILED(29),
  ERROR_OPENING_DOCKER_FILE(30),
  ERROR_READING_DOCKER_FILE(31),
  FEATURE_DISABLED(32),
  COULD_NOT_CREATE_SCRIPT_COPY(33),
  COULD_NOT_CREATE_CREDENTIALS_FILE(34),
  COULD_NOT_CREATE_WORK_DIRECTORIES(35),
  COULD_NOT_CREATE_APP_LOG_DIRECTORIES(36),
  COULD_NOT_CREATE_TMP_DIRECTORIES(37),
  ERROR_CREATE_CONTAINER_DIRECTORIES_ARGUMENTS(38);

  /** The integer value associated with this constant. */
  private final int value;

  ExitCode(int exitCode) {
    value = exitCode;
  }

  /**
   * Returns the numeric form of this exit code.
   *
   * @return the exit code as an int
   */
  public int getExitCode() {
    return value;
  }

  /**
   * Renders the exit code as its decimal number rather than the constant
   * name.
   *
   * @return the decimal string of the exit code
   */
  @Override
  public String toString() {
    return Integer.toString(value);
  }
}
169
170 /**
171 * Default constructor to allow for creation through reflection.
172 */
173 public LinuxContainerExecutor() {
174 }
175
/**
 * Builds an executor around a caller-supplied {@link LinuxContainerRuntime}.
 * Intended mainly for tests. When a runtime is supplied this way,
 * {@link #init(Context)} does not create or initialize one itself.
 *
 * @param linuxContainerRuntime the runtime this executor should delegate to
 */
public LinuxContainerExecutor(LinuxContainerRuntime linuxContainerRuntime) {
  super();
  this.linuxContainerRuntime = linuxContainerRuntime;
}
185
186 @Override
187 public void setConf(Configuration conf) {
188 super.setConf(conf);
189
190 resourcesHandler = getResourcesHandler(conf);
191
192 containerSchedPriorityIsSet = false;
193 if (conf.get(YarnConfiguration.NM_CONTAINER_EXECUTOR_SCHED_PRIORITY)
194 != null) {
195 containerSchedPriorityIsSet = true;
196 containerSchedPriorityAdjustment = conf
197 .getInt(YarnConfiguration.NM_CONTAINER_EXECUTOR_SCHED_PRIORITY,
198 YarnConfiguration.DEFAULT_NM_CONTAINER_EXECUTOR_SCHED_PRIORITY);
199 }
200 nonsecureLocalUser = conf.get(
201 YarnConfiguration.NM_NONSECURE_MODE_LOCAL_USER_KEY,
202 YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER);
203 nonsecureLocalUserPattern = Pattern.compile(
204 conf.get(YarnConfiguration.NM_NONSECURE_MODE_USER_PATTERN_KEY,
205 YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_USER_PATTERN));
206 containerLimitUsers = conf.getBoolean(
207 YarnConfiguration.NM_NONSECURE_MODE_LIMIT_USERS,
208 YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LIMIT_USERS);
209 if (!containerLimitUsers) {
210 LOG.warn(YarnConfiguration.NM_NONSECURE_MODE_LIMIT_USERS +
211 ": impersonation without authentication enabled");
212 }
213 }
214
215 private LCEResourcesHandler getResourcesHandler(Configuration conf) {
216 LCEResourcesHandler handler = ReflectionUtils.newInstance(
217 conf.getClass(YarnConfiguration.NM_LINUX_CONTAINER_RESOURCES_HANDLER,
218 DefaultLCEResourcesHandler.class, LCEResourcesHandler.class), conf);
219
220 // Stop using CgroupsLCEResourcesHandler
221 // use the resource handler chain instead
222 // ResourceHandlerModule will create the cgroup cpu module if
223 // CgroupsLCEResourcesHandler is set
224 if (handler instanceof CgroupsLCEResourcesHandler) {
225 handler =
226 ReflectionUtils.newInstance(DefaultLCEResourcesHandler.class, conf);
227 }
228 handler.setConf(conf);
229 return handler;
230 }
231
232 void verifyUsernamePattern(String user) {
233 if (!UserGroupInformation.isSecurityEnabled() &&
234 !nonsecureLocalUserPattern.matcher(user).matches()) {
235 throw new IllegalArgumentException("Invalid user name '" + user + "'," +
236 " it must match '" + nonsecureLocalUserPattern.pattern() + "'");
237 }
238 }
239
240 String getRunAsUser(String user) {
241 if (UserGroupInformation.isSecurityEnabled() ||
242 !containerLimitUsers) {
243 return user;
244 } else {
245 return nonsecureLocalUser;
246 }
247 }
248
249 /**
250 * Get the path to the {@code container-executor} binary. The path will
251 * be absolute.
252 *
253 * @param conf the {@link Configuration}
254 * @return the path to the {@code container-executor} binary
255 */
256 protected String getContainerExecutorExecutablePath(Configuration conf) {
257 String yarnHomeEnvVar =
258 System.getenv(ApplicationConstants.Environment.HADOOP_YARN_HOME.key());
259 File hadoopBin = new File(yarnHomeEnvVar, "bin");
260 String defaultPath =
261 new File(hadoopBin, "container-executor").getAbsolutePath();
262 return null == conf
263 ? defaultPath
264 : conf.get(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH,
265 defaultPath);
266 }
267
268 /**
269 * Add a niceness level to the process that will be executed. Adds
270 * {@code -n <nice>} to the given command. The niceness level will be
271 * taken from the
272 * {@code yarn.nodemanager.container-executer.os.sched.prioity} property.
273 *
274 * @param command the command to which to add the niceness setting.
275 */
276 protected void addSchedPriorityCommand(List<String> command) {
277 if (containerSchedPriorityIsSet) {
278 command.addAll(Arrays.asList("nice", "-n",
279 Integer.toString(containerSchedPriorityAdjustment)));
280 }
281 }
282
283 protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
284 return PrivilegedOperationExecutor.getInstance(getConf());
285 }
286
287 @Override
288 public void init(Context nmContext) throws IOException {
289 Configuration conf = super.getConf();
290
291 // Send command to executor which will just start up,
292 // verify configuration/permissions and exit
293 try {
294 PrivilegedOperation checkSetupOp = new PrivilegedOperation(
295 PrivilegedOperation.OperationType.CHECK_SETUP);
296 PrivilegedOperationExecutor privilegedOperationExecutor =
297 getPrivilegedOperationExecutor();
298
299 privilegedOperationExecutor.executePrivilegedOperation(checkSetupOp,
300 false);
301 } catch (PrivilegedOperationException e) {
302 int exitCode = e.getExitCode();
303 LOG.warn("Exit code from container executor initialization is : "
304 + exitCode, e);
305
306 throw new IOException("Linux container executor not configured properly"
307 + " (error=" + exitCode + ")", e);
308 }
309
310 try {
311 resourceHandlerChain = ResourceHandlerModule
312 .getConfiguredResourceHandlerChain(conf, nmContext);
313 if (LOG.isDebugEnabled()) {
314 LOG.debug("Resource handler chain enabled = " + (resourceHandlerChain
315 != null));
316 }
317 if (resourceHandlerChain != null) {
318 LOG.debug("Bootstrapping resource handler chain");
319 resourceHandlerChain.bootstrap(conf);
320 }
321 } catch (ResourceHandlerException e) {
322 LOG.error("Failed to bootstrap configured resource subsystems! ", e);
323 throw new IOException(
324 "Failed to bootstrap configured resource subsystems!");
325 }
326
327 try {
328 if (linuxContainerRuntime == null) {
329 LinuxContainerRuntime runtime = new DelegatingLinuxContainerRuntime();
330
331 runtime.initialize(conf, nmContext);
332 this.linuxContainerRuntime = runtime;
333 }
334 } catch (ContainerExecutionException e) {
335 LOG.error("Failed to initialize linux container runtime(s)!", e);
336 throw new IOException("Failed to initialize linux container runtime(s)!");
337 }
338
339 resourcesHandler.init(this);
340 }
341
342 @Override
343 public void startLocalizer(LocalizerStartContext ctx)
344 throws IOException, InterruptedException {
345 Path nmPrivateContainerTokensPath = ctx.getNmPrivateContainerTokens();
346 InetSocketAddress nmAddr = ctx.getNmAddr();
347 String user = ctx.getUser();
348 String appId = ctx.getAppId();
349 String locId = ctx.getLocId();
350 LocalDirsHandlerService dirsHandler = ctx.getDirsHandler();
351 List<String> localDirs = dirsHandler.getLocalDirs();
352 List<String> logDirs = dirsHandler.getLogDirs();
353
354 verifyUsernamePattern(user);
355 String runAsUser = getRunAsUser(user);
356 PrivilegedOperation initializeContainerOp = new PrivilegedOperation(
357 PrivilegedOperation.OperationType.INITIALIZE_CONTAINER);
358 List<String> prefixCommands = new ArrayList<>();
359
360 addSchedPriorityCommand(prefixCommands);
361 initializeContainerOp.appendArgs(
362 runAsUser,
363 user,
364 Integer.toString(
365 PrivilegedOperation.RunAsUserCommand.INITIALIZE_CONTAINER
366 .getValue()),
367 appId,
368 locId,
369 nmPrivateContainerTokensPath.toUri().getPath().toString(),
370 StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR,
371 localDirs),
372 StringUtils.join(PrivilegedOperation.LINUX_FILE_PATH_SEPARATOR,
373 logDirs));
374
375 File jvm = // use same jvm as parent
376 new File(new File(System.getProperty("java.home"), "bin"), "java");
377 initializeContainerOp.appendArgs(jvm.toString());
378 initializeContainerOp.appendArgs("-classpath");
379 initializeContainerOp.appendArgs(System.getProperty("java.class.path"));
380 String javaLibPath = System.getProperty("java.library.path");
381 if (javaLibPath != null) {
382 initializeContainerOp.appendArgs("-Djava.library.path=" + javaLibPath);
383 }
384
385 initializeContainerOp.appendArgs(ContainerLocalizer.getJavaOpts(getConf()));
386
387 List<String> localizerArgs = new ArrayList<>();
388
389 buildMainArgs(localizerArgs, user, appId, locId, nmAddr, localDirs);
390
391 Path containerLogDir = getContainerLogDir(dirsHandler, appId, locId);
392 localizerArgs = replaceWithContainerLogDir(localizerArgs, containerLogDir);
393
394 initializeContainerOp.appendArgs(localizerArgs);
395
396 try {
397 Configuration conf = super.getConf();
398 PrivilegedOperationExecutor privilegedOperationExecutor =
399 getPrivilegedOperationExecutor();
400
401 privilegedOperationExecutor.executePrivilegedOperation(prefixCommands,
402 initializeContainerOp, null, null, false, true);
403
404 } catch (PrivilegedOperationException e) {
405 int exitCode = e.getExitCode();
406 LOG.warn("Exit code from container " + locId + " startLocalizer is : "
407 + exitCode, e);
408
409 throw new IOException("Application " + appId + " initialization failed" +
410 " (exitCode=" + exitCode + ") with output: " + e.getOutput(), e);
411 }
412 }
413
414 private List<String> replaceWithContainerLogDir(List<String> commands,
415 Path containerLogDir) {
416 List<String> newCmds = new ArrayList<>(commands.size());
417
418 for (String item : commands) {
419 newCmds.add(item.replace(ApplicationConstants.LOG_DIR_EXPANSION_VAR,
420 containerLogDir.toString()));
421 }
422
423 return newCmds;
424 }
425
426 private Path getContainerLogDir(LocalDirsHandlerService dirsHandler,
427 String appId, String containerId) throws IOException {
428 String relativeContainerLogDir = ContainerLaunch
429 .getRelativeContainerLogDir(appId, containerId);
430
431 return dirsHandler.getLogPathForWrite(relativeContainerLogDir,
432 false);
433 }
434
435 /**
436 * Set up the {@link ContainerLocalizer}.
437 *
438 * @param command the current ShellCommandExecutor command line
439 * @param user localization user
440 * @param appId localized app id
441 * @param locId localizer id
442 * @param nmAddr nodemanager address
443 * @param localDirs list of local dirs
444 * @see ContainerLocalizer#buildMainArgs
445 */
446 @VisibleForTesting
447 public void buildMainArgs(List<String> command, String user, String appId,
448 String locId, InetSocketAddress nmAddr, List<String> localDirs) {
449 ContainerLocalizer.buildMainArgs(command, user, appId, locId, nmAddr,
450 localDirs, super.getConf());
451 }
452
453 @Override
454 public void prepareContainer(ContainerPrepareContext ctx) throws IOException {
455
456 ContainerRuntimeContext.Builder builder =
457 new ContainerRuntimeContext.Builder(ctx.getContainer());
458
459 builder.setExecutionAttribute(LOCALIZED_RESOURCES,
460 ctx.getLocalizedResources())
461 .setExecutionAttribute(USER, ctx.getUser())
462 .setExecutionAttribute(CONTAINER_LOCAL_DIRS,
463 ctx.getContainerLocalDirs())
464 .setExecutionAttribute(CONTAINER_RUN_CMDS, ctx.getCommands())
465 .setExecutionAttribute(CONTAINER_ID_STR,
466 ctx.getContainer().getContainerId().toString());
467
468 try {
469 linuxContainerRuntime.prepareContainer(builder.build());
470 } catch (ContainerExecutionException e) {
471 throw new IOException("Unable to prepare container: ", e);
472 }
473 }
474
475 @Override
476 protected void updateEnvForWhitelistVars(Map<String, String> env) {
477 if (linuxContainerRuntime.useWhitelistEnv(env)) {
478 super.updateEnvForWhitelistVars(env);
479 }
480 }
481
482 @Override
483 public int launchContainer(ContainerStartContext ctx)
484 throws IOException, ConfigurationException {
485 Container container = ctx.getContainer();
486 String user = ctx.getUser();
487
488 verifyUsernamePattern(user);
489
490 ContainerId containerId = container.getContainerId();
491
492 resourcesHandler.preExecute(containerId,
493 container.getResource());
494 String resourcesOptions = resourcesHandler.getResourcesOption(containerId);
495 String tcCommandFile = null;
496
497 try {
498 if (resourceHandlerChain != null) {
499 List<PrivilegedOperation> ops = resourceHandlerChain
500 .preStart(container);
501
502 if (ops != null) {
503 List<PrivilegedOperation> resourceOps = new ArrayList<>();
504
505 resourceOps.add(new PrivilegedOperation(
506 PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
507 resourcesOptions));
508
509 for (PrivilegedOperation op : ops) {
510 switch (op.getOperationType()) {
511 case ADD_PID_TO_CGROUP:
512 resourceOps.add(op);
513 break;
514 case TC_MODIFY_STATE:
515 tcCommandFile = op.getArguments().get(0);
516 break;
517 default:
518 LOG.warn("PrivilegedOperation type unsupported in launch: "
519 + op.getOperationType());
520 }
521 }
522
523 if (resourceOps.size() > 1) {
524 //squash resource operations
525 try {
526 PrivilegedOperation operation = PrivilegedOperationExecutor
527 .squashCGroupOperations(resourceOps);
528 resourcesOptions = operation.getArguments().get(0);
529 } catch (PrivilegedOperationException e) {
530 LOG.error("Failed to squash cgroup operations!", e);
531 throw new ResourceHandlerException(
532 "Failed to squash cgroup operations!");
533 }
534 }
535 }
536 }
537 } catch (ResourceHandlerException e) {
538 LOG.error("ResourceHandlerChain.preStart() failed!", e);
539 throw new IOException("ResourceHandlerChain.preStart() failed!", e);
540 }
541
542 try {
543 Path pidFilePath = getPidFilePath(containerId);
544 if (pidFilePath != null) {
545
546 ContainerRuntimeContext runtimeContext = buildContainerRuntimeContext(
547 ctx, pidFilePath, resourcesOptions, tcCommandFile);
548
549 linuxContainerRuntime.launchContainer(runtimeContext);
550 } else {
551 LOG.info(
552 "Container was marked as inactive. Returning terminated error");
553 return ContainerExecutor.ExitCode.TERMINATED.getExitCode();
554 }
555 } catch (ContainerExecutionException e) {
556 int exitCode = e.getExitCode();
557 LOG.warn("Exit code from container " + containerId + " is : " + exitCode);
558 // 143 (SIGTERM) and 137 (SIGKILL) exit codes means the container was
559 // terminated/killed forcefully. In all other cases, log the
560 // output
561 if (exitCode != ContainerExecutor.ExitCode.FORCE_KILLED.getExitCode()
562 && exitCode != ContainerExecutor.ExitCode.TERMINATED.getExitCode()) {
563 LOG.warn("Exception from container-launch with container ID: "
564 + containerId + " and exit code: " + exitCode, e);
565
566 StringBuilder builder = new StringBuilder();
567 builder.append("Exception from container-launch.\n");
568 builder.append("Container id: " + containerId + "\n");
569 builder.append("Exit code: " + exitCode + "\n");
570 if (!Optional.fromNullable(e.getErrorOutput()).or("").isEmpty()) {
571 builder.append("Exception message: " + e.getErrorOutput() + "\n");
572 }
573 //Skip stack trace
574 String output = e.getOutput();
575 if (output != null && !e.getOutput().isEmpty()) {
576 builder.append("Shell output: " + output + "\n");
577 }
578 String diagnostics = builder.toString();
579 logOutput(diagnostics);
580 container.handle(new ContainerDiagnosticsUpdateEvent(containerId,
581 diagnostics));
582 if (exitCode ==
583 ExitCode.INVALID_CONTAINER_EXEC_PERMISSIONS.getExitCode() ||
584 exitCode ==
585 ExitCode.INVALID_CONFIG_FILE.getExitCode() ||
586 exitCode ==
587 ExitCode.COULD_NOT_CREATE_SCRIPT_COPY.getExitCode() ||
588 exitCode ==
589 ExitCode.COULD_NOT_CREATE_CREDENTIALS_FILE.getExitCode() ||
590 exitCode ==
591 ExitCode.COULD_NOT_CREATE_WORK_DIRECTORIES.getExitCode() ||
592 exitCode ==
593 ExitCode.COULD_NOT_CREATE_APP_LOG_DIRECTORIES.getExitCode() ||
594 exitCode ==
595 ExitCode.COULD_NOT_CREATE_TMP_DIRECTORIES.getExitCode()) {
596 throw new ConfigurationException(
597 "Linux Container Executor reached unrecoverable exception", e);
598 }
599 } else {
600 container.handle(new ContainerDiagnosticsUpdateEvent(containerId,
601 "Container killed on request. Exit code is " + exitCode));
602 }
603 return exitCode;
604 } finally {
605 resourcesHandler.postExecute(containerId);
606
607 try {
608 if (resourceHandlerChain != null) {
609 resourceHandlerChain.postComplete(containerId);
610 }
611 } catch (ResourceHandlerException e) {
612 LOG.warn("ResourceHandlerChain.postComplete failed for " +
613 "containerId: " + containerId + ". Exception: " + e);
614 }
615 }
616
617 return 0;
618 }
619
620 private ContainerRuntimeContext buildContainerRuntimeContext(
621 ContainerStartContext ctx, Path pidFilePath,
622 String resourcesOptions, String tcCommandFile) {
623
624 List<String> prefixCommands = new ArrayList<>();
625 addSchedPriorityCommand(prefixCommands);
626
627 Container container = ctx.getContainer();
628
629 ContainerRuntimeContext.Builder builder = new ContainerRuntimeContext
630 .Builder(container);
631 if (prefixCommands.size() > 0) {
632 builder.setExecutionAttribute(CONTAINER_LAUNCH_PREFIX_COMMANDS,
633 prefixCommands);
634 }
635
636 builder.setExecutionAttribute(LOCALIZED_RESOURCES,
637 ctx.getLocalizedResources())
638 .setExecutionAttribute(RUN_AS_USER, getRunAsUser(ctx.getUser()))
639 .setExecutionAttribute(USER, ctx.getUser())
640 .setExecutionAttribute(APPID, ctx.getAppId())
641 .setExecutionAttribute(CONTAINER_ID_STR,
642 container.getContainerId().toString())
643 .setExecutionAttribute(CONTAINER_WORK_DIR, ctx.getContainerWorkDir())
644 .setExecutionAttribute(NM_PRIVATE_CONTAINER_SCRIPT_PATH,
645 ctx.getNmPrivateContainerScriptPath())
646 .setExecutionAttribute(NM_PRIVATE_TOKENS_PATH,
647 ctx.getNmPrivateTokensPath())
648 .setExecutionAttribute(PID_FILE_PATH, pidFilePath)
649 .setExecutionAttribute(LOCAL_DIRS, ctx.getLocalDirs())
650 .setExecutionAttribute(LOG_DIRS, ctx.getLogDirs())
651 .setExecutionAttribute(FILECACHE_DIRS, ctx.getFilecacheDirs())
652 .setExecutionAttribute(USER_LOCAL_DIRS, ctx.getUserLocalDirs())
653 .setExecutionAttribute(CONTAINER_LOCAL_DIRS, ctx.getContainerLocalDirs())
654 .setExecutionAttribute(USER_FILECACHE_DIRS, ctx.getUserFilecacheDirs())
655 .setExecutionAttribute(APPLICATION_LOCAL_DIRS,
656 ctx.getApplicationLocalDirs())
657 .setExecutionAttribute(CONTAINER_LOG_DIRS, ctx.getContainerLogDirs())
658 .setExecutionAttribute(RESOURCES_OPTIONS, resourcesOptions);
659
660 if (tcCommandFile != null) {
661 builder.setExecutionAttribute(TC_COMMAND_FILE, tcCommandFile);
662 }
663
664 return builder.build();
665 }
666
667 @Override
668 public String[] getIpAndHost(Container container)
669 throws ContainerExecutionException {
670 return linuxContainerRuntime.getIpAndHost(container);
671 }
672
673 @Override
674 public int reacquireContainer(ContainerReacquisitionContext ctx)
675 throws IOException, InterruptedException {
676 ContainerId containerId = ctx.getContainerId();
677
678 try {
679 //Resource handler chain needs to reacquire container state
680 //as well
681 if (resourceHandlerChain != null) {
682 try {
683 resourceHandlerChain.reacquireContainer(containerId);
684 } catch (ResourceHandlerException e) {
685 LOG.warn("ResourceHandlerChain.reacquireContainer failed for " +
686 "containerId: " + containerId + " Exception: " + e);
687 }
688 }
689
690 return super.reacquireContainer(ctx);
691 } finally {
692 resourcesHandler.postExecute(containerId);
693 if (resourceHandlerChain != null) {
694 try {
695 resourceHandlerChain.postComplete(containerId);
696 } catch (ResourceHandlerException e) {
697 LOG.warn("ResourceHandlerChain.postComplete failed for " +
698 "containerId: " + containerId + " Exception: " + e);
699 }
700 }
701 }
702 }
703
704 @Override
705 public boolean signalContainer(ContainerSignalContext ctx)
706 throws IOException {
707 Container container = ctx.getContainer();
708 String user = ctx.getUser();
709 String pid = ctx.getPid();
710 Signal signal = ctx.getSignal();
711
712 verifyUsernamePattern(user);
713 String runAsUser = getRunAsUser(user);
714
715 ContainerRuntimeContext runtimeContext = new ContainerRuntimeContext
716 .Builder(container)
717 .setExecutionAttribute(RUN_AS_USER, runAsUser)
718 .setExecutionAttribute(USER, user)
719 .setExecutionAttribute(PID, pid)
720 .setExecutionAttribute(SIGNAL, signal)
721 .build();
722
723 try {
724 linuxContainerRuntime.signalContainer(runtimeContext);
725 } catch (ContainerExecutionException e) {
726 int retCode = e.getExitCode();
727 if (retCode == PrivilegedOperation.ResultCode.INVALID_CONTAINER_PID
728 .getValue()) {
729 return false;
730 }
731 LOG.warn("Error in signalling container " + pid + " with " + signal
732 + "; exit = " + retCode, e);
733 logOutput(e.getOutput());
734 throw new IOException("Problem signalling container " + pid + " with "
735 + signal + "; output: " + e.getOutput() + " and exitCode: "
736 + retCode, e);
737 }
738 return true;
739 }
740
741 /**
742 * Performs the tasks necessary to reap the container.
743 *
744 * @param ctx Encapsulates information necessary for reaping containers.
745 * @return true if the reaping was successful.
746 * @throws IOException if an error occurs while reaping the container.
747 */
748 @Override
749 public boolean reapContainer(ContainerReapContext ctx) throws IOException {
750 Container container = ctx.getContainer();
751 String user = ctx.getUser();
752 String runAsUser = getRunAsUser(user);
753 ContainerRuntimeContext runtimeContext = new ContainerRuntimeContext
754 .Builder(container)
755 .setExecutionAttribute(RUN_AS_USER, runAsUser)
756 .setExecutionAttribute(USER, user)
757 .build();
758 try {
759 linuxContainerRuntime.reapContainer(runtimeContext);
760 } catch (ContainerExecutionException e) {
761 int retCode = e.getExitCode();
762 if (retCode != 0) {
763 return false;
764 }
765 LOG.warn("Error in reaping container "
766 + container.getContainerId().toString() + " exit = " + retCode, e);
767 logOutput(e.getOutput());
768 throw new IOException("Error in reaping container "
769 + container.getContainerId().toString() + " exit = " + retCode, e);
770 }
771 return true;
772 }
773
774 @Override
775 public void deleteAsUser(DeletionAsUserContext ctx) {
776 String user = ctx.getUser();
777 Path dir = ctx.getSubDir();
778 List<Path> baseDirs = ctx.getBasedirs();
779
780 verifyUsernamePattern(user);
781
782 String runAsUser = getRunAsUser(user);
783 String dirString = dir == null ? "" : dir.toUri().getPath();
784
785 PrivilegedOperation deleteAsUserOp = new PrivilegedOperation(
786 PrivilegedOperation.OperationType.DELETE_AS_USER, (String) null);
787
788 deleteAsUserOp.appendArgs(
789 runAsUser,
790 user,
791 Integer.toString(PrivilegedOperation.
792 RunAsUserCommand.DELETE_AS_USER.getValue()),
793 dirString);
794
795 List<String> pathsToDelete = new ArrayList<String>();
796 if (baseDirs == null || baseDirs.size() == 0) {
797 LOG.info("Deleting absolute path : " + dir);
798 pathsToDelete.add(dirString);
799 } else {
800 for (Path baseDir : baseDirs) {
801 Path del = dir == null ? baseDir : new Path(baseDir, dir);
802 LOG.info("Deleting path : " + del);
803 pathsToDelete.add(del.toString());
804 deleteAsUserOp.appendArgs(baseDir.toUri().getPath());
805 }
806 }
807
808 try {
809 Configuration conf = super.getConf();
810 PrivilegedOperationExecutor privilegedOperationExecutor =
811 getPrivilegedOperationExecutor();
812
813 privilegedOperationExecutor.executePrivilegedOperation(deleteAsUserOp,
814 false);
815 } catch (PrivilegedOperationException e) {
816 int exitCode = e.getExitCode();
817 LOG.error("DeleteAsUser for " + StringUtils.join(" ", pathsToDelete)
818 + " returned with exit code: " + exitCode, e);
819 }
820 }
821
822 @Override
823 protected File[] readDirAsUser(String user, Path dir) {
824 List<File> files = new ArrayList<>();
825 PrivilegedOperation listAsUserOp = new PrivilegedOperation(
826 PrivilegedOperation.OperationType.LIST_AS_USER, (String)null);
827 String runAsUser = getRunAsUser(user);
828 String dirString = "";
829
830 if (dir != null) {
831 dirString = dir.toUri().getPath();
832 }
833
834 listAsUserOp.appendArgs(runAsUser, user,
835 Integer.toString(
836 PrivilegedOperation.RunAsUserCommand.LIST_AS_USER.getValue()),
837 dirString);
838
839 try {
840 PrivilegedOperationExecutor privOpExecutor =
841 getPrivilegedOperationExecutor();
842
843 String results =
844 privOpExecutor.executePrivilegedOperation(listAsUserOp, true);
845
846 for (String file: results.split("\n")) {
847 // The container-executor always dumps its log output to stdout, which
848 // includes 3 lines that start with "main : "
849 if (!file.startsWith("main :")) {
850 files.add(new File(new File(dirString), file));
851 }
852 }
853 } catch (PrivilegedOperationException e) {
854 LOG.error("ListAsUser for " + dir + " returned with exit code: "
855 + e.getExitCode(), e);
856 }
857
858 return files.toArray(new File[files.size()]);
859 }
860
861 @Override
862 public void symLink(String target, String symlink) {
863
864 }
865
866 @Override
867 public boolean isContainerAlive(ContainerLivenessContext ctx)
868 throws IOException {
869 String user = ctx.getUser();
870 String pid = ctx.getPid();
871 Container container = ctx.getContainer();
872
873 // Send a test signal to the process as the user to see if it's alive
874 return signalContainer(new ContainerSignalContext.Builder()
875 .setContainer(container)
876 .setUser(user)
877 .setPid(pid)
878 .setSignal(Signal.NULL)
879 .build());
880 }
881
882 /**
883 * Mount a CGROUPS controller at the requested mount point and create
884 * a hierarchy for the NodeManager to manage.
885 *
886 * @param cgroupKVs a key-value pair of the form
887 * {@code controller=mount-path}
888 * @param hierarchy the top directory of the hierarchy for the NodeManager
889 * @throws IOException if there is a problem mounting the CGROUPS
890 */
891 public void mountCgroups(List<String> cgroupKVs, String hierarchy)
892 throws IOException {
893 try {
894 PrivilegedOperation mountCGroupsOp = new PrivilegedOperation(
895 PrivilegedOperation.OperationType.MOUNT_CGROUPS, hierarchy);
896 Configuration conf = super.getConf();
897
898 mountCGroupsOp.appendArgs(cgroupKVs);
899 PrivilegedOperationExecutor privilegedOperationExecutor =
900 getPrivilegedOperationExecutor();
901
902 privilegedOperationExecutor.executePrivilegedOperation(mountCGroupsOp,
903 false);
904 } catch (PrivilegedOperationException e) {
905 int exitCode = e.getExitCode();
906 LOG.warn("Exception in LinuxContainerExecutor mountCgroups ", e);
907
908 throw new IOException("Problem mounting cgroups " + cgroupKVs +
909 "; exit code = " + exitCode + " and output: " + e.getOutput(),
910 e);
911 }
912 }
913
914 @VisibleForTesting
915 public ResourceHandler getResourceHandler() {
916 return resourceHandlerChain;
917 }
918
919 /**
920 * Remove the docker container referenced in the context.
921 *
922 * @param containerId the containerId for the container.
923 */
924 public void removeDockerContainer(String containerId) {
925 try {
926 PrivilegedOperationExecutor privOpExecutor =
927 PrivilegedOperationExecutor.getInstance(super.getConf());
928 if (DockerCommandExecutor.isRemovable(
929 DockerCommandExecutor.getContainerStatus(containerId,
930 super.getConf(), privOpExecutor))) {
931 LOG.info("Removing Docker container : " + containerId);
932 DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId);
933 DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId,
934 null, super.getConf(), privOpExecutor, false);
935 }
936 } catch (ContainerExecutionException e) {
937 LOG.warn("Unable to remove docker container: " + containerId);
938 }
939 }
940 }