/***************************************************************************
* Copyright (c) 2012-2013 VMware, Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
package com.vmware.aurora.vc.vcservice;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.RejectedExecutionException;
import org.apache.log4j.Logger;
import com.vmware.aurora.exception.AuroraException;
import com.vmware.aurora.exception.VcException;
import com.vmware.aurora.util.AuAssert;
import com.vmware.aurora.vc.MoUtil;
import com.vmware.aurora.vc.VcTask;
import com.vmware.aurora.vc.VcTaskMgr;
import com.vmware.aurora.vc.vcevent.VcEventListener;
import com.vmware.aurora.vc.vcservice.VcConnectionStatusChangeEvent.VcConnectionStatusChangeCallback;
import com.vmware.vim.binding.vim.AboutInfo;
import com.vmware.vim.binding.vim.fault.NotAuthenticated;
import com.vmware.vim.binding.vmodl.ManagedObjectReference;
import com.vmware.vim.vmomi.core.exception.InternalException;
import com.vmware.vim.vmomi.core.exception.UnmarshallException;
import com.vmware.vim.vmomi.core.types.VmodlContext;
/**
* Here is a general discussion on VC interface.
*
* VC MANAGED AND DATA OBJECTS
*
* There are two types of objects in VC: managed objects and data objects.
*
* Managed objects provide interfaces for performing queries and run
* tasks on an VC object. These are API level interfaces that don't actually
* contain data.
*
* Data objects contain values of VC objects at a given instance. It is
* obtained through queries on managed objects. Data objects may nest
* and accessing child objects do not require new VC queries.
*
* All managed objects related to Aurora are mapped to subclasses of
* {@link VcObject}. Most VC operations are performed through methods of
* these classes. One exception is {@link VcFileManager}, which contains
* some services for file operations.
*
* {@link VcObject} contains snapshots of VC object data through
* its member variables that point to VC data objects.
* Once initialized, all data referenced by its member variables
* can be accessed without new queries to VC. We (aurora) can
* add access functions to get a sub-data-object or a field of it.
*
* Refer to comments in class {@link VcObject} for more info.
*
* VC SESSIONS
*
* In order to deal with problems with VC connection interruption
* and errors, we enclose a section of code that accesses VC with
* wrapper class so that the code can be retried if exceptions are
* thrown due to VC connection errors. The section of code wrapped
* in a {@link body()} function is refered to as VC session.
* An abstract class {@link VcSession} defines the abstraction of
* the session for execution in a VC context. Depending on different
* VC operations, one or two of the following methods may be used
* to program VC accesses.
*
* VC OPERATIONS
*
* In general, there are two types of VC operations.
*
* 1. Queries that only read VC states.
* These operations are stateless and can be repeated without
* side effects. We support having these queries running in
* a VcSession through {@link inVcSessionDo}. Such a session
* can be embedded in SagaSteps that run CMS transactions.
* Refer to {@link com.vmware.aurora.vc.test.CreateRbSaga.CreateRbStep}
* for code example.
* 2. Queries that modifies VC states or run as VC tasks.
* These operations need to run in a Saga Step that extends
* {@link VcTaskSagaStep}. This Saga step cannot do CMS transactions
* in the execution. Refer to {@link VcTaskSagaStep} for usage
* and {@link com.vmware.aurora.vc.test.VmOpSaga} for code examples.
*
*/
/**
* {@link VcContext} class manages VC service life cycle.
*
* All accesses to VC should run from VC context.
*/
public class VcContext {
private static Logger logger = Logger.getLogger(VcContext.class);
/* vc connectivity requirements. */
private final static int vcRequiredMajorVersion = 5; // Starting with VC 5.0.
private final static String vcApiName = "VirtualCenter";
private final static String esxApiName = "HostAgent";
/*
* The global VC service object representing a VC session, initialized
* when system is connected to VC.
*/
private static VcService vcService = null;
/* A global VC session dedicated to Event Listener. */
private static VcService eventService = null;
private static VcEventListener eventListener = null;
private static VcTaskMgr taskMgr = new VcTaskMgr();
/* A global VC session dedicated to long running VC calls. */
private static VcService longCallService = null;
private static VcLongCallHandler longCallHandler = null;
private static VcCleaner vcCleaner;
private static String vcVersion;
private static boolean initialized = false; // Configured & initialized.
private static boolean shutdown = false; // Shutdown request received.
private static boolean fatalInitError = false; // Retrying will not help.
private static int genCount = 0; // Init attempt genCount.
private static Map<VcConnectionStatusChangeEvent, VcConnectionStatusChangeCallback> statusChangeEventCallback =
new HashMap<VcConnectionStatusChangeEvent, VcConnectionStatusChangeCallback>();
private static int eventCount = 0;
/**
* A unique identifier of the VC server, initialized once when vcService
* is created.
*/
private static String serverGuid = null;
public static String getServerGuid() { return serverGuid; }
/**
* Return VC version if a connection is established.
* @return vc version
*/
public static String getVcVersion() {
return vcVersion;
}
/**
* Are we in the process of shutting down VcContext?
* @return true if shutdown was requested
*/
public static synchronized boolean isShuttingDown() {
return shutdown;
}
/**
* Report the connection state with vCenter.
* @return true if connected with vCenter
*/
public static boolean isServiceConnected() {
VcService svc = vcService;
return svc != null && svc.isConnected();
}
/**
* Returns global event listener.
* @return event listener
*/
public static VcEventListener getEventListener() {
return eventListener;
}
/**
* @return VC long call handler
*/
public static VcLongCallHandler getVcLongCallHandler() {
return longCallHandler;
}
/**
* This should only be called from {\link VcSession}.
* @return global task manager.
*/
public static VcTaskMgr getGlobalTaskMgr() {
return taskMgr;
}
/**
* A thread local variable to represent the current VC session.
*
* In Aurora's terminology, a VC session is a chunk of code that
* accesses VC in a single thread. We try to repeat a VC session
* if the underlying code generates recoverable exceptions,
* such as VC connection failures.
*
* For stateful VC operations that rely on vmodl's session,
* such as property collector, events collector, they shouldn't
* use VC session wrappers.
*/
private static ThreadLocal<VcService> tVcService = new ThreadLocal<VcService>() ;
private static ThreadLocal<VcSession<?>> tVcSession = new ThreadLocal<VcSession<?>>();
/**
* @return a TLS VC session object.
* null if not accessed from a VC session.
*/
public static VcSession<?> getSession() {
return tVcSession.get();
}
/**
* @return a TLS service object to access VC,
* null if not accessed from a VC session.
*/
public static VcService getService() {
return tVcService.get();
}
/**
* @return a TLS task manager object to issue tasks to VC,
* null if not accessed from a VC task session.
*/
public static VcTaskMgr getTaskMgr() {
return getSession().getTaskMgr();
}
/**
* @return VcCleaner a cleaning service for vc logout requests
*/
public static VcCleaner getVcCleaner() {
AuAssert.check(vcCleaner != null);
return vcCleaner;
}
/**
* @return true if the current thread in a VC session
*/
public static boolean isInSession() {
return tVcService.get() != null;
}
/**
* @return true if the current thread in a VC task session
*/
public static boolean isInTaskSession() {
return isInSession() && getSession().isTaskSession();
}
/**
* Returns the number of times the context was re-initialized.
* @return genCount
*/
public static int getGenCount() {
return genCount;
}
/**
* Triggers a VC connection status change event.
* @param type event type
* @param serviceName vc service name, maybe null
*/
public static void triggerEvent(VcConnectionStatusChangeEvent event, String serviceName) {
/*
* XXX Because vc sessions can be established from various loops, make sure we
* don't abuse the database too much. Delete once we have confidence.
*/
if (++eventCount > 10000) {
logger.error("Too many VC events");
return;
}
VcConnectionStatusChangeCallback callback = statusChangeEventCallback.get(event);
if (callback != null) {
callback.call(event, serviceName);
}
}
/**
* Called if initVc fails for any reason: cleans up any partially initialized
* state, logs event and throws an unchecked exception with given errorId.
* @param e exception to re-throw
* @param eventType event to log, if any
* @param fatalIniError true for fatal errors (retry will not help)
*/
private static void initFailure(VcException e, VcConnectionStatusChangeEvent eventType,
boolean fatalInitError) {
cleanup(eventType);
VcContext.fatalInitError = fatalInitError; // Dont't retry if true.
AuAssert.check(!initialized);
throw e;
}
private static void checkVcVersion(AboutInfo info) {
if (esxApiName.equals(info.getApiType())) {
initFailure(VcException.CONNECTING_TO_HOSTD(),
VcConnectionStatusChangeEvent.VC_VERSION_HOSTD, true);
} else if (!vcApiName.equals(info.getApiType())) {
initFailure(VcException.CONNECTING_TO_INVALID_PRODUCT(),
VcConnectionStatusChangeEvent.VC_VERSION_INVALID_PRODUCT, true);
}
String mVersionString;
Integer mVersion = 0;
try {
mVersionString = info.getApiVersion().split("\\.")[0];
mVersion = new Integer(mVersionString);
} catch (NumberFormatException e) {
initFailure(VcException.UNSUPPORTED_VERSION(e, info.getApiVersion()),
VcConnectionStatusChangeEvent.VC_VERSION_UNSUPPORTED, true);
}
if (mVersion < vcRequiredMajorVersion) {
initFailure(VcException.UNSUPPORTED_VERSION(null, info.getApiVersion()),
VcConnectionStatusChangeEvent.VC_VERSION_UNSUPPORTED, true);
}
}
/**
* Initialize three global vc sessions and save them in the context:
* - a general purpose vc session used by short lived requests
* - an event listener session, a long running service
* - an dedicated session for long running VC calls
* Also initialize all other auxiliary data structures and state.
* This function either completes the entire initialization successfully
* or rolls back partial initialization on any failure. In the latter case,
* we will attempt to re-initialize again at a later time on VC use.
*/
public static synchronized void initVcContext() {
AboutInfo info = null;
genCount++;
if (initialized) {
logger.info("VC already initialized");
return;
}
/*
* Don't attempt to relogin into VC on fatal errors (will fail again) or
* when we are in the middle of a shutdown operation (just logged out).
*/
if (fatalInitError) {
logger.warn("Fatal VC init error");
throw VcException.INIT_ERROR();
}
if (shutdown) {
logger.warn("initVcContext during shutdown");
throw VcException.SHUTDOWN_ERROR();
}
/* No partial initialization tolerated. */
AuAssert.check(vcCleaner == null &&
vcService == null &&
eventService == null);
vcCleaner = new VcCleaner();
try {
VmodlContext.getContext();
} catch(IllegalStateException ex) {
//Context has not been initialized. Continue to initialize
VmodlContext.initContext(new String[] { "com.vmware.vim.binding.vim" });
}
try {
vcService = new VcService("VcService");
info = vcService.getServiceInstanceContent().getAbout();
serverGuid = info.getInstanceUuid();
vcVersion = info.getVersion();
} catch (Exception e) {
logger.error("init vc context failed (vc session)");
initFailure(VcException.LOGIN_ERROR(e), null, false); // Retry later.
}
checkVcVersion(info);
try {
eventService = new VcService("VcEventService");
/*
* XXX Since Aurora is currently happy with resource pools from different
* datatcenters, we have to monitor the entire inventory starting with the
* root folder. Will need to find a way to reduce this to either a set
* of datacenters or, better, a single datacenter. Also see Bug 677287.
*/
VcContext.startEventListener(vcService.getServiceInstanceContent().getRootFolder());
} catch (Exception e) {
logger.error("init vc context failed (vc event listener)");
initFailure(VcException.LOGIN_ERROR(e), null, false); // Retry later.
}
try {
/*
* Set the HTTP timeout for long running VC calls to 5 minutes.
*/
longCallService = new VcService("VcLongCallService", false, 5 * 60 * 1000);
VcContext.startVcLongCallHandler();
} catch (Exception e) {
logger.error("init vc context failed (vc long call handler)");
initFailure(VcException.LOGIN_ERROR(e), null, false); // Retry later.
}
logger.info("init vc context succeeded");
initialized = true;
triggerEvent(VcConnectionStatusChangeEvent.VC_CONNECTED, null);
}
public static void initVcContext(Map<VcConnectionStatusChangeEvent, VcConnectionStatusChangeCallback> callback) {
VcContext.statusChangeEventCallback.putAll(callback);
initVcContext();
}
/**
* Shut down all VC services on system shutdown.
*/
public static synchronized void shutdown() {
logger.info("Shutting down VcContext");
shutdown = true;
cleanup(null);
logger.info("VcContext shut down");
}
private static synchronized void cleanup(VcConnectionStatusChangeEvent eventType) {
VcService svc = vcService;
VcService eventSvc = eventService;
VcService longTaskSvc = longCallService;
if (eventType != null) {
triggerEvent(eventType, null);
}
stopEventListener();
stopVcLongCallHandler();
eventService = null;
longCallService = null;
vcService = null;
vcVersion = null;
serverGuid = null;
if (svc != null) {
svc.logout(null);
}
if (eventSvc != null) {
eventSvc.logout(null);
}
if (longTaskSvc != null) {
longTaskSvc.logout(null);
}
if (vcCleaner != null) {
vcCleaner.shutDown(); // Will wait for all logout completions.
vcCleaner = null;
}
initialized = false;
}
// public for testing only
public static synchronized void startSession(VcSession<?> vcSession) {
if (shutdown) {
throw VcException.SHUTDOWN_ERROR();
}
AuAssert.check(initialized && vcService != null);
tVcSession.set(vcSession);
tVcService.set(vcService);
}
// public for testing only
public static void endSession() {
tVcService.set(null);
tVcSession.set(null);
}
/**
* A special long running session used by Event Listener thread.
* Does not need to be protected against shutdowns like the general
* session because this gets executed only once, initiated by the
* main thread that holds VcContext monitor.
*/
public static void startEventSession() {
AuAssert.check(eventService != null && !shutdown);
tVcService.set(eventService);
tVcSession.set(null);
}
/**
* A special long running session used by VcLongCallHandler thread.
* Does not need to be protected against shutdowns like the general
* session because this gets executed only once, initiated by the
* main thread that holds VcContext monitor.
*/
public static void startLongCallSession() {
AuAssert.check(longCallService != null && !shutdown);
tVcService.set(longCallService);
tVcSession.set(null);
}
private static void wrapAndThrow(Throwable e) throws AuroraException {
if (e instanceof AuroraException) {
throw (AuroraException)e;
}
throw VcException.GENERAL_ERROR(e);
}
/**
* Wrapper function for a VC session.
*
* Repeat the VC session in a loop if there is a VC connection
* runtime fault during execution.
*
* Only repeatable VC operations can be executed in this loop.
* For VC operations that have side effects, use VcSagaStep.
*
* @param <T> type of the return value of the VC session
* @param session defines the VC session in {@link session.body()}
* @return result of the VC session
*/
public static <T> T inVcSessionDo(VcSession<T> session) throws AuroraException {
if (isInSession()) {
AuAssert.check(initialized);
// Make sure that we don't reuse a non-task session for a task session
AuAssert.check(!session.isTaskSession() || isInTaskSession());
// Already in VC session, simply return.
try {
return session.body();
} catch (Throwable e) {
wrapAndThrow(e);
}
}
/*
* If failed to initialize on start-up, try again, but without retrying.
*/
if (!initialized) {
initVcContext();
AuAssert.check(initialized);
}
boolean needsRetry = false;
int retries = 0;
long waitTime = VcSession.VC_INIT_WAIT_TIME;
T result = null;
long genCount = -1L;
startSession(session);
// The try block should follow immediately, so that endSession will
// always be called if startSession succeeded.
do {
try {
AuAssert.check(isInSession());
// Login VC if we are logged out of VC service.
genCount = getService().login();
if (needsRetry) {
// undo previous invocation of body()
session.undo();
}
result = session.body();
needsRetry = false;
} catch (Throwable e) {
VcService svc = getService();
Long curGenCount = svc.getServiceGenCount();
endSession();
logger.debug(e);
if (e instanceof RejectedExecutionException ||
e instanceof IllegalStateException) {
/*
* It's likely that the current session was terminated by another thread,
* thus we receive this exception from java Executor. We should retry.
*/
if (curGenCount != null && genCount == svc.getServiceGenCount()) {
/* Log error if we got HTTP binding or thread executor exceptions
* in the same session.
*/
logger.error("genCount=" + genCount, e);
svc.dumpServiceStatus();
}
} else if (!(e instanceof NotAuthenticated ||
e instanceof InternalException ||
e instanceof UnmarshallException)) {
/*
* For a normal exception, we simply throw it.
*
* XXX need to verify that we caught the right exceptions
* The following code is by reading MethodInvocationHandlerImpl.java
* in vlsi code. We potentially didn't catch other interesting
* RuntimeExceptions.
*/
wrapAndThrow(e);
}
if (curGenCount != null && genCount != curGenCount) {
/* If another thread has updated the VcService instance,
* reset the retry counter for using the new service instance.
* This would slow down the VcSession failure rate
* if connection to VC experiences frequent hiccups.
*/
retries = 0;
genCount = curGenCount;
logger.info("reset retries with new genCounter=" + genCount);
} else if (++retries > session.retries) {
logger.error("failed to reconnect to VC connection");
// Failed N retries. Log out of VC and terminate with exception.
vcService.logout(genCount);
if ((e instanceof InternalException &&
MoUtil.isNetworkException(e.getCause())) ||
(e instanceof RejectedExecutionException) ||
(e instanceof IllegalStateException)) {
/* Either a network problem or VC is down. */
throw VcException.UNAVAILABLE_ERROR(e);
}
wrapAndThrow(e);
}
needsRetry = true;
/* This is likely caused by session timeout or has
* exceeded the threshold for forcing reconnection.
* Let's try to clean up, login again and retry.
* XXX needs to investigate what exceptions should do login.
*/
if (e instanceof NotAuthenticated ||
retries > session.reconnectThreshold) {
logger.info("try to reconnect for " + e);
genCount = vcService.reconnect(genCount);
/*
* If we managed to login and get VC connection,
* do not need to wait.
*/
} else {
try {
Thread.sleep(waitTime);
} catch (InterruptedException e1) {
logger.error("reconnect to VC connection interrupted");
wrapAndThrow(e);
}
// double wait time for next the round
waitTime = waitTime << 1;
}
startSession(session);
}
} while (needsRetry);
endSession();
return result;
}
/**
* A callback from a vc service executed on service reset. If this vc
* session was associated with Event Listener, need to re-establish
* Listener's server side state in the context of a new vc session.
*
* @param svc vc service that was (re)initialized
* @throws Exception
*/
public static void serviceReset(VcService svc) throws Exception {
if (svc == eventService && eventListener != null) {
eventListener.reset();
}
}
/**
* Start listening to events rooted in the specified target cluster
* or datacenter moRef in the context of vc event listener session.
* Cannot wait on VcTask completion until EventListener is started.
*
* @param targetMoRef either clutser or datacenter
* @throws Exception
*/
private static void startEventListener(ManagedObjectReference targetMoRef)
throws Exception {
eventListener = new VcEventListener(targetMoRef);
eventListener.setDaemon(true);
eventListener.start();
eventListener.waitUntilStarted();
/*
* If something went wrong during initialization of VcEventListener thread,
* rethrow exception in the parent thread context.
*/
if (eventListener.getInitException() != null) {
throw new Exception("VcEventListener failed to start",
eventListener.getInitException());
}
}
/**
* Safely stop Event Listener's thread.
*/
private static void stopEventListener() {
if (eventListener != null) {
eventListener.shutDown();
eventListener = null;
}
}
/**
* Start a thread polling for VC long calls.
*/
private static void startVcLongCallHandler() throws Exception {
longCallHandler = new VcLongCallHandler();
longCallHandler.setDaemon(true);
longCallHandler.start();
longCallHandler.waitUntilStarted();
}
/**
* Safely stop VC long call handler thread.
*/
private static void stopVcLongCallHandler() {
if (longCallHandler != null) {
longCallHandler.shutDown();
longCallHandler = null;
}
}
/**
* "Bad" tasks don't trigger recentTask[] property updates and we must poll
* for their completions. This function must be called by the poller to make
* sure that the registered TaskFinishedEvent handlers are called for "bad"
* tasks just like they are called for good ones. Synchronized prevents
* eventListener.shutDown().
* @param task
*/
public static synchronized void badTaskCompleted(VcTask task) throws Exception {
if (eventListener != null) {
eventListener.fireBadTaskFinishedEventHandlers(task);
}
}
}