eu.xtreemos.xosd.jobmng
Class JobMng

java.lang.Object
  extended by eu.xtreemos.system.eventmachine.stage.AbstractStage
      extended by eu.xtreemos.system.eventmachine.stage.AbstractReceivingStage
          extended by eu.xtreemos.system.eventmachine.stage.Abstract2wayStage
              extended by eu.xtreemos.xosd.jobmng.JobMng
All Implemented Interfaces:
eu.xtreemos.system.eventmachine.queue.IEventHandler, eu.xtreemos.system.eventmachine.stage.IStage

public class JobMng
extends eu.xtreemos.system.eventmachine.stage.Abstract2wayStage

Author:
martag, ales.cernivec@xlab.si, rnou, jgiralt, john

Nested Class Summary
(package private)  class JobMng.CleaningCronEntry
           
private  class JobMng.DependentJobsAddress
           
private  class JobMng.JobFunction
           
private  class JobMng.JobsUserInfo
           
private  class JobMng.ListJobIds
           
 class JobMng.RedirContext
           
private static class JobMng.RedirFuncs
           
private  class JobMng.ReservationCallBack
           
private  class JobMng.ResourceInfo
           
 
Field Summary
private  java.util.Hashtable<java.lang.String,java.lang.String> automaticReservations
           
(package private) static int BASIC
           
private  java.lang.String delim
           
private  java.util.Hashtable<java.lang.String,JobMng.DependentJobsAddress> depJobs
           
(package private) static int JOB_DEFINITION
           
private  java.util.Hashtable<java.lang.String,java.lang.Integer> jobIdContext
           
private  CJobMngConfig jobmngConfig
           
private  java.util.Hashtable<java.lang.String,XJob> jobsList
           
(package private) static org.apache.log4j.Logger logger
           
private  eu.xtreemos.xosd.utilities.metrics.JobMetrics metrics
           
private  eu.xtreemos.xosd.utilities.metrics.JobMetricsData metricsData
           
private  CommunicationAddress myAddr
           
(package private) static int RESOURCES_ALLOCATED
           
(package private) static int RESOURCES_CONSUMED
           
private  java.util.Random rGenerator
           
private  ServiceTrustStore sTrustStore
           
private  java.util.ArrayList<eu.xtreemos.xosd.utilities.metrics.MetricsDesc> systemMetrics
           
(package private) static org.apache.log4j.Logger tracer
           
(package private) static int USER_METRICS
           
private  java.util.Hashtable<java.lang.String,java.util.ArrayList<UsedResource>> userReservationsUsed
           
 
Fields inherited from class eu.xtreemos.system.eventmachine.stage.Abstract2wayStage
context, counter, curContext, sink
 
Fields inherited from class eu.xtreemos.system.eventmachine.stage.AbstractReceivingStage
queue
 
Fields inherited from class eu.xtreemos.system.eventmachine.stage.AbstractStage
handlerChain, handlerGroup, handlerThreads, name, running, serviceListeners
 
Constructor Summary
JobMng()
           
 
Method Summary
 void addDependence(java.lang.String FromJobId, java.lang.String toJobId, java.lang.String TAG, java.security.cert.X509Certificate userCtx)
          This function adds a new job dependence.
 void addDependenceDown(java.lang.String jobId, java.lang.String toJobId, java.lang.String tag, java.security.cert.X509Certificate userCtx)
           
 void addDependenceUp(java.lang.String jobId, java.lang.String FromJobId, java.lang.String tag, java.security.cert.X509Certificate userCtx)
           
 java.lang.Integer addJobMetric(java.lang.String jobId, eu.xtreemos.xosd.utilities.metrics.MetricsDesc metric, java.security.cert.X509Certificate userCtx)
          Adds a new user defined metric to the job.
private  void addSystemMetrics(java.lang.String jobId)
           
(package private)  boolean checkGridNodeForJobUnitRestart(java.util.ArrayList<CommunicationAddress> list, java.lang.String jobId, java.lang.Integer jobUnitId)
          verify that each job unit of a job resides on a separate grid node
 void chgUID(java.lang.String jobId, java.security.cert.X509Certificate oldUserCtx, java.security.cert.X509Certificate newUserCtx)
          Changes the UID associated to a jobId
 void cleanMonitoring(java.lang.String jobId)
          This method is called from CronDaemon some time after job's death to clean metrics' data structures.
 java.lang.String completeJobCreationCB(java.lang.Object result)
          Completes Job Creation (all directories filled)
 java.lang.String completeJobCreationCBE(java.lang.Object result)
          Callback for failure on JobCreation
 java.lang.String createJob(java.lang.String jsdlFile, java.lang.Boolean startJob, java.lang.String reservationID, java.security.cert.X509Certificate userCtx)
          Creates a job in the AEM based on the JSDL description.
 java.lang.Integer createProcess(java.lang.String jobId, java.lang.String JSDL, java.lang.String reservationId, CommunicationAddress resource, java.security.cert.X509Certificate userCtx)
          Creates a new Process inside the specified JobID. In order to increase performance, user can specify the ReservationID.
 void deleteDependence(java.lang.String FromJobId, java.lang.String ToJobId, java.lang.String TAG, java.security.cert.X509Certificate userCtx)
          Deletes an existing dependence between two specific jobs
 void deleteDependenceDown(java.lang.String jobId, java.lang.String toJobId, java.lang.String tag, java.security.cert.X509Certificate userCtx)
           
 void deleteDependenceUp(java.lang.String jobId, java.lang.String FromJobId, java.lang.String tag, java.security.cert.X509Certificate userCtx)
           
 java.lang.Object execJob(CommunicationAddress addrResource)
          Executes the job in the resource specified
 void exitJob(java.lang.String jobId, java.lang.Integer exitValue, java.security.cert.X509Certificate userCtx)
          The job identified by the jobId is finished immediately (all the processes of the job) with the exit code provided.
 java.lang.Integer fillReservationCB(java.lang.Boolean value)
          Callback to fill up a reservation (infinite and shared), calls to getAllResourcesCB...
 java.lang.Integer fillReservationCBE(java.lang.Object value)
          Callback for error on fillReservation
 java.lang.Integer getAllResourcesCB(java.util.ArrayList<CommunicationAddress> list)
          Callback where I get a list of candidates.
 java.lang.String getHandledEventType()
           
 java.lang.Object getInfoResourcesCB(java.util.ArrayList<ResourceDescriptorRecord> RDR)
          Callback where I get information from a resource, and select one with the specified scheduling method.
 java.lang.Object getInfoResourcesCBE(java.util.ArrayList<ResourceDescriptorRecord> RDR)
          Callback where I get information from a resource (if it fails)
private  JobInfo getJInfo(java.lang.String jobId, int typeOfInfo, java.lang.String user)
           
 java.lang.String getJobId(CommunicationAddress resourceId, java.lang.Integer pid, java.security.cert.X509Certificate userCtx)
          Returns the jobID corresponding to a process, identified by its resource and local pid.
 java.lang.String getJobIdCB(java.lang.String jobId)
           
 java.lang.String getJobInfo(java.lang.String jobId, java.lang.Integer flags, java.lang.Integer infoLevel, java.util.ArrayList<java.lang.String> metricsList, java.security.cert.X509Certificate userCtx)
          TODO: To be eventually deprecated.
 java.lang.String getJobInfoCB(java.lang.String info)
           
 java.lang.String getJobInfoCBE(java.lang.Object info)
           
 java.lang.Object getJobManagerAddressesCB(java.util.ArrayList<CommunicationAddress> addrJobList)
          range addresses equals range dependent jobs save job manager addresses, BARRIER: contact super job cp after all job manager addresses have been collected
 java.util.ArrayList<eu.xtreemos.xosd.utilities.metrics.MetricsDesc> getJobMetrics(java.lang.String jobId, java.security.cert.X509Certificate userCtx)
          Returns the list of available metrics for a specific job, both system and user defined.
 java.lang.String getJobsInfo(java.util.ArrayList<java.lang.String> jobIds, java.lang.Integer flags, java.lang.Integer infoLevel, java.util.ArrayList<java.lang.String> metrics, java.security.cert.X509Certificate userCtx)
          Returns the monitoring information of the requested jobs.
 java.util.ArrayList<java.lang.String> getJobsUser(java.lang.String userId, java.security.cert.X509Certificate userCtx)
          Returns all the jobIDs of the jobs belonging to the given user
 java.util.ArrayList<java.lang.String> getJobsUserCB(java.lang.Object obj)
           
 java.util.ArrayList<java.lang.String> getListOfDependences(java.lang.String jobID, java.lang.String tag, java.lang.Integer levels, java.lang.Integer direction, java.security.cert.X509Certificate userCtx)
          Returns the list of jobs that have a dependence to the given job.
 java.util.ArrayList<java.lang.String> getListOfDependencesCB(java.util.ArrayList<java.lang.String> depList)
           
 java.util.ArrayList<eu.xtreemos.xosd.utilities.metrics.MetricsDesc> getMetricsByScope(java.lang.String jobId, eu.xtreemos.xosd.utilities.metrics.MetricScope scope)
           
 java.lang.String getProcessInfoCB(java.lang.String info)
           
 java.lang.String getProcessInfoCBE(java.lang.Exception info)
           
 java.lang.String getReservationID(java.lang.String reservationID)
          Callback where I get reservationID and go to the createJob
 java.lang.Integer getReservationIDFromJobIDCB(java.lang.String reservationID)
          Callback where I get reservationID from a JobID and go to the runjob or createProcess
 void getReservationResourcesCB(java.util.ArrayList<ReservationSlot> list)
          Callback where I get the list of resources inside reservation.
 java.lang.Integer getReservationResourcesProcessCB(java.util.ArrayList<ReservationSlot> list)
          Callback where I get the list of resources inside reservation to execute a process.
 java.lang.Integer getResForProcStatusCB(java.util.ArrayList<ReservationSlot> resources)
           
 java.lang.Integer getResourcesFromReservationIDCB(java.util.ArrayList<ReservationSlot> resources)
          Callback to get the list of resources of the reservationID. In case we don't have any resource in the reservation, call getResources from ResMng and fill up the reservation
 java.lang.Integer getRIDForProcStatusCB(java.lang.String reservationID)
          Callback to get the reservationID of a job to enable procStatus buffering for each resource it has reserved.
 java.lang.String getSingleJobInfo(java.lang.String jobId, java.lang.Integer flags, java.lang.Integer infoLevel, java.util.ArrayList<java.lang.String> metricsList, java.security.cert.X509Certificate userCtx)
          This method is called from getJobsInfo or another jobMng via redirect.
 void handleEvent(java.lang.Object event)
           
 void identifyJobManagers(java.util.ArrayList<java.lang.String> dependentJobs, java.lang.String initialJobId, java.lang.String strategy, java.lang.String options, java.lang.String mode, java.security.cert.X509Certificate cert)
          identify job manager address of each job
 void init()
           
 void jobControl(java.lang.String jobId, java.lang.Integer ctrOp, java.security.cert.X509Certificate userCtx)
          Apply the operation Control to the specific jobId
 void jobFinished(java.lang.String jobId, java.lang.Integer exitStatus, CommunicationAddress execMng)
           
 void jobMonitoringControl(java.lang.String jobId, java.lang.Integer op, java.lang.String level, java.security.cert.X509Certificate userCtx)
          Update the characteristics of the monitoring of a job.
 void jobRunning(java.lang.String jobId)
           
 java.lang.Integer jobWait(java.lang.String jobId, java.security.cert.X509Certificate userCtx)
          Blocks the calling process until the job indicated finishes
 void lockCheckpoint(java.lang.String jobId, java.lang.String initialJobId, java.security.cert.X509Certificate userCert, CommunicationAddress jobCpAddr, CommunicationAddress superJobCpAddr, java.lang.String strategy, java.lang.String options, java.lang.String mode, java.util.ArrayList<java.lang.String> dependentJobs)
          lock a checkpoint action - private member jobslist needed piggyback jsdl file and executable name
 void lockRestart(java.lang.String jobId)
           
 void recreateJob(java.lang.String jobId, java.lang.String initialJobId, java.lang.String jsdlFileContent, java.lang.String checkpointVersion, CommunicationAddress superJobCpAddr, java.util.ArrayList<CommunicationAddress> jobResourceList, java.security.cert.X509Certificate userCert)
          set up structures that represent a job in AEM at restart
 java.lang.Object redirectFunctionCB(CommunicationAddress addrJob)
           
private  void removeBuffers(java.lang.String jobId)
          Removes buffered data associated to the job.
 java.lang.Integer removeJobMetric(java.lang.String jobId, java.lang.String metricName, java.security.cert.X509Certificate userCtx)
          Removes a user defined metric from the job.
private  void removeMetrics(java.lang.String jobId)
          Removes metric data associated to the job.
 java.lang.Object returnCB(java.lang.Object obj)
           
 java.lang.Object returnCBE(java.lang.Exception ex)
           
 java.lang.Integer runJob(java.lang.String jobId, java.lang.String reservationID, java.security.cert.X509Certificate userCtx)
           
 java.lang.Integer runJobRes(java.lang.String jobId, java.lang.String reservationID, CommunicationAddress resourceID, java.security.cert.X509Certificate userCtx)
           
 CommunicationAddress selectResource(java.util.ArrayList<CommunicationAddress> list, java.lang.Integer schedule)
          Returns a resource from a list of resources
 void sendEvent(java.lang.String jobId, java.lang.Integer signal, java.lang.Integer operation, java.util.ArrayList<java.lang.String> list, java.security.cert.X509Certificate userCtx)
          Sends an event to a job
 java.lang.Integer setMetricValue(java.lang.String jobId, java.lang.String metricName, CommunicationAddress resourceID, java.lang.Integer pid, java.lang.String value, java.security.cert.X509Certificate userCtx)
          Sets the value of a Metric.
 java.lang.Integer setMonitorBuffering(java.lang.String jobId, java.lang.String metricName, CommunicationAddress resourceID, java.lang.Integer pid, java.lang.Integer flags, java.security.cert.X509Certificate userCtx)
          Switches on and off buffering for the specified metric.
private  void setSystemMetrics()
           
 void unlockCheckpoint(java.lang.String jobId)
           
 void unlockRestart(java.lang.String jobId)
           
 void updateJobHints(java.lang.String jobId, java.lang.Integer operation, java.lang.String requirements, java.lang.Boolean wait, java.security.cert.X509Certificate userCtx)
          Modifies the hints passed in createJob function
 void updateJobMetric(java.lang.String jobId, eu.xtreemos.xosd.utilities.metrics.MetricsDesc metric, java.security.cert.X509Certificate userCtx)
          Not defined yet
 void updateJobRequirements(java.lang.String jobId, java.lang.Integer operation, java.lang.String requirements, java.lang.Boolean wait, java.security.cert.X509Certificate userCtx)
          Modifies the requirements passed in createJob function.
 java.lang.Object verifyPolicyHandler(ResourceMatching rmList)
          Starts job using the first resource from rmList.
 void waitForEvent(java.lang.String jobId, java.lang.Integer event, java.security.cert.X509Certificate userCtx)
          Blocks the calling process until "event" is received.
 
Methods inherited from class eu.xtreemos.system.eventmachine.stage.Abstract2wayStage
getContext, removeContext, SendException, SendException, SendException, SendReply, SendReply, SendReply, setSink
 
Methods inherited from class eu.xtreemos.system.eventmachine.stage.AbstractReceivingStage
dequeue, getSource
 
Methods inherited from class eu.xtreemos.system.eventmachine.stage.AbstractStage
addHandler, addHandler, addServiceListener, getName, getShortName, getThreadCount, notifyServiceInitialised, notifyServiceStarted, notifyServiceStopped, processEvent, removeHandler, removeServiceListener, setThreadCount, start, stop
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

sTrustStore

private ServiceTrustStore sTrustStore

jobmngConfig

private CJobMngConfig jobmngConfig

delim

private java.lang.String delim

jobsList

private java.util.Hashtable<java.lang.String,XJob> jobsList

userReservationsUsed

private java.util.Hashtable<java.lang.String,java.util.ArrayList<UsedResource>> userReservationsUsed

automaticReservations

private java.util.Hashtable<java.lang.String,java.lang.String> automaticReservations

jobIdContext

private java.util.Hashtable<java.lang.String,java.lang.Integer> jobIdContext

systemMetrics

private java.util.ArrayList<eu.xtreemos.xosd.utilities.metrics.MetricsDesc> systemMetrics

metrics

private eu.xtreemos.xosd.utilities.metrics.JobMetrics metrics

metricsData

private eu.xtreemos.xosd.utilities.metrics.JobMetricsData metricsData

rGenerator

private java.util.Random rGenerator

myAddr

private CommunicationAddress myAddr

logger

static final org.apache.log4j.Logger logger

tracer

static final org.apache.log4j.Logger tracer

BASIC

static final int BASIC

JOB_DEFINITION

static final int JOB_DEFINITION

RESOURCES_ALLOCATED

static final int RESOURCES_ALLOCATED

RESOURCES_CONSUMED

static final int RESOURCES_CONSUMED

USER_METRICS

static final int USER_METRICS

depJobs

private java.util.Hashtable<java.lang.String,JobMng.DependentJobsAddress> depJobs
Constructor Detail

JobMng

public JobMng()
Method Detail

init

public void init()
Specified by:
init in interface eu.xtreemos.system.eventmachine.stage.IStage
Overrides:
init in class eu.xtreemos.system.eventmachine.stage.AbstractStage

setSystemMetrics

private void setSystemMetrics()

addSystemMetrics

private void addSystemMetrics(java.lang.String jobId)

getMetricsByScope

public java.util.ArrayList<eu.xtreemos.xosd.utilities.metrics.MetricsDesc> getMetricsByScope(java.lang.String jobId,
                                                                                             eu.xtreemos.xosd.utilities.metrics.MetricScope scope)

getReservationID

public java.lang.String getReservationID(java.lang.String reservationID)
Callback where I get reservationID and go to the createJob

Returns:

getReservationIDFromJobIDCB

public java.lang.Integer getReservationIDFromJobIDCB(java.lang.String reservationID)
Callback where I get reservationID from a JobID and go to the runjob or createProcess

Returns:
an Integer with the returnValue of createProcess, -1 on error.

getResourcesFromReservationIDCB

public java.lang.Integer getResourcesFromReservationIDCB(java.util.ArrayList<ReservationSlot> resources)
Callback to get the list of resources of the reservationID. In case we don't have any resource in the reservation, call getResources from ResMng and fill up the reservation

Returns:
through fillReservationCB from ReservationManager if no resources received otherwise the returnValue of getAllResourcesCB

fillReservationCB

public java.lang.Integer fillReservationCB(java.lang.Boolean value)
Callback to fill up a reservation (infinite and shared), calls to getAllResourcesCB... Remember that time-constrained resources should be acquired with the createReservation method...

Returns:
an Integer with the returnValue of runJob, -1 on error: exceptions logged

fillReservationCBE

public java.lang.Integer fillReservationCBE(java.lang.Object value)
Callback for error on fillReservation

Returns:
-2 Reservation not available

getRIDForProcStatusCB

public java.lang.Integer getRIDForProcStatusCB(java.lang.String reservationID)
Callback to get the reservationID of a job to enable procStatus buffering for each resource it has reserved.

Returns:
an Integer through getResForProcStatusCB

getResForProcStatusCB

public java.lang.Integer getResForProcStatusCB(java.util.ArrayList<ReservationSlot> resources)
Returns:
an Integer to be determined

completeJobCreationCB

public java.lang.String completeJobCreationCB(java.lang.Object result)
Completes Job Creation (all directories filled)

Returns:
a String to be determined

completeJobCreationCBE

public java.lang.String completeJobCreationCBE(java.lang.Object result)
Callback for failure on JobCreation

Returns:
a String to be determined

createJob

public java.lang.String createJob(java.lang.String jsdlFile,
                                  java.lang.Boolean startJob,
                                  java.lang.String reservationID,
                                  java.security.cert.X509Certificate userCtx)
                           throws java.lang.Exception
Creates a job in the AEM based on the JSDL description. The job can be automatically scheduled or just created, depending on the value of startJob. If a reservationId is provided, the job will be scheduled on that reservation. Otherwise, a negotiation/reservation process will be started when the job is scheduled, based on resource requirements and scheduling hints. If calls to VOPS are enabled, policy enforcement comes into play. The XOSDCONSOLE annotation denotes that this method is part of the XConsole project and is therefore available for use by users and scripts.

Parameters:
jsdlFile - is the job description of the job to be created in JSDL format (content not path to jsdl file)
startJob - true if we want to run job now, false just to create it without running
reservationID - The previously created reservation where the job will run
userCtx - the user certificate.
Returns:
the JobId of the new job created TODO: Use the reservationId and the user certificate
Throws:
java.lang.Exception

runJob

public java.lang.Integer runJob(java.lang.String jobId,
                                java.lang.String reservationID,
                                java.security.cert.X509Certificate userCtx)
                         throws java.lang.Exception
Throws:
java.lang.Exception

runJobRes

public java.lang.Integer runJobRes(java.lang.String jobId,
                                   java.lang.String reservationID,
                                   CommunicationAddress resourceID,
                                   java.security.cert.X509Certificate userCtx)
                            throws java.lang.Exception
Throws:
java.lang.Exception

getInfoResourcesCB

public java.lang.Object getInfoResourcesCB(java.util.ArrayList<ResourceDescriptorRecord> RDR)
Callback where I get information from a resource, and select one with the specified scheduling method.

Returns:

getInfoResourcesCBE

public java.lang.Object getInfoResourcesCBE(java.util.ArrayList<ResourceDescriptorRecord> RDR)
Callback where I get information from a resource (if it fails)

Returns:

getAllResourcesCB

public java.lang.Integer getAllResourcesCB(java.util.ArrayList<CommunicationAddress> list)
Callback where I get a list of candidates.

Parameters:
list - of candidates.
Returns:
an Integer to be determined

getReservationResourcesCB

public void getReservationResourcesCB(java.util.ArrayList<ReservationSlot> list)
Callback where I get the list of resources inside reservation. TODO: is that really used as a callback?

Parameters:
list - of candidates.

getReservationResourcesProcessCB

public java.lang.Integer getReservationResourcesProcessCB(java.util.ArrayList<ReservationSlot> list)
Callback where I get the list of resources inside reservation to execute a process.

Parameters:
list - of candidates.
Returns:
an Integer with the returnValue of createProcess, -1 on error

selectResource

public CommunicationAddress selectResource(java.util.ArrayList<CommunicationAddress> list,
                                           java.lang.Integer schedule)
Returns a resource from a list of resources

Parameters:
list - list of suitable resources.
schedule - type of schedule applied: 0: RANDOM (the only one available), 1: Less USED (calls resMng getResourceDescriptor and eventually calls ExecMng)
Returns:
resource selected

execJob

public java.lang.Object execJob(CommunicationAddress addrResource)
Executes the job in the resource specified


verifyPolicyHandler

public java.lang.Object verifyPolicyHandler(ResourceMatching rmList)
Starts job using the first resource from rmList.

Parameters:
rmList - list of suitable resources.
Returns:

jobControl

public void jobControl(java.lang.String jobId,
                       java.lang.Integer ctrOp,
                       java.security.cert.X509Certificate userCtx)
                throws java.lang.Exception
Apply the operation Control to the specific jobId

Parameters:
jobId - of the job to apply the operation
ctrOp - an Integer defining the operation. Possible values are defined in enum class ControlOperations E.g. ControlOperations.SUSPENDJOB.val().
userCtx - the user certificate TODO check void returning methods and exceptions bug.
Throws:
java.lang.Exception

exitJob

public void exitJob(java.lang.String jobId,
                    java.lang.Integer exitValue,
                    java.security.cert.X509Certificate userCtx)
             throws java.lang.Exception
The job identified by the jobId is finished immediately (all the processes of the job) with the exit code provided. If the process calling exitJob is part of the job this function never returns.

Parameters:
jobId - identifying the job to exit
exitValue - the exit code the job will finish with
userCtx - the user certificate
Throws:
java.lang.Exception

getJobsInfo

public java.lang.String getJobsInfo(java.util.ArrayList<java.lang.String> jobIds,
                                    java.lang.Integer flags,
                                    java.lang.Integer infoLevel,
                                    java.util.ArrayList<java.lang.String> metrics,
                                    java.security.cert.X509Certificate userCtx)
Returns the monitoring information of the requested jobs. Any user of the VO can access monitoring data of the job, but if he is not the owner, access will be restricted to unbuffered system metrics.

Parameters:
jobIds - an ArrayList of String which is the list of requested jobs.
flags - an Integer used as a mask to define the metrics to return. TypeOfInfo enum class has the values of each type. E.g. TypeOfInfo.BASIC.val(). Several flags might be combined as logical OR operands. Values and description:

BASIC: Job identification, status and times

JOB_DEFINITION: The JSDL

RESOURCES_ALLOCATED: ReservationIDs

RESOURCES_CONSUMED: Extended information about resources (to be implemented)

USER_METRICS: Metrics added by the user with the addJobMetric method

infoLevel - an Integer defining the sources of information allowed in the operation. InfoLevel enum class should be used to get the right values. E.g. InfoLevel.PROCESS.val(). Values and description:

JOB: it communicates only with the jobMng.

PROCESS: it communicates both with jobMng and exec- or resMng.

KERNEL: it communicates with every needed service and also considers special kernel metrics (to be implemented)

metrics - a list of metrics to obtain, additional to the flags
userCtx - the X509Certificate Object with the user certificate
Returns:
XML String containing the information of the Jobs
Throws:
java.lang.Exception - if 1) Incorrect jobId 2) Invalid User in the Certificate 3) Incorrect list of metrics 4) Not a valid XtreemOS certificate

getJobInfoCB

public java.lang.String getJobInfoCB(java.lang.String info)

getJobInfoCBE

public java.lang.String getJobInfoCBE(java.lang.Object info)

getJInfo

private JobInfo getJInfo(java.lang.String jobId,
                         int typeOfInfo,
                         java.lang.String user)

getJobInfo

public java.lang.String getJobInfo(java.lang.String jobId,
                                   java.lang.Integer flags,
                                   java.lang.Integer infoLevel,
                                   java.util.ArrayList<java.lang.String> metricsList,
                                   java.security.cert.X509Certificate userCtx)
                            throws java.lang.Exception
TODO: To be eventually deprecated. It's not in the monitoring deliverable

Parameters:
jobId -
flags -
infoLevel -
metricsList -
userCtx -
Returns:
Throws:
java.lang.Exception
See Also:
getJobsInfo(ArrayList, Integer, Integer, ArrayList, X509Certificate)

getSingleJobInfo

public java.lang.String getSingleJobInfo(java.lang.String jobId,
                                         java.lang.Integer flags,
                                         java.lang.Integer infoLevel,
                                         java.util.ArrayList<java.lang.String> metricsList,
                                         java.security.cert.X509Certificate userCtx)
                                  throws java.lang.Exception
This method is called from getJobsInfo or another jobMng via redirect.

Parameters:
jobId -
flags -
infoLevel -
metricsList -
userCtx -
Returns:
a String in XML with a tag and its contents.
Throws:
java.lang.Exception
See Also:
getJobsInfo(ArrayList, Integer, Integer, ArrayList, X509Certificate)

getJobIdCB

public java.lang.String getJobIdCB(java.lang.String jobId)
                            throws java.lang.Exception
Throws:
java.lang.Exception

getJobId

public java.lang.String getJobId(CommunicationAddress resourceId,
                                 java.lang.Integer pid,
                                 java.security.cert.X509Certificate userCtx)
                          throws java.lang.Exception
Returns the jobID corresponding to a process, identified by the resource it is assigned to and its pid local to that resource.

Parameters:
resourceId - the CommunicationAddress of the resource where the process is assigned
pid - an Integer with the process identifier local to that resource
userCtx - an X509Certificate object with the user certificate
Returns:
a String representing the jobId
Throws:
java.lang.Exception - when either of these happens: 1) Not a valid XtreemOS certificate 2) Invalid User in the Certificate 3) No jobID associated with the requested resource and pid

getJobsUser

public java.util.ArrayList<java.lang.String> getJobsUser(java.lang.String userId,
                                                         java.security.cert.X509Certificate userCtx)
                                                  throws java.lang.Exception
Returns all the jobIDs of the jobs belonging to the given user

Parameters:
userId - a String identifying the user we want to obtain the jobs from, by its Distinguished Name. If not specified, it uses the one in the certificate.
userCtx - a X509Certificate object with the user certificate
Returns:
an ArrayList of Strings representing the jobIDs
Throws:
java.lang.Exception - if 1) Not a valid XtreemOS certificate

getJobsUserCB

public java.util.ArrayList<java.lang.String> getJobsUserCB(java.lang.Object obj)

sendEvent

public void sendEvent(java.lang.String jobId,
                      java.lang.Integer signal,
                      java.lang.Integer operation,
                      java.util.ArrayList<java.lang.String> list,
                      java.security.cert.X509Certificate userCtx)
               throws java.lang.Exception
Sends an event to a job

Parameters:
signal - number to be sent
operation - can be: 0: ALL 1: MASTER 2: SPECIFIED in list 3: EXCEPT in list
jobId - identifying the job we are going to send the signal to
list -
userCtx - the user certificate TODO: Now we send always the signal to all the processes of the job, use the operation and list parameters
Throws:
java.lang.Exception

jobWait

public java.lang.Integer jobWait(java.lang.String jobId,
                                 java.security.cert.X509Certificate userCtx)
                          throws java.lang.Exception
Blocks the calling process until the job indicated finishes

Parameters:
jobId - identifying the job the calling process is waiting for
Returns:
the exit value of the job
Throws:
java.lang.Exception

createProcess

public java.lang.Integer createProcess(java.lang.String jobId,
                                       java.lang.String JSDL,
                                       java.lang.String reservationId,
                                       CommunicationAddress resource,
                                       java.security.cert.X509Certificate userCtx)
                                throws java.lang.Exception
Creates a new process inside the specified job. In order to increase performance, the user can specify the reservationId. The resource parameter can be used to force execution on that resource (only if it is inside the reservation). Finally, JSDL can be specified to change the executable and parameters of the process. Priority: Reservation > resource > JSDL > job JSDLInfo

Parameters:
jobId -
JSDL - JSDL content
reservationId -
resource -
userCtx -
Returns:
Throws:
java.lang.Exception

getProcessInfoCB

public java.lang.String getProcessInfoCB(java.lang.String info)

getProcessInfoCBE

public java.lang.String getProcessInfoCBE(java.lang.Exception info)

jobFinished

public void jobFinished(java.lang.String jobId,
                        java.lang.Integer exitStatus,
                        CommunicationAddress execMng)

cleanMonitoring

public void cleanMonitoring(java.lang.String jobId)
This method is called from CronDaemon some time after job's death to clean metrics' data structures. Every resource manager of a resource where the job has run is cleaned and also every exec manager of a resource where buffering was enabled and never disabled. Job is also retired from Job Directory and Job Manager.

Parameters:
jobId -

jobRunning

public void jobRunning(java.lang.String jobId)

redirectFunctionCB

public java.lang.Object redirectFunctionCB(CommunicationAddress addrJob)
                                    throws java.lang.Exception
Throws:
java.lang.Exception - if 1) Incorrect jobId

addDependenceUp

public void addDependenceUp(java.lang.String jobId,
                            java.lang.String FromJobId,
                            java.lang.String tag,
                            java.security.cert.X509Certificate userCtx)

addDependenceDown

public void addDependenceDown(java.lang.String jobId,
                              java.lang.String toJobId,
                              java.lang.String tag,
                              java.security.cert.X509Certificate userCtx)

deleteDependenceUp

public void deleteDependenceUp(java.lang.String jobId,
                               java.lang.String FromJobId,
                               java.lang.String tag,
                               java.security.cert.X509Certificate userCtx)

deleteDependenceDown

public void deleteDependenceDown(java.lang.String jobId,
                                 java.lang.String toJobId,
                                 java.lang.String tag,
                                 java.security.cert.X509Certificate userCtx)

addDependence

public void addDependence(java.lang.String FromJobId,
                          java.lang.String toJobId,
                          java.lang.String TAG,
                          java.security.cert.X509Certificate userCtx)
                   throws java.lang.Exception
This function adds a new job dependence. If we think of it as an arrow, it goes from FromJobId to toJobId. We support sets of dependencies identified by TAG, so that they can be used for different purposes. The AEM won't interpret these TAGs; it just groups the dependencies of jobs based on them. The AEM won't check for cycles in job dependencies; that is the responsibility of the user/job.

Parameters:
FromJobId - the job on which toJobId depends
toJobId - the job that depends on FromJobId
TAG - is a name to identify a set of dependencies.
userCtx - the user certificate
Throws:
java.lang.Exception

deleteDependence

public void deleteDependence(java.lang.String FromJobId,
                             java.lang.String ToJobId,
                             java.lang.String TAG,
                             java.security.cert.X509Certificate userCtx)
                      throws java.lang.Exception
Deletes an existing dependence between two specific jobs

Parameters:
FromJobId - the job on which ToJobId depends
ToJobId - the job that depends on FromJobId
TAG - is a name to identify a set of dependencies.
userCtx - the user certificate
Throws:
java.lang.Exception

getListOfDependences

public java.util.ArrayList<java.lang.String> getListOfDependences(java.lang.String jobID,
                                                                  java.lang.String tag,
                                                                  java.lang.Integer levels,
                                                                  java.lang.Integer direction,
                                                                  java.security.cert.X509Certificate userCtx)
                                                           throws java.lang.Exception
Returns the list of jobs that have a dependence to the given job. Take into account, dependences are directional and named. This method is recursive and user may specify a depth limit.

Parameters:
jobID - a String with the identifier of the starting job to get the related jobs
tag - a String with the name to identify a set of dependencies.
levels - an Integer with the recursion depth. For no limit, use -1
direction - an Integer to select the type of dependence, as defined in enum class DependenceDirection E.g. DependenceDirection.UP.val()
userCtx - the user certificate TODO use certificate to check privileges in every method that works with deps.
Returns:
list of jobIds that have the specified (UP or DOWN) dependence to the given job and so recursively for the given levels.
Throws:
java.lang.Exception

getListOfDependencesCB

public java.util.ArrayList<java.lang.String> getListOfDependencesCB(java.util.ArrayList<java.lang.String> depList)

updateJobHints

public void updateJobHints(java.lang.String jobId,
                           java.lang.Integer operation,
                           java.lang.String requirements,
                           java.lang.Boolean wait,
                           java.security.cert.X509Certificate userCtx)
                    throws java.lang.Exception
Modifies the hints passed in createJob function

Parameters:
jobId - identifying the job to change the requirements of
operation - can be: 0: MIGRATION_REQ 1: SCHEDULING_REQ
requirements - a String specifying the new hints for the job
wait - if the call is synchronous or asynchronous
userCtx - the user certificate
Throws:
java.lang.Exception

getJobMetrics

public java.util.ArrayList<eu.xtreemos.xosd.utilities.metrics.MetricsDesc> getJobMetrics(java.lang.String jobId,
                                                                                         java.security.cert.X509Certificate userCtx)
                                                                                  throws java.lang.Exception
Returns the list of available metrics for a specific job, both system and user defined.

Parameters:
jobId - a String identifying the job to get the metrics from, if the jobId is null the generic list of metrics is returned
userCtx - a X509Certificate object with the user certificate
Returns:
an ArrayList of MetricsDesc objects corresponding to the metrics associated to the job
Throws:
java.lang.Exception - if 1) Not a valid XtreemOS certificate 2) Invalid User in the Certificate 3) Incorrect jobId

setMetricValue

public java.lang.Integer setMetricValue(java.lang.String jobId,
                                        java.lang.String metricName,
                                        CommunicationAddress resourceID,
                                        java.lang.Integer pid,
                                        java.lang.String value,
                                        java.security.cert.X509Certificate userCtx)
                                 throws java.lang.Exception
Sets the value of a Metric. A metric will be an user-defined attribute of the job. Not all the attributes can be set, for instance the user time or the status are set by the system, not by the user.

Parameters:
jobId - a String identifying the job, the metric is associated to.
metricName - is a String with the name of the metric to be set.
value - a String with the value to be set to the metric. It's a string, but it will be parsed according to the metric type. If it's time type, the string must be in jiffies format.
Returns:
an Integer: 0 on success. Callbacks: redirectFunctionCB, returnCB, returnCBE. Might enqueue jobDir.getJobAddr, execMng.addJobMetric and resMng.addJobMetric
Throws:
java.lang.Exception - if 1) Incorrect jobId 2) Invalid User in the Certificate 3) Not a valid XtreemOS certificate 4) Metric doesn't exist 5) Trying to set value on a system metric

setMonitorBuffering

public java.lang.Integer setMonitorBuffering(java.lang.String jobId,
                                             java.lang.String metricName,
                                             CommunicationAddress resourceID,
                                             java.lang.Integer pid,
                                             java.lang.Integer flags,
                                             java.security.cert.X509Certificate userCtx)
                                      throws java.lang.Exception
Switches on and off buffering for the specified metric. With buffering on, multiple values of a metric are returned, and its timestamps represent the time when the value was changed. Only metrics defined as "bufferable" on creation can be buffered.

Parameters:
jobId - a String identifying the job whose metric is about to get buffered. It is worth noting that buffering is not applied to the whole job but to the smallest part of it according to the scope.
metricName - a String with the name of the metric.
resourceID - the CommunicationAddress of the resource where the buffering is to be applied. Used only if the scope of the metric is resource, jobUnit or process.
pid - Integer with the process ID of the process where the buffering is to be applied. Used only if the scope of the metric is process.
flags - an Integer mask with options to aggregate several metrics, tag buffers and enable or disable buffering. Use flags |= TypeOfInfo.ENABLE.val() to enable buffering, or flags &= ~TypeOfInfo.ENABLE.val() to disable buffering. TODO: add support for metric aggregations, if required.
userCtx - a X509Certificate object with the user certificate.
Returns:
an Integer: 0 on success. Callbacks: redirectFunctionCB, returnCB, returnCBE and getRIDForProcStatusCB. Might enqueue jobDir.getJobAddr, execMng.addJobMetric, resMng.addJobMetric and reservationMng.getReservationFromJob.
Throws:
java.lang.Exception - if 1) Incorrect jobId 2) Invalid User in the Certificate 3) Not a valid XtreemOS certificate 4) Metric doesn't exist 5) Metric cannot provide buffering capabilities

addJobMetric

public java.lang.Integer addJobMetric(java.lang.String jobId,
                                      eu.xtreemos.xosd.utilities.metrics.MetricsDesc metric,
                                      java.security.cert.X509Certificate userCtx)
                               throws java.lang.Exception
Adds a new user defined metric to the job. Afterwards, user might give values to it through the setMetricValue interface and get them with getJobInfo. Metrics are checked for correctness before insertion. It might communicate with other services and XOSDs if needed.

Parameters:
jobId - a String identifying the job to add the metric to
metric - a MetricsDesc object with the metric to be added
userCtx - a X509Certificate object with the user certificate
Returns:
an Integer: 0 on success. Callbacks: redirectFunctionCB, returnCB, returnCBE. Might enqueue jobDir.getJobAddr, execMng.addJobMetric and resMng.addJobMetric
Throws:
java.lang.Exception - if 1) Incorrect jobId 2) Invalid User in the Certificate 3) Not a valid XtreemOS certificate 4) Metric already exists 5) Incorrect metric description

updateJobMetric

public void updateJobMetric(java.lang.String jobId,
                            eu.xtreemos.xosd.utilities.metrics.MetricsDesc metric,
                            java.security.cert.X509Certificate userCtx)
                     throws java.lang.Exception
Not defined yet

Throws:
java.lang.Exception

removeJobMetric

public java.lang.Integer removeJobMetric(java.lang.String jobId,
                                         java.lang.String metricName,
                                         java.security.cert.X509Certificate userCtx)
                                  throws java.lang.Exception
Removes a user defined metric from the job. Data associated to the metric is also erased. It might communicate with other services and XOSDs if needed.

Parameters:
jobId - a String identifying the job to remove the metric from
metricName - a String with the name of the metric to be removed
userCtx - a X509Certificate object with the user certificate
Returns:
an Integer: 0 on success. Callbacks: redirectFunctionCB, returnCB, returnCBE. Might enqueue jobDir.getJobAddr, execMng.addJobMetric and resMng.addJobMetric
Throws:
java.lang.Exception - if 1) Incorrect jobId 2) Invalid User in the Certificate 3) Not a valid XtreemOS certificate 4) Metric doesn't exist 5) Trying to remove a system metric

removeMetrics

private void removeMetrics(java.lang.String jobId)
Removes metric data associated to the job. This method should be called whenever a job gets retired from the jobMng.

Parameters:
jobId - a String identifying the job whose metrics are about to be erased

removeBuffers

private void removeBuffers(java.lang.String jobId)
Removes buffered data associated to the job. This method should be called whenever a job gets retired from the jobMng. Associated definitions are also erased.

Parameters:
jobId - a String identifying the job whose metrics are about to be erased

chgUID

public void chgUID(java.lang.String jobId,
                   java.security.cert.X509Certificate oldUserCtx,
                   java.security.cert.X509Certificate newUserCtx)
            throws java.lang.Exception
Changes the UID associated to a jobId

Parameters:
jobId - identifying the job whose user will change
oldUserCtx - the certificate of the current user
newUserCtx - the new certificate to be used
Throws:
java.lang.Exception

updateJobRequirements

public void updateJobRequirements(java.lang.String jobId,
                                  java.lang.Integer operation,
                                  java.lang.String requirements,
                                  java.lang.Boolean wait,
                                  java.security.cert.X509Certificate userCtx)
                           throws java.lang.Exception
Modifies the requirements passed in createJob function.

Parameters:
jobId - identifying the job to change the requirements of
operation - can be: 0: ADD_RESOURCES 1: RELEASE_RESOURCES 2: EXTEND_RESOURCES
requirements - in a JSDL format specifying the new requirements
wait - if the call is synchronous or asynchronous
userCtx - the user certificate
Throws:
java.lang.Exception

jobMonitoringControl

public void jobMonitoringControl(java.lang.String jobId,
                                 java.lang.Integer op,
                                 java.lang.String level,
                                 java.security.cert.X509Certificate userCtx)
                          throws java.lang.Exception
Updates the characteristics of the monitoring of a job. It allows starting and stopping the monitoring and changing its level. Start and stop can be used to enable/disable all the callbacks with one XOS call. It is still pending to decide whether start/stop will automatically call external start/stop functions.

Parameters:
jobId -
op - can be: 0:START 1:STOP 2:CHANGE_LEVEL
level - the new monitoring level in case op is set to CHANGE_LEVEL. Can be: High, medium or Low
userCtx - the user certificate
Throws:
java.lang.Exception

waitForEvent

public void waitForEvent(java.lang.String jobId,
                         java.lang.Integer event,
                         java.security.cert.X509Certificate userCtx)
                  throws java.lang.Exception
Blocks the calling process until "event" is received.

Parameters:
jobId -
event - the event the process is waiting for
userCtx - the user certificate
Throws:
java.lang.Exception

identifyJobManagers

public void identifyJobManagers(java.util.ArrayList<java.lang.String> dependentJobs,
                                java.lang.String initialJobId,
                                java.lang.String strategy,
                                java.lang.String options,
                                java.lang.String mode,
                                java.security.cert.X509Certificate cert)
Identifies the job manager address of each job.


getJobManagerAddressesCB

public java.lang.Object getJobManagerAddressesCB(java.util.ArrayList<CommunicationAddress> addrJobList)
                                          throws java.lang.Exception
The range of addresses equals the range of dependent jobs. Saves the job manager addresses. BARRIER: contacts the super job cp after all job manager addresses have been collected.

Parameters:
addrJobList -
Returns:
Throws:
java.lang.Exception

lockCheckpoint

public void lockCheckpoint(java.lang.String jobId,
                           java.lang.String initialJobId,
                           java.security.cert.X509Certificate userCert,
                           CommunicationAddress jobCpAddr,
                           CommunicationAddress superJobCpAddr,
                           java.lang.String strategy,
                           java.lang.String options,
                           java.lang.String mode,
                           java.util.ArrayList<java.lang.String> dependentJobs)
                    throws java.lang.Exception
Locks a checkpoint action. The private member jobsList is needed; the JSDL file and the executable name are piggybacked.

Throws:
java.lang.Exception

unlockCheckpoint

public void unlockCheckpoint(java.lang.String jobId)
                      throws java.lang.Exception
Throws:
java.lang.Exception

lockRestart

public void lockRestart(java.lang.String jobId)

unlockRestart

public void unlockRestart(java.lang.String jobId)

recreateJob

public void recreateJob(java.lang.String jobId,
                        java.lang.String initialJobId,
                        java.lang.String jsdlFileContent,
                        java.lang.String checkpointVersion,
                        CommunicationAddress superJobCpAddr,
                        java.util.ArrayList<CommunicationAddress> jobResourceList,
                        java.security.cert.X509Certificate userCert)
                 throws java.lang.Exception
Sets up the structures that represent a job in the AEM at restart.

Throws:
java.lang.Exception

checkGridNodeForJobUnitRestart

boolean checkGridNodeForJobUnitRestart(java.util.ArrayList<CommunicationAddress> list,
                                       java.lang.String jobId,
                                       java.lang.Integer jobUnitId)
Verifies that each job unit of a job resides on a separate grid node.

Parameters:
list -
jobId -
jobUnitId -
Returns:

returnCB

public java.lang.Object returnCB(java.lang.Object obj)

returnCBE

public java.lang.Object returnCBE(java.lang.Exception ex)
                           throws java.lang.Exception
Throws:
java.lang.Exception

handleEvent

public void handleEvent(java.lang.Object event)
                 throws java.lang.Exception
Specified by:
handleEvent in interface eu.xtreemos.system.eventmachine.queue.IEventHandler
Specified by:
handleEvent in class eu.xtreemos.system.eventmachine.stage.AbstractReceivingStage
Throws:
java.lang.Exception

getHandledEventType

public java.lang.String getHandledEventType()
Specified by:
getHandledEventType in class eu.xtreemos.system.eventmachine.stage.AbstractReceivingStage