Skip to content

jobs

JobState

Bases: str, Enum

JobStates

BOOT_FAIL = 'BOOT_FAIL' class-attribute instance-attribute

CANCELLED = 'CANCELLED' class-attribute instance-attribute

COMPLETED = 'COMPLETED' class-attribute instance-attribute

COMPLETING = 'COMPLETING' class-attribute instance-attribute

CONFIGURING = 'CONFIGURING' class-attribute instance-attribute

DEADLINE = 'DEADLINE' class-attribute instance-attribute

FAILED = 'FAILED' class-attribute instance-attribute

NODE_FAIL = 'NODE_FAIL' class-attribute instance-attribute

OUT_OF_MEMORY = 'OUT_OF_MEMORY' class-attribute instance-attribute

PENDING = 'PENDING' class-attribute instance-attribute

PREEMPTED = 'PREEMPTED' class-attribute instance-attribute

REQUEUED = 'REQUEUED' class-attribute instance-attribute

REQUEUE_FED = 'REQUEUE_FED' class-attribute instance-attribute

REQUEUE_HOLD = 'REQUEUE_HOLD' class-attribute instance-attribute

RESIZING = 'RESIZING' class-attribute instance-attribute

RESV_DEL_HOLD = 'RESV_DEL_HOLD' class-attribute instance-attribute

REVOKED = 'REVOKED' class-attribute instance-attribute

RUNNING = 'RUNNING' class-attribute instance-attribute

SIGNALING = 'SIGNALING' class-attribute instance-attribute

SPECIAL_EXIT = 'SPECIAL_EXIT' class-attribute instance-attribute

STAGE_OUT = 'STAGE_OUT' class-attribute instance-attribute

STOPPED = 'STOPPED' class-attribute instance-attribute

SUSPENDED = 'SUSPENDED' class-attribute instance-attribute

TIMEOUT = 'TIMEOUT' class-attribute instance-attribute

AsyncJob

Bases: BaseModel, ABC

Models a job submitted to run on a compute resource.

compute = None class-attribute instance-attribute

jobid = None class-attribute instance-attribute

state = None class-attribute instance-attribute

cancel(wait=False) async

Cancel a running job

Parameters:

Name Type Description Default
wait bool

True to wait for the job to be cancelled; otherwise returns as soon as the cancellation has been submitted.

False
Source code in sfapi_client/_async/jobs.py
async def cancel(self, wait=False):
    """
    Cancel a running job.

    :param wait: True to wait until the job has been cancelled (or has
    otherwise reached a terminal state); False to return as soon as the
    cancellation request has been submitted.
    :type wait: bool
    """
    # We have to wait for a jobid before we can cancel. The delay is passed
    # explicitly so this poll uses the same interval as the one below.
    while self.jobid is None:
        await _ASYNC_SLEEP(self.compute.client._wait_interval)

    await self.compute.client.delete(
        f"compute/jobs/{self.compute.name}/{self.jobid}"
    )

    if wait:
        # A job that finishes on its own before the cancellation takes
        # effect never reports CANCELLED, so stop on any terminal state
        # rather than polling forever.
        while self.state != JobState.CANCELLED and self.state not in TERMINAL_STATES:
            await self.update()
            await _ASYNC_SLEEP(self.compute.client._wait_interval)

complete(timeout=sys.maxsize) async

Wait for a job to move into a terminal state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_async/jobs.py
async def complete(self, timeout: int = sys.maxsize):
    """
    Block until the job has reached a terminal state.

    :param timeout: The maximum time to wait in seconds; the actual
    wait time will be in 10 second increments.
    :return: whatever ``_wait_until_complete`` returns
    :raises TimeoutError: if timeout is reached
    """
    outcome = await self._wait_until_complete(timeout)
    return outcome

dict(*args, **kwargs)

Source code in sfapi_client/_async/jobs.py
def dict(self, *args, **kwargs) -> Dict:
    """
    Serialise the model to a dictionary.

    The ``compute`` field is excluded by default; passing an explicit
    ``exclude`` keyword overrides that default entirely.
    """
    # Only supply our exclusion when the caller has not chosen their own.
    kwargs.setdefault("exclude", {"compute"})
    return super().dict(*args, **kwargs)

running(timeout=sys.maxsize) async

Wait for a job to move into running state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_async/jobs.py
async def running(self, timeout: int = sys.maxsize):
    """
    Block until the job is running.

    :param timeout: The maximum time to wait in seconds; the actual wait
    time will be in 10 second increments.
    :return: ``JobState.RUNNING`` once the job has entered that state
    :raises TimeoutError: if timeout is reached
    :raises SfApiError: if the wait ends in any state other than RUNNING
    """
    # Terminal states are included so the wait also ends when the job
    # finishes without ever running.
    awaited_states = [JobState.RUNNING] + TERMINAL_STATES
    reached = await self._wait_until(awaited_states, timeout)

    if reached == JobState.RUNNING:
        return reached

    raise SfApiError(
        f"Job never entered the running state, end state was: {reached}"
    )

state_validate(v)

Source code in sfapi_client/_async/jobs.py
@field_validator("state", mode="before", check_fields=False)
def state_validate(cls, v):
    """
    Normalise the raw ``state`` value before it is validated.

    :param v: the raw value supplied for ``state``
    :return: ``"CANCELLED"`` when ``v`` is a string starting with
    ``"CANCELLED by"``, otherwise ``v`` unchanged
    """
    # sacct returns a state of the form "CANCELLED by XXXX" for the
    # cancelled state; coerce it into a value that will match a state
    # modeled by the enum.
    # Only strings can carry that suffix. Anything else (e.g. None) is
    # passed through untouched so regular field validation handles it,
    # rather than failing here with AttributeError.
    if isinstance(v, str) and v.startswith("CANCELLED by"):
        return "CANCELLED"

    return v

update() async

Update the state of the job by fetching the state from the compute resource.

Source code in sfapi_client/_async/jobs.py
async def update(self):
    """
    Refresh this job from the compute resource.

    Fetches the current state via ``_fetch_state`` and applies it to this
    object with ``_update``.
    """
    self._update(await self._fetch_state())

AsyncJobSacct

Models a job running on a compute resource, the information is fetched using sacct.

account = None class-attribute instance-attribute

array_job_id = None class-attribute instance-attribute

array_task_id = None class-attribute instance-attribute

command = None class-attribute instance-attribute

comment = None class-attribute instance-attribute

contiguous = None class-attribute instance-attribute

core_spec = None class-attribute instance-attribute

cores_per_socket = None class-attribute instance-attribute

cpus = None class-attribute instance-attribute

dependency = None class-attribute instance-attribute

end_time = None class-attribute instance-attribute

exc_nodes = None class-attribute instance-attribute

exec_host = None class-attribute instance-attribute

features = None class-attribute instance-attribute

field_ = None class-attribute instance-attribute

group = None class-attribute instance-attribute

jobid = None class-attribute instance-attribute

licenses = None class-attribute instance-attribute

min_cpus = None class-attribute instance-attribute

min_memory = None class-attribute instance-attribute

min_tmp_disk = None class-attribute instance-attribute

name = None class-attribute instance-attribute

nice = None class-attribute instance-attribute

nodelist = None class-attribute instance-attribute

nodelist_reason_ = Field(None, alias='nodelist(reason)') class-attribute instance-attribute

nodes = None class-attribute instance-attribute

over_subscribe = None class-attribute instance-attribute

partition = None class-attribute instance-attribute

priority = None class-attribute instance-attribute

qos = None class-attribute instance-attribute

reason = None class-attribute instance-attribute

req_nodes = None class-attribute instance-attribute

reservation = None class-attribute instance-attribute

s_c_t = Field(None, alias='s:c:t') class-attribute instance-attribute

schednodes = None class-attribute instance-attribute

sockets_per_node = None class-attribute instance-attribute

st = None class-attribute instance-attribute

start_time = None class-attribute instance-attribute

state = None class-attribute instance-attribute

submit_time = None class-attribute instance-attribute

threads_per_core = None class-attribute instance-attribute

time = None class-attribute instance-attribute

time_left = None class-attribute instance-attribute

time_limit = None class-attribute instance-attribute

tres_per_node = None class-attribute instance-attribute

uid = None class-attribute instance-attribute

user = None class-attribute instance-attribute

wckey = None class-attribute instance-attribute

work_dir = None class-attribute instance-attribute

AsyncJobSqueue

Models a job running on a compute resource, the information is fetched using squeue.

account = None class-attribute instance-attribute

admincomment = None class-attribute instance-attribute

alloccpus = None class-attribute instance-attribute

allocnodes = None class-attribute instance-attribute

alloctres = None class-attribute instance-attribute

associd = None class-attribute instance-attribute

avecpu = None class-attribute instance-attribute

avecpufreq = None class-attribute instance-attribute

avediskread = None class-attribute instance-attribute

avediskwrite = None class-attribute instance-attribute

avepages = None class-attribute instance-attribute

averss = None class-attribute instance-attribute

avevmsize = None class-attribute instance-attribute

blockid = None class-attribute instance-attribute

cluster = None class-attribute instance-attribute

comment = None class-attribute instance-attribute

constraints = None class-attribute instance-attribute

consumedenergy = None class-attribute instance-attribute

consumedenergyraw = None class-attribute instance-attribute

cputime = None class-attribute instance-attribute

cputimeraw = None class-attribute instance-attribute

dbindex = None class-attribute instance-attribute

derivedexitcode = None class-attribute instance-attribute

elapsed = None class-attribute instance-attribute

elapsedraw = None class-attribute instance-attribute

eligible = None class-attribute instance-attribute

end = None class-attribute instance-attribute

exitcode = None class-attribute instance-attribute

flags = None class-attribute instance-attribute

gid = None class-attribute instance-attribute

group = None class-attribute instance-attribute

jobid = None class-attribute instance-attribute

jobidraw = None class-attribute instance-attribute

jobname = None class-attribute instance-attribute

layout = None class-attribute instance-attribute

maxdiskread = None class-attribute instance-attribute

maxdiskreadnode = None class-attribute instance-attribute

maxdiskreadtask = None class-attribute instance-attribute

maxdiskwrite = None class-attribute instance-attribute

maxdiskwritenode = None class-attribute instance-attribute

maxdiskwritetask = None class-attribute instance-attribute

maxpages = None class-attribute instance-attribute

maxpagesnode = None class-attribute instance-attribute

maxpagestask = None class-attribute instance-attribute

maxrss = None class-attribute instance-attribute

maxrssnode = None class-attribute instance-attribute

maxrsstask = None class-attribute instance-attribute

maxvmsize = None class-attribute instance-attribute

maxvmsizenode = None class-attribute instance-attribute

maxvmsizetask = None class-attribute instance-attribute

mcslabel = None class-attribute instance-attribute

mincpu = None class-attribute instance-attribute

mincpunode = None class-attribute instance-attribute

mincputask = None class-attribute instance-attribute

ncpus = None class-attribute instance-attribute

nnodes = None class-attribute instance-attribute

nodelist = None class-attribute instance-attribute

ntasks = None class-attribute instance-attribute

partition = None class-attribute instance-attribute

priority = None class-attribute instance-attribute

qos = None class-attribute instance-attribute

qosraw = None class-attribute instance-attribute

reason = None class-attribute instance-attribute

reqcpufreq = None class-attribute instance-attribute

reqcpufreqgov = None class-attribute instance-attribute

reqcpufreqmax = None class-attribute instance-attribute

reqcpufreqmin = None class-attribute instance-attribute

reqcpus = None class-attribute instance-attribute

reqmem = None class-attribute instance-attribute

reqnodes = None class-attribute instance-attribute

reqtres = None class-attribute instance-attribute

reservation = None class-attribute instance-attribute

reservationid = None class-attribute instance-attribute

reserved = None class-attribute instance-attribute

resvcpu = None class-attribute instance-attribute

resvcpuraw = None class-attribute instance-attribute

start = None class-attribute instance-attribute

state = None class-attribute instance-attribute

submit = None class-attribute instance-attribute

suspended = None class-attribute instance-attribute

systemcomment = None class-attribute instance-attribute

systemcpu = None class-attribute instance-attribute

timelimit = None class-attribute instance-attribute

timelimitraw = None class-attribute instance-attribute

totalcpu = None class-attribute instance-attribute

tresusageinave = None class-attribute instance-attribute

tresusageinmax = None class-attribute instance-attribute

tresusageinmaxnode = None class-attribute instance-attribute

tresusageinmaxtask = None class-attribute instance-attribute

tresusageinmin = None class-attribute instance-attribute

tresusageinminnode = None class-attribute instance-attribute

tresusageinmintask = None class-attribute instance-attribute

tresusageintot = None class-attribute instance-attribute

tresusageoutave = None class-attribute instance-attribute

tresusageoutmax = None class-attribute instance-attribute

tresusageoutmaxnode = None class-attribute instance-attribute

tresusageoutmaxtask = None class-attribute instance-attribute

tresusageoutmin = None class-attribute instance-attribute

tresusageoutminnode = None class-attribute instance-attribute

tresusageoutmintask = None class-attribute instance-attribute

tresusageouttot = None class-attribute instance-attribute

uid = None class-attribute instance-attribute

user = None class-attribute instance-attribute

usercpu = None class-attribute instance-attribute

wckey = None class-attribute instance-attribute

wckeyid = None class-attribute instance-attribute

workdir = None class-attribute instance-attribute

Models a job running on a compute resource, the information is fetched using squeue.

account = None class-attribute instance-attribute

array_job_id = None class-attribute instance-attribute

array_task_id = None class-attribute instance-attribute

command = None class-attribute instance-attribute

comment = None class-attribute instance-attribute

compute = None class-attribute instance-attribute

contiguous = None class-attribute instance-attribute

core_spec = None class-attribute instance-attribute

cores_per_socket = None class-attribute instance-attribute

cpus = None class-attribute instance-attribute

dependency = None class-attribute instance-attribute

end_time = None class-attribute instance-attribute

exc_nodes = None class-attribute instance-attribute

exec_host = None class-attribute instance-attribute

features = None class-attribute instance-attribute

field_ = None class-attribute instance-attribute

group = None class-attribute instance-attribute

jobid = None class-attribute instance-attribute

licenses = None class-attribute instance-attribute

min_cpus = None class-attribute instance-attribute

min_memory = None class-attribute instance-attribute

min_tmp_disk = None class-attribute instance-attribute

name = None class-attribute instance-attribute

nice = None class-attribute instance-attribute

nodelist = None class-attribute instance-attribute

nodelist_reason_ = Field(None, alias='nodelist(reason)') class-attribute instance-attribute

nodes = None class-attribute instance-attribute

over_subscribe = None class-attribute instance-attribute

partition = None class-attribute instance-attribute

priority = None class-attribute instance-attribute

qos = None class-attribute instance-attribute

reason = None class-attribute instance-attribute

req_nodes = None class-attribute instance-attribute

reservation = None class-attribute instance-attribute

s_c_t = Field(None, alias='s:c:t') class-attribute instance-attribute

schednodes = None class-attribute instance-attribute

sockets_per_node = None class-attribute instance-attribute

st = None class-attribute instance-attribute

start_time = None class-attribute instance-attribute

state = None class-attribute instance-attribute

submit_time = None class-attribute instance-attribute

threads_per_core = None class-attribute instance-attribute

time = None class-attribute instance-attribute

time_left = None class-attribute instance-attribute

time_limit = None class-attribute instance-attribute

tres_per_node = None class-attribute instance-attribute

uid = None class-attribute instance-attribute

user = None class-attribute instance-attribute

wckey = None class-attribute instance-attribute

work_dir = None class-attribute instance-attribute

cancel(wait=False) async

Cancel a running job

Parameters:

Name Type Description Default
wait bool

True to wait for the job to be cancelled; otherwise returns as soon as the cancellation has been submitted.

False
Source code in sfapi_client/_async/jobs.py
async def cancel(self, wait=False):
    """
    Cancel a running job.

    :param wait: True to wait until the job has been cancelled (or has
    otherwise reached a terminal state); False to return as soon as the
    cancellation request has been submitted.
    :type wait: bool
    """
    # We have to wait for a jobid before we can cancel. The delay is passed
    # explicitly so this poll uses the same interval as the one below.
    while self.jobid is None:
        await _ASYNC_SLEEP(self.compute.client._wait_interval)

    await self.compute.client.delete(
        f"compute/jobs/{self.compute.name}/{self.jobid}"
    )

    if wait:
        # A job that finishes on its own before the cancellation takes
        # effect never reports CANCELLED, so stop on any terminal state
        # rather than polling forever.
        while self.state != JobState.CANCELLED and self.state not in TERMINAL_STATES:
            await self.update()
            await _ASYNC_SLEEP(self.compute.client._wait_interval)

complete(timeout=sys.maxsize) async

Wait for a job to move into a terminal state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_async/jobs.py
async def complete(self, timeout: int = sys.maxsize):
    """
    Block until the job has reached a terminal state.

    :param timeout: The maximum time to wait in seconds; the actual
    wait time will be in 10 second increments.
    :return: whatever ``_wait_until_complete`` returns
    :raises TimeoutError: if timeout is reached
    """
    outcome = await self._wait_until_complete(timeout)
    return outcome

dict(*args, **kwargs)

Source code in sfapi_client/_async/jobs.py
def dict(self, *args, **kwargs) -> Dict:
    """
    Serialise the model to a dictionary.

    The ``compute`` field is excluded by default; passing an explicit
    ``exclude`` keyword overrides that default entirely.
    """
    # Only supply our exclusion when the caller has not chosen their own.
    kwargs.setdefault("exclude", {"compute"})
    return super().dict(*args, **kwargs)

running(timeout=sys.maxsize) async

Wait for a job to move into running state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_async/jobs.py
async def running(self, timeout: int = sys.maxsize):
    """
    Block until the job is running.

    :param timeout: The maximum time to wait in seconds; the actual wait
    time will be in 10 second increments.
    :return: ``JobState.RUNNING`` once the job has entered that state
    :raises TimeoutError: if timeout is reached
    :raises SfApiError: if the wait ends in any state other than RUNNING
    """
    # Terminal states are included so the wait also ends when the job
    # finishes without ever running.
    awaited_states = [JobState.RUNNING] + TERMINAL_STATES
    reached = await self._wait_until(awaited_states, timeout)

    if reached == JobState.RUNNING:
        return reached

    raise SfApiError(
        f"Job never entered the running state, end state was: {reached}"
    )

state_validate(v)

Source code in sfapi_client/_async/jobs.py
@field_validator("state", mode="before", check_fields=False)
def state_validate(cls, v):
    """
    Normalise the raw ``state`` value before it is validated.

    :param v: the raw value supplied for ``state``
    :return: ``"CANCELLED"`` when ``v`` is a string starting with
    ``"CANCELLED by"``, otherwise ``v`` unchanged
    """
    # sacct returns a state of the form "CANCELLED by XXXX" for the
    # cancelled state; coerce it into a value that will match a state
    # modeled by the enum.
    # Only strings can carry that suffix. Anything else (e.g. None) is
    # passed through untouched so regular field validation handles it,
    # rather than failing here with AttributeError.
    if isinstance(v, str) and v.startswith("CANCELLED by"):
        return "CANCELLED"

    return v

update() async

Update the state of the job by fetching the state from the compute resource.

Source code in sfapi_client/_async/jobs.py
async def update(self):
    """
    Refresh this job from the compute resource.

    Fetches the current state via ``_fetch_state`` and applies it to this
    object with ``_update``.
    """
    self._update(await self._fetch_state())