Skip to content

jobs

JobState

Bases: str, Enum

JobStates

BOOT_FAIL = 'BOOT_FAIL' class-attribute instance-attribute

CANCELLED = 'CANCELLED' class-attribute instance-attribute

COMPLETED = 'COMPLETED' class-attribute instance-attribute

COMPLETING = 'COMPLETING' class-attribute instance-attribute

CONFIGURING = 'CONFIGURING' class-attribute instance-attribute

DEADLINE = 'DEADLINE' class-attribute instance-attribute

FAILED = 'FAILED' class-attribute instance-attribute

NODE_FAIL = 'NODE_FAIL' class-attribute instance-attribute

OUT_OF_MEMORY = 'OUT_OF_MEMORY' class-attribute instance-attribute

PENDING = 'PENDING' class-attribute instance-attribute

PREEMPTED = 'PREEMPTED' class-attribute instance-attribute

REQUEUED = 'REQUEUED' class-attribute instance-attribute

REQUEUE_FED = 'REQUEUE_FED' class-attribute instance-attribute

REQUEUE_HOLD = 'REQUEUE_HOLD' class-attribute instance-attribute

RESIZING = 'RESIZING' class-attribute instance-attribute

RESV_DEL_HOLD = 'RESV_DEL_HOLD' class-attribute instance-attribute

REVOKED = 'REVOKED' class-attribute instance-attribute

RUNNING = 'RUNNING' class-attribute instance-attribute

SIGNALING = 'SIGNALING' class-attribute instance-attribute

SPECIAL_EXIT = 'SPECIAL_EXIT' class-attribute instance-attribute

STAGE_OUT = 'STAGE_OUT' class-attribute instance-attribute

STOPPED = 'STOPPED' class-attribute instance-attribute

SUSPENDED = 'SUSPENDED' class-attribute instance-attribute

TIMEOUT = 'TIMEOUT' class-attribute instance-attribute

Job

Bases: BaseModel, ABC

Models a job submitted to run on a compute resource.

compute: Optional['Compute'] = None class-attribute instance-attribute

jobid: Optional[str] = None class-attribute instance-attribute

state: Optional[JobState] = None class-attribute instance-attribute

cancel(wait=False)

Cancel a running job

Parameters:

Name Type Description Default
wait bool

True to wait for the job to be cancelled; otherwise returns once the cancellation has been submitted.

False
Source code in sfapi_client/_sync/jobs.py
def cancel(self, wait=False):
    """
    Cancel a running job

    The cancellation is submitted as a DELETE request on
    ``compute/jobs/<compute name>/<jobid>`` through the compute's client.

    :param wait: True to wait for the job to be cancelled, otherwise returns
    once the cancellation has been submitted.
    :type wait: bool
    """
    # We have to wait for a jobid before we can cancel. Sleep for the
    # client's polling interval, exactly as the wait loop below does; a bare
    # sleep call has no duration to wait for.
    while self.jobid is None:
        _SLEEP(self.compute.client._wait_interval)

    self.compute.client.delete(
        f"compute/jobs/{self.compute.name}/{self.jobid}"
    )

    if wait:
        # NOTE(review): this only stops once the state is CANCELLED. If the
        # job ends in a different final state before the cancellation takes
        # effect, the loop looks like it would never exit - confirm.
        while self.state != JobState.CANCELLED:
            self.update()
            _SLEEP(self.compute.client._wait_interval)

complete(timeout=sys.maxsize)

Wait for a job to move into a terminal state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_sync/jobs.py
def complete(self, timeout: int = sys.maxsize):
    """
    Block until the job has moved into a terminal state.

    This is a thin public wrapper that delegates to
    ``_wait_until_complete``.

    :param timeout: The maximum time to wait in seconds, the actual
    wait time will be in 10 second increments.
    :raises TimeoutError: if timeout is reached
    :return: whatever ``_wait_until_complete`` returns
    """
    outcome = self._wait_until_complete(timeout)
    return outcome

dict(*args, **kwargs)

Source code in sfapi_client/_sync/jobs.py
def dict(self, *args, **kwargs) -> Dict:
    """
    Build the dictionary form of the job via the parent implementation.

    Unless the caller passes its own ``exclude`` argument, the ``compute``
    field is excluded from the result.
    """
    # Only fills in the default when the caller did not pass "exclude".
    kwargs.setdefault("exclude", {"compute"})
    return super().dict(*args, **kwargs)

running(timeout=sys.maxsize)

Wait for a job to move into running state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_sync/jobs.py
def running(self, timeout: int = sys.maxsize):
    """
    Block until the job has moved into the running state.

    The wait is for ``JobState.RUNNING`` or any of ``TERMINAL_STATES``;
    ending up in anything other than RUNNING is reported as an error.

    :param timeout: The maximum time to wait in seconds, the actual wait
    time will be in 10 second increments.
    :raises TimeoutError: if timeout is reached
    :raises SfApiError: if the wait ended in a state other than RUNNING
    :return: the state that was reached (equal to ``JobState.RUNNING``)
    """
    awaited_states = [JobState.RUNNING] + TERMINAL_STATES
    reached = self._wait_until(awaited_states, timeout)

    if reached == JobState.RUNNING:
        return reached

    raise SfApiError(
        f"Job never entered the running state, end state was: {reached}"
    )

state_validate(v)

Source code in sfapi_client/_sync/jobs.py
@field_validator("state", mode="before", check_fields=False)
@classmethod
def state_validate(cls, v):
    """
    Normalise a raw ``state`` value before it is validated.

    :param v: the raw value supplied for ``state``
    :return: ``"CANCELLED"`` when ``v`` is a string starting with
    ``"CANCELLED by"``, otherwise ``v`` unchanged
    """
    # sacct returns a state of the form "CANCELLED by XXXX" for the
    # cancelled state, coerce into value that will match a state
    # modeled by the enum.
    # Only strings can carry that suffix; any other input (e.g. None) is
    # passed through untouched instead of raising AttributeError here.
    if isinstance(v, str) and v.startswith("CANCELLED by"):
        return "CANCELLED"

    return v

update()

Update the state of the job by fetching the state from the compute resource.

Source code in sfapi_client/_sync/jobs.py
def update(self):
    """
    Refresh the job from the compute resource.

    Fetches the current state with ``_fetch_state`` and hands the result
    to ``_update``.
    """
    self._update(self._fetch_state())

JobSacct

Models a job running on a compute resource, the information is fetched using sacct.

account: Optional[str] = None class-attribute instance-attribute

array_job_id: Optional[str] = None class-attribute instance-attribute

array_task_id: Optional[str] = None class-attribute instance-attribute

command: Optional[str] = None class-attribute instance-attribute

comment: Optional[str] = None class-attribute instance-attribute

contiguous: Optional[str] = None class-attribute instance-attribute

core_spec: Optional[str] = None class-attribute instance-attribute

cores_per_socket: Optional[str] = None class-attribute instance-attribute

cpus: Optional[str] = None class-attribute instance-attribute

dependency: Optional[str] = None class-attribute instance-attribute

end_time: Optional[str] = None class-attribute instance-attribute

exc_nodes: Optional[str] = None class-attribute instance-attribute

exec_host: Optional[str] = None class-attribute instance-attribute

features: Optional[str] = None class-attribute instance-attribute

field_: Optional[str] = None class-attribute instance-attribute

group: Optional[str] = None class-attribute instance-attribute

jobid: Optional[str] = None class-attribute instance-attribute

licenses: Optional[str] = None class-attribute instance-attribute

min_cpus: Optional[str] = None class-attribute instance-attribute

min_memory: Optional[str] = None class-attribute instance-attribute

min_tmp_disk: Optional[str] = None class-attribute instance-attribute

name: Optional[str] = None class-attribute instance-attribute

nice: Optional[str] = None class-attribute instance-attribute

nodelist: Optional[str] = None class-attribute instance-attribute

nodelist_reason_: Optional[str] = Field(None, alias='nodelist(reason)') class-attribute instance-attribute

nodes: Optional[str] = None class-attribute instance-attribute

over_subscribe: Optional[str] = None class-attribute instance-attribute

partition: Optional[str] = None class-attribute instance-attribute

priority: Optional[str] = None class-attribute instance-attribute

qos: Optional[str] = None class-attribute instance-attribute

reason: Optional[str] = None class-attribute instance-attribute

req_nodes: Optional[str] = None class-attribute instance-attribute

reservation: Optional[str] = None class-attribute instance-attribute

s_c_t: Optional[str] = Field(None, alias='s:c:t') class-attribute instance-attribute

schednodes: Optional[str] = None class-attribute instance-attribute

sockets_per_node: Optional[str] = None class-attribute instance-attribute

st: Optional[str] = None class-attribute instance-attribute

start_time: Optional[str] = None class-attribute instance-attribute

state: Optional[str] = None class-attribute instance-attribute

submit_time: Optional[str] = None class-attribute instance-attribute

threads_per_core: Optional[str] = None class-attribute instance-attribute

time: Optional[str] = None class-attribute instance-attribute

time_left: Optional[str] = None class-attribute instance-attribute

time_limit: Optional[str] = None class-attribute instance-attribute

tres_per_node: Optional[str] = None class-attribute instance-attribute

uid: Optional[str] = None class-attribute instance-attribute

user: Optional[str] = None class-attribute instance-attribute

wckey: Optional[str] = None class-attribute instance-attribute

work_dir: Optional[str] = None class-attribute instance-attribute

JobSqueue

Models a job running on a compute resource, the information is fetched using squeue.

account: Optional[str] = None class-attribute instance-attribute

admincomment: Optional[str] = None class-attribute instance-attribute

alloccpus: Optional[str] = None class-attribute instance-attribute

allocnodes: Optional[str] = None class-attribute instance-attribute

alloctres: Optional[str] = None class-attribute instance-attribute

associd: Optional[str] = None class-attribute instance-attribute

avecpu: Optional[str] = None class-attribute instance-attribute

avecpufreq: Optional[str] = None class-attribute instance-attribute

avediskread: Optional[str] = None class-attribute instance-attribute

avediskwrite: Optional[str] = None class-attribute instance-attribute

avepages: Optional[str] = None class-attribute instance-attribute

averss: Optional[str] = None class-attribute instance-attribute

avevmsize: Optional[str] = None class-attribute instance-attribute

blockid: Optional[str] = None class-attribute instance-attribute

cluster: Optional[str] = None class-attribute instance-attribute

comment: Optional[str] = None class-attribute instance-attribute

constraints: Optional[str] = None class-attribute instance-attribute

consumedenergy: Optional[str] = None class-attribute instance-attribute

consumedenergyraw: Optional[str] = None class-attribute instance-attribute

cputime: Optional[str] = None class-attribute instance-attribute

cputimeraw: Optional[str] = None class-attribute instance-attribute

dbindex: Optional[str] = None class-attribute instance-attribute

derivedexitcode: Optional[str] = None class-attribute instance-attribute

elapsed: Optional[str] = None class-attribute instance-attribute

elapsedraw: Optional[str] = None class-attribute instance-attribute

eligible: Optional[str] = None class-attribute instance-attribute

end: Optional[str] = None class-attribute instance-attribute

exitcode: Optional[str] = None class-attribute instance-attribute

flags: Optional[str] = None class-attribute instance-attribute

gid: Optional[str] = None class-attribute instance-attribute

group: Optional[str] = None class-attribute instance-attribute

jobid: Optional[str] = None class-attribute instance-attribute

jobidraw: Optional[str] = None class-attribute instance-attribute

jobname: Optional[str] = None class-attribute instance-attribute

layout: Optional[str] = None class-attribute instance-attribute

maxdiskread: Optional[str] = None class-attribute instance-attribute

maxdiskreadnode: Optional[str] = None class-attribute instance-attribute

maxdiskreadtask: Optional[str] = None class-attribute instance-attribute

maxdiskwrite: Optional[str] = None class-attribute instance-attribute

maxdiskwritenode: Optional[str] = None class-attribute instance-attribute

maxdiskwritetask: Optional[str] = None class-attribute instance-attribute

maxpages: Optional[str] = None class-attribute instance-attribute

maxpagesnode: Optional[str] = None class-attribute instance-attribute

maxpagestask: Optional[str] = None class-attribute instance-attribute

maxrss: Optional[str] = None class-attribute instance-attribute

maxrssnode: Optional[str] = None class-attribute instance-attribute

maxrsstask: Optional[str] = None class-attribute instance-attribute

maxvmsize: Optional[str] = None class-attribute instance-attribute

maxvmsizenode: Optional[str] = None class-attribute instance-attribute

maxvmsizetask: Optional[str] = None class-attribute instance-attribute

mcslabel: Optional[str] = None class-attribute instance-attribute

mincpu: Optional[str] = None class-attribute instance-attribute

mincpunode: Optional[str] = None class-attribute instance-attribute

mincputask: Optional[str] = None class-attribute instance-attribute

ncpus: Optional[str] = None class-attribute instance-attribute

nnodes: Optional[str] = None class-attribute instance-attribute

nodelist: Optional[str] = None class-attribute instance-attribute

ntasks: Optional[str] = None class-attribute instance-attribute

partition: Optional[str] = None class-attribute instance-attribute

priority: Optional[str] = None class-attribute instance-attribute

qos: Optional[str] = None class-attribute instance-attribute

qosraw: Optional[str] = None class-attribute instance-attribute

reason: Optional[str] = None class-attribute instance-attribute

reqcpufreq: Optional[str] = None class-attribute instance-attribute

reqcpufreqgov: Optional[str] = None class-attribute instance-attribute

reqcpufreqmax: Optional[str] = None class-attribute instance-attribute

reqcpufreqmin: Optional[str] = None class-attribute instance-attribute

reqcpus: Optional[str] = None class-attribute instance-attribute

reqmem: Optional[str] = None class-attribute instance-attribute

reqnodes: Optional[str] = None class-attribute instance-attribute

reqtres: Optional[str] = None class-attribute instance-attribute

reservation: Optional[str] = None class-attribute instance-attribute

reservationid: Optional[str] = None class-attribute instance-attribute

reserved: Optional[str] = None class-attribute instance-attribute

resvcpu: Optional[str] = None class-attribute instance-attribute

resvcpuraw: Optional[str] = None class-attribute instance-attribute

start: Optional[str] = None class-attribute instance-attribute

state: Optional[str] = None class-attribute instance-attribute

submit: Optional[str] = None class-attribute instance-attribute

suspended: Optional[str] = None class-attribute instance-attribute

systemcomment: Optional[str] = None class-attribute instance-attribute

systemcpu: Optional[str] = None class-attribute instance-attribute

timelimit: Optional[str] = None class-attribute instance-attribute

timelimitraw: Optional[str] = None class-attribute instance-attribute

totalcpu: Optional[str] = None class-attribute instance-attribute

tresusageinave: Optional[str] = None class-attribute instance-attribute

tresusageinmax: Optional[str] = None class-attribute instance-attribute

tresusageinmaxnode: Optional[str] = None class-attribute instance-attribute

tresusageinmaxtask: Optional[str] = None class-attribute instance-attribute

tresusageinmin: Optional[str] = None class-attribute instance-attribute

tresusageinminnode: Optional[str] = None class-attribute instance-attribute

tresusageinmintask: Optional[str] = None class-attribute instance-attribute

tresusageintot: Optional[str] = None class-attribute instance-attribute

tresusageoutave: Optional[str] = None class-attribute instance-attribute

tresusageoutmax: Optional[str] = None class-attribute instance-attribute

tresusageoutmaxnode: Optional[str] = None class-attribute instance-attribute

tresusageoutmaxtask: Optional[str] = None class-attribute instance-attribute

tresusageoutmin: Optional[str] = None class-attribute instance-attribute

tresusageoutminnode: Optional[str] = None class-attribute instance-attribute

tresusageoutmintask: Optional[str] = None class-attribute instance-attribute

tresusageouttot: Optional[str] = None class-attribute instance-attribute

uid: Optional[str] = None class-attribute instance-attribute

user: Optional[str] = None class-attribute instance-attribute

usercpu: Optional[str] = None class-attribute instance-attribute

wckey: Optional[str] = None class-attribute instance-attribute

wckeyid: Optional[str] = None class-attribute instance-attribute

workdir: Optional[str] = None class-attribute instance-attribute

Models a job running on a compute resource, the information is fetched using squeue.

account: Optional[str] = None class-attribute instance-attribute

array_job_id: Optional[str] = None class-attribute instance-attribute

array_task_id: Optional[str] = None class-attribute instance-attribute

command: Optional[str] = None class-attribute instance-attribute

comment: Optional[str] = None class-attribute instance-attribute

compute: Optional['Compute'] = None class-attribute instance-attribute

contiguous: Optional[str] = None class-attribute instance-attribute

core_spec: Optional[str] = None class-attribute instance-attribute

cores_per_socket: Optional[str] = None class-attribute instance-attribute

cpus: Optional[str] = None class-attribute instance-attribute

dependency: Optional[str] = None class-attribute instance-attribute

end_time: Optional[str] = None class-attribute instance-attribute

exc_nodes: Optional[str] = None class-attribute instance-attribute

exec_host: Optional[str] = None class-attribute instance-attribute

features: Optional[str] = None class-attribute instance-attribute

field_: Optional[str] = None class-attribute instance-attribute

group: Optional[str] = None class-attribute instance-attribute

jobid: Optional[str] = None class-attribute instance-attribute

licenses: Optional[str] = None class-attribute instance-attribute

min_cpus: Optional[str] = None class-attribute instance-attribute

min_memory: Optional[str] = None class-attribute instance-attribute

min_tmp_disk: Optional[str] = None class-attribute instance-attribute

name: Optional[str] = None class-attribute instance-attribute

nice: Optional[str] = None class-attribute instance-attribute

nodelist: Optional[str] = None class-attribute instance-attribute

nodelist_reason_: Optional[str] = Field(None, alias='nodelist(reason)') class-attribute instance-attribute

nodes: Optional[str] = None class-attribute instance-attribute

over_subscribe: Optional[str] = None class-attribute instance-attribute

partition: Optional[str] = None class-attribute instance-attribute

priority: Optional[str] = None class-attribute instance-attribute

qos: Optional[str] = None class-attribute instance-attribute

reason: Optional[str] = None class-attribute instance-attribute

req_nodes: Optional[str] = None class-attribute instance-attribute

reservation: Optional[str] = None class-attribute instance-attribute

s_c_t: Optional[str] = Field(None, alias='s:c:t') class-attribute instance-attribute

schednodes: Optional[str] = None class-attribute instance-attribute

sockets_per_node: Optional[str] = None class-attribute instance-attribute

st: Optional[str] = None class-attribute instance-attribute

start_time: Optional[str] = None class-attribute instance-attribute

state: Optional[JobState] = None class-attribute instance-attribute

submit_time: Optional[str] = None class-attribute instance-attribute

threads_per_core: Optional[str] = None class-attribute instance-attribute

time: Optional[str] = None class-attribute instance-attribute

time_left: Optional[str] = None class-attribute instance-attribute

time_limit: Optional[str] = None class-attribute instance-attribute

tres_per_node: Optional[str] = None class-attribute instance-attribute

uid: Optional[str] = None class-attribute instance-attribute

user: Optional[str] = None class-attribute instance-attribute

wckey: Optional[str] = None class-attribute instance-attribute

work_dir: Optional[str] = None class-attribute instance-attribute

cancel(wait=False)

Cancel a running job

Parameters:

Name Type Description Default
wait bool

True to wait for the job to be cancelled; otherwise returns once the cancellation has been submitted.

False
Source code in sfapi_client/_sync/jobs.py
def cancel(self, wait=False):
    """
    Cancel a running job

    The cancellation is submitted as a DELETE request on
    ``compute/jobs/<compute name>/<jobid>`` through the compute's client.

    :param wait: True to wait for the job to be cancelled, otherwise returns
    once the cancellation has been submitted.
    :type wait: bool
    """
    # We have to wait for a jobid before we can cancel. Sleep for the
    # client's polling interval, exactly as the wait loop below does; a bare
    # sleep call has no duration to wait for.
    while self.jobid is None:
        _SLEEP(self.compute.client._wait_interval)

    self.compute.client.delete(
        f"compute/jobs/{self.compute.name}/{self.jobid}"
    )

    if wait:
        # NOTE(review): this only stops once the state is CANCELLED. If the
        # job ends in a different final state before the cancellation takes
        # effect, the loop looks like it would never exit - confirm.
        while self.state != JobState.CANCELLED:
            self.update()
            _SLEEP(self.compute.client._wait_interval)

complete(timeout=sys.maxsize)

Wait for a job to move into a terminal state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_sync/jobs.py
def complete(self, timeout: int = sys.maxsize):
    """
    Block until the job has moved into a terminal state.

    This is a thin public wrapper that delegates to
    ``_wait_until_complete``.

    :param timeout: The maximum time to wait in seconds, the actual
    wait time will be in 10 second increments.
    :raises TimeoutError: if timeout is reached
    :return: whatever ``_wait_until_complete`` returns
    """
    outcome = self._wait_until_complete(timeout)
    return outcome

dict(*args, **kwargs)

Source code in sfapi_client/_sync/jobs.py
def dict(self, *args, **kwargs) -> Dict:
    """
    Build the dictionary form of the job via the parent implementation.

    Unless the caller passes its own ``exclude`` argument, the ``compute``
    field is excluded from the result.
    """
    # Only fills in the default when the caller did not pass "exclude".
    kwargs.setdefault("exclude", {"compute"})
    return super().dict(*args, **kwargs)

running(timeout=sys.maxsize)

Wait for a job to move into running state.

Parameters:

Name Type Description Default
timeout int

The maximum time to wait in seconds; the actual wait time will be in 10 second increments.

maxsize

Raises:

Type Description
TimeoutError

if timeout is reached

Source code in sfapi_client/_sync/jobs.py
def running(self, timeout: int = sys.maxsize):
    """
    Block until the job has moved into the running state.

    The wait is for ``JobState.RUNNING`` or any of ``TERMINAL_STATES``;
    ending up in anything other than RUNNING is reported as an error.

    :param timeout: The maximum time to wait in seconds, the actual wait
    time will be in 10 second increments.
    :raises TimeoutError: if timeout is reached
    :raises SfApiError: if the wait ended in a state other than RUNNING
    :return: the state that was reached (equal to ``JobState.RUNNING``)
    """
    awaited_states = [JobState.RUNNING] + TERMINAL_STATES
    reached = self._wait_until(awaited_states, timeout)

    if reached == JobState.RUNNING:
        return reached

    raise SfApiError(
        f"Job never entered the running state, end state was: {reached}"
    )

state_validate(v)

Source code in sfapi_client/_sync/jobs.py
@field_validator("state", mode="before", check_fields=False)
@classmethod
def state_validate(cls, v):
    """
    Normalise a raw ``state`` value before it is validated.

    :param v: the raw value supplied for ``state``
    :return: ``"CANCELLED"`` when ``v`` is a string starting with
    ``"CANCELLED by"``, otherwise ``v`` unchanged
    """
    # sacct returns a state of the form "CANCELLED by XXXX" for the
    # cancelled state, coerce into value that will match a state
    # modeled by the enum.
    # Only strings can carry that suffix; any other input (e.g. None) is
    # passed through untouched instead of raising AttributeError here.
    if isinstance(v, str) and v.startswith("CANCELLED by"):
        return "CANCELLED"

    return v

update()

Update the state of the job by fetching the state from the compute resource.

Source code in sfapi_client/_sync/jobs.py
def update(self):
    """
    Refresh the job from the compute resource.

    Fetches the current state with ``_fetch_state`` and hands the result
    to ``_update``.
    """
    self._update(self._fetch_state())