Multitask jobs support
Since version 0.2.0, you can also use dbx together with the multitask jobs feature.
Please note the following: since the `libraries` attribute is not supported at the job level in multitask jobs, the dependencies will be propagated to every task definition during deployment.
You can read more about multitask jobs here (AWS, Azure, GCP).
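For example, if your project depends on a third-party package, after deployment each task in the multitask job will carry its own `libraries` section. A minimal sketch of what the deployed task definitions effectively look like (the package name `some-dependency` is a hypothetical placeholder):

tasks:
  - task_key: "first-task"
    libraries:
      - pypi:
          package: "some-dependency"
  - task_key: "second-task"
    libraries:
      - pypi:
          package: "some-dependency"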
Here are some examples of multitask job definitions. First, in JSON format:
{
    "default": {
        "jobs": [
            {
                "name": "multitask-job-name",
                "tasks": [
                    {
                        "task_key": "first-task",
                        "description": "some description",
                        "new_cluster": {
                            "spark_version": "7.3.x-cpu-ml-scala2.12",
                            "node_type_id": "<some-node-type>",
                            "num_workers": 2
                        },
                        "max_retries": 0,
                        "spark_python_task": {
                            "python_file": "placeholder_1.py"
                        }
                    },
                    {
                        "task_key": "second",
                        "description": "some description",
                        "new_cluster": {
                            "spark_version": "7.3.x-cpu-ml-scala2.12",
                            "node_type_id": "<some-node-type>",
                            "num_workers": 2
                        },
                        "max_retries": 0,
                        "spark_python_task": {
                            "python_file": "placeholder_2.py"
                        },
                        "depends_on": [
                            {
                                "task_key": "first-task"
                            }
                        ]
                    }
                ]
            }
        ]
    }
}
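To deploy a definition like this, point dbx at the file, e.g. `dbx deploy --deployment-file conf/deployment.json` (flag names may vary slightly between dbx versions). The same job can also be described in YAML, where anchors and merge keys let you define cluster properties once and reuse them across tasks: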
# http://yaml.org/spec/1.2/spec.html
# https://learnxinyminutes.com/docs/yaml/
custom:
  basic-cluster-props: &basic-cluster-props
    spark_version: "7.3.x-cpu-ml-scala2.12"
    node_type_id: "<some-node-type>"

  basic-static-cluster: &basic-static-cluster
    new_cluster:
      <<: *basic-cluster-props
      num_workers: 2

environments:
  default:
    jobs:
      - name: "your-job-name"
        tasks:
          - task_key: "first-task"
            <<: *basic-static-cluster
            spark_python_task:
              python_file: "./placeholder_1.py"
          - task_key: "second-task"
            <<: *basic-static-cluster
            spark_python_task:
              python_file: "./placeholder_2.py"
            depends_on:
              - task_key: "first-task"
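Note how the YAML version uses an anchor (`&basic-static-cluster`) together with the merge key (`<<: *basic-static-cluster`) to define the cluster properties once and reuse them in every task. After a successful deployment, the job can be started as usual, for example with `dbx launch --job=your-job-name` (the exact launch syntax depends on your dbx version).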