Multitask jobs support

Please note that the libraries attribute is not supported at the job level in multitask jobs, so during deployment the dependencies will be propagated to every task definition.
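
For illustration only, after deployment each task in such a job might carry the propagated dependencies roughly as sketched below; the task keys, wheel path and version are hypothetical and will depend on your project and artifact location:

"tasks": [
    {
        "task_key": "first-task",
        "libraries": [
            {"whl": "dbfs:/dbx/sample-project/artifacts/sample_project-0.1.0-py3-none-any.whl"}
        ]
    },
    {
        "task_key": "second-task",
        "libraries": [
            {"whl": "dbfs:/dbx/sample-project/artifacts/sample_project-0.1.0-py3-none-any.whl"}
        ]
    }
]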

You can read more about multitask jobs here (AWS, Azure, GCP).

You can find examples of multitask job definitions below.

Sample multitask job based on Jobs API 2.0

{
    "default": {
        "jobs": [
            {
                "name": "multitask-job-name",
                "tasks": [
                    {
                        "task_key": "first-task",
                        "description": "some description",
                        "new_cluster": {
                            "spark_version": "7.3.x-cpu-ml-scala2.12",
                            "node_type_id": "some-node-type",
                            "num_workers": 2
                        },
                        "max_retries": 0,
                        "spark_python_task": {
                            "python_file": "file://placeholder_1.py"
                        }
                    },
                    {
                        "task_key": "second",
                        "description": "some description",
                        "new_cluster": {
                            "spark_version": "7.3.x-cpu-ml-scala2.12",
                            "node_type_id": "some-node-type",
                            "num_workers": 2
                        },
                        "max_retries": 0,
                        "spark_python_task": {
                            "python_file": "file://placeholder_1.py"
                        },
                        "depends_on": [
                            {
                                "task_key": "first-task"
                            }
                        ]
                    }
                ]
            }
        ]
    }
}

Sample multitask job based on Jobs API 2.1

Jobs API 2.1 introduces many useful features for job management, and we encourage developers to use this API. To enable it, do one of the following:

  • If you’re using local Databricks CLI profiles, please follow this documentation (a sketch of the resulting profile is shown after this list)

  • In your CI pipeline, set the environment variable export DATABRICKS_JOBS_API_VERSION=2.1 to enable the latest features
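
As an illustration of the first option, a profile-based setup might look roughly like the sketch below. This assumes the legacy Databricks CLI, which stores the setting as jobs-api-version in ~/.databrickscfg; the host and token values are placeholders:

[DEFAULT]
host = https://<your-workspace-url>
token = <personal-access-token>
jobs-api-version = 2.1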

{
    "default": {
        "jobs": [
            {
                "name": "dbx_jobs_v21_test",
                "job_clusters": [
                    {
                        "new_cluster": {
                            "spark_version": "9.1.x-cpu-ml-scala2.12",
                            "num_workers": 1,
                            "node_type_id": "{some-node-type-id}"
                        },
                        "job_cluster_key": "basic-cluster"
                    }
                ],
                "tasks": [
                    {
                        "task_key": "first-task",
                        "job_cluster_key": "basic-cluster",
                        "spark_python_task": {
                            "python_file": "file://some/entrypoint.py",
                            "parameters": [
                                "--conf-file",
                                "file:fuse://some/conf/file.yml"
                            ]
                        }
                    },
                    {
                        "task_key": "second-task",
                        "job_cluster_key": "basic-cluster",
                        "spark_python_task": {
                            "python_file": "file://some/entrypoint.py",
                            "parameters": [
                                "--conf-file",
                                "file:fuse://some/conf/file.yml"
                            ]
                        }
                    },
                    {
                        "task_key": "third-task",
                        "job_cluster_key": "basic-cluster",
                        "depends_on": [
                            {
                                "task_key": "first-task"
                            },
                            {
                                "task_key": "second-task"
                            }
                        ],
                        "spark_python_task": {
                            "python_file": "file://some/entrypoint.py",
                            "parameters": [
                                "--conf-file",
                                "file:fuse://some/conf/file.yml"
                            ]
                        }
                    }
                ]
            }
        ]
    }
}