Prepare 0.3.0 release #464

Merged · 32 commits · Jan 29, 2021
Changes from 11 commits

32 commits
834133a
Removed redundant test
nfx Jan 22, 2021
047d3a6
Fix job test
nfx Jan 22, 2021
31524c6
Simplified instance pool tests
nfx Jan 22, 2021
ce55d71
Fix concurrent testing for instance profiles
nfx Jan 22, 2021
2921d7c
Fix group tests
nfx Jan 22, 2021
72ab440
SCIM service principal is only on Azure, for now
nfx Jan 22, 2021
3a3bd30
Prefer require.NoError
nfx Jan 22, 2021
907214d
Added 0.3.0 migration instructions
nfx Jan 22, 2021
799c78e
Fix minor inconsistencies
nfx Jan 22, 2021
2a62e55
Fix typos
nfx Jan 24, 2021
362df5a
Fix lint
nfx Jan 24, 2021
87569a7
Test environment cleanup
nfx Jan 24, 2021
31587d2
Verified state upgraders for notebooks and files
nfx Jan 24, 2021
510df88
Add global rate limit to HTTP calls
nfx Jan 24, 2021
0a9acae
Extracted constants
nfx Jan 24, 2021
171d21a
Fixed redundant multiple mounting clusters
nfx Jan 25, 2021
e6620f0
Sort outputs in group data source (#467)
nfx Jan 25, 2021
08e6a88
Merge remote-tracking branch 'origin/master' into prepare/0.3.0
nfx Jan 25, 2021
350f98f
Fix lint
nfx Jan 25, 2021
16a41fa
Use correct Azure environment for management endpoint (#470)
stikkireddy Jan 27, 2021
333552b
Changed rate limit to per-second
nfx Jan 25, 2021
46d413a
Increased code coverage
nfx Jan 25, 2021
36014fb
Added databricks_caller_identity data
nfx Jan 25, 2021
9c81319
Renamed caller_identity to current_user
nfx Jan 25, 2021
5c8b72e
Fix typos in docs
nfx Jan 27, 2021
799d732
Various fixes
nfx Jan 29, 2021
19df007
Send empty JSON body while deleting IP ACL
nfx Jan 29, 2021
9a3fceb
Added tutorials
nfx Jan 29, 2021
5a8bb7f
Merge remote-tracking branch 'origin/master' into prepare/0.3.0
nfx Jan 29, 2021
cfd6bc0
Locking single EC2 Instance profile for tests
nfx Jan 29, 2021
5bcc230
Use Terraform 0.14+ as primary testing binary
nfx Jan 29, 2021
6976531
Various fixes
nfx Jan 29, 2021
4 changes: 1 addition & 3 deletions CHANGELOG.md
@@ -14,8 +14,6 @@
* Fixed support for [single node clusters](https://docs.databricks.com/clusters/single-node.html) by allowing [`num_workers` to be `0`](https://github.com/databrickslabs/terraform-provider-databricks/pull/454).

**Behavior changes**

* Added optional parameter `azure_environment` to provider config which defaults to `public`.
* Removed deprecated `library_jar`, `library_egg`, `library_whl`, `library_pypi`, `library_cran`, and `library_maven` from `databricks_cluster` and `databricks_job` in favor of more API-transparent [library](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/cluster#library-configuration-block) configuration block.
* Removed deprecated `notebook_path` and `notebook_base_parameters` from `databricks_job` in favor of [notebook_task](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/job#notebook_task-configuration-block) configuration block.
* Removed deprecated `jar_uri`, `jar_main_class_name`, and `jar_parameters` from `databricks_job` in favor of [spark_jar_task](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/job#spark_jar_task-configuration-block) configuration block.
@@ -25,7 +23,7 @@
* Removed deprecated `databricks_scim_group` resource in favor of [databricks_group](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/group).
* Removed deprecated `databricks_default_user_roles` data source in favor of [databricks_group](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/data-sources/group#attribute-reference) data source.
* Removed deprecated `basic_auth` and `azure_auth` provider configuration blocks in favor of [documented authentication methods](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs).
* `format`, `overwrite`, and `mkdirs` were removed from `databricks_notebook`. TODO: handle RESOURCE_ALREADY_EXISTS for mkdirs.
* `format`, `overwrite`, and `mkdirs` were removed from `databricks_notebook`. To follow expected behavior of Terraform, notebooks are always overwritten.
* `skip_validation` from `databricks_instance_profile` was removed and is always set to `true` for subsequent requests.
* `databricks_mws_workspaces` got `verify_workspace_runnning` removed and now validates every deployment. If a deployment fails, it removes the failed workspace and returns an error message with an explanation.
* `default_tags` were removed from `databricks_instance_pool`. `disk_spec` got a new `disk_type` attribute, which contains `azure_disk_volume_type` and `ebs_volume_type`. This change more closely reflects the API structure.
3 changes: 2 additions & 1 deletion CONTRIBUTING.md
@@ -109,6 +109,7 @@ $ docker run -it -v $(pwd):/workpace -w /workpace databricks-terraform apply
* Consider test functions as scenarios that you are debugging from an IDE when specific issues arise. Test tables are discouraged. Single-use functions in tests are discouraged, unless the resource definitions they create are longer than 80 lines.
* All tests should be capable of running repeatedly on a "dirty" environment, meaning they must not require a new clean environment every time the test runs.
* All tests should re-use compute resources whenever possible.
* Prefer `require.NoError` (stops the test on error) to `assert.NoError` (continues the test on error) when checking the results.

## Code conventions

@@ -129,7 +130,7 @@ Eventually, all of resources would be automatically checked for a unit test pres

```go
for name, resource := range p.ResourcesMap {
if name != "databricks_scim_user" {
if name != "databricks_user" {
continue
}
//...
89 changes: 1 addition & 88 deletions compute/acceptance/cluster_test.go
@@ -41,10 +41,6 @@ func getCloudSpecificHCLStatements() cloudSpecificHCLStatements {
}
}

func testDefaultZones() string {
return "data \"databricks_zones\" \"default_zones\" {}\n"
}

type instancePoolHCLBuilder struct {
Name string
identifier string
@@ -101,11 +97,6 @@ func getAwsAttributes(attributesMap map[string]string) string {
return awsAttr.String()
}

func (i *instancePoolHCLBuilder) withAwsAttributes(attributesMap map[string]string) *instancePoolHCLBuilder {
i.awsAttributes = getAwsAttributes(attributesMap)
return i
}

func getCommonLibraries() string {
return `
library {
@@ -237,76 +228,6 @@ func TestAwsAccClusterResource_ValidatePlan(t *testing.T) {
})
}

func TestAwsAccClusterResource_CreateClusterViaInstancePool(t *testing.T) {
randomInstancePoolSuffix := acctest.RandStringFromCharSet(10, acctest.CharSetAlphaNum)
randomInstancePoolName := fmt.Sprintf("pool-%s", randomInstancePoolSuffix)
randomInstancePoolInterpolation := fmt.Sprintf("databricks_instance_pool.%s.id", randomInstancePoolName)
randomClusterSuffix := acctest.RandStringFromCharSet(10, acctest.CharSetAlphaNum)
randomClusterName := fmt.Sprintf("cluster-%s", randomClusterSuffix)
randomClusterID := fmt.Sprintf("databricks_cluster.%s", randomClusterName)
awsAttrInstancePool := map[string]string{
"zone_id": "${data.databricks_zones.default_zones.default_zone}",
"availability": "SPOT",
}
randomStr := acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum)
instanceProfileRName := "my-tf-test-instance-profile"
instanceProfile := fmt.Sprintf("arn:aws:iam::999999999999:instance-profile/tf-test-%s", randomStr)
awsAttrCluster := map[string]string{
"instance_profile_arn": fmt.Sprintf("${databricks_instance_profile.%s.id}", instanceProfileRName),
}

clusterNoInstanceProfileConfig := testDefaultZones() +
testAWSDatabricksInstanceProfile(instanceProfile, instanceProfileRName) +
newInstancePoolHCLBuilder(randomInstancePoolName).
withAwsAttributes(awsAttrInstancePool).withCloudEnv().
build() +
newClusterHCLBuilder(randomClusterName).
withAwsAttributes(nil).
withInstancePool(randomInstancePoolInterpolation).
build()

clusterWithInstanceProfileConfig := testDefaultZones() +
testAWSDatabricksInstanceProfile(instanceProfile, instanceProfileRName) +
newInstancePoolHCLBuilder(randomInstancePoolName).
withAwsAttributes(awsAttrInstancePool).withCloudEnv().
build() +
newClusterHCLBuilder(randomClusterName).
withAwsAttributes(awsAttrCluster).
withInstancePool(randomInstancePoolInterpolation).
build()

acceptance.AccTest(t, resource.TestCase{
Steps: []resource.TestStep{
{
Config: clusterNoInstanceProfileConfig,
Check: resource.ComposeTestCheckFunc(
testClusterCheckAndTerminateForFutureTests(randomClusterID, t),
),
},
{
Config: clusterWithInstanceProfileConfig,
Check: resource.ComposeTestCheckFunc(
testClusterCheckAndTerminateForFutureTests(randomClusterID, t),
),
},
{
Config: clusterNoInstanceProfileConfig,
Check: resource.ComposeTestCheckFunc(
testClusterCheckAndTerminateForFutureTests(randomClusterID, t),
),
PlanOnly: true,
ExpectNonEmptyPlan: true,
},
{
Config: clusterNoInstanceProfileConfig,
Check: resource.ComposeTestCheckFunc(
testClusterCheckAndTerminateForFutureTests(randomClusterID, t),
),
},
},
})
}

func TestAzureAccClusterResource_CreateClusterViaInstancePool(t *testing.T) {
if os.Getenv("CLOUD_ENV") == "" {
return
@@ -382,6 +303,7 @@ func TestAccClusterResource_CreateSingleNodeCluster(t *testing.T) {
"spark.databricks.cluster.profile" = "singleNode"
"spark.master" = "local[*]"
}
aws_attributes {}
}`, randomName, clusterAPI.GetSmallestNodeType(NodeTypeRequest{LocalDisk: true}),
clusterAPI.LatestSparkVersionOrDefault(SparkVersionRequest{Latest: true, LongTermSupport: true})),
},
@@ -395,12 +317,3 @@ func testClusterCheckAndTerminateForFutureTests(n string, t *testing.T) resource
return NewClustersAPI(ctx, client).Terminate(id)
})
}

func testAWSDatabricksInstanceProfile(instanceProfile string, name string) string {
return fmt.Sprintf(`
resource "databricks_instance_profile" "%s" {
instance_profile_arn = "%s"
skip_validation = true
}
`, name, instanceProfile)
}
7 changes: 5 additions & 2 deletions compute/acceptance/job_test.go
@@ -15,6 +15,7 @@ import (
"github.com/databrickslabs/databricks-terraform/internal/qa"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestAwsAccJobsCreate(t *testing.T) {
@@ -35,7 +36,9 @@ func TestAwsAccJobsCreate(t *testing.T) {
AwsAttributes: &AwsAttributes{
Availability: "ON_DEMAND",
},
NodeTypeID: clustersAPI.GetSmallestNodeType(NodeTypeRequest{}),
NodeTypeID: clustersAPI.GetSmallestNodeType(NodeTypeRequest{
LocalDisk: true,
}),
},
NotebookTask: &NotebookTask{
NotebookPath: "/tf-test/demo-terraform/demo-notebook",
@@ -63,7 +66,7 @@
}

job, err := jobsAPI.Create(jobSettings)
assert.NoError(t, err, err)
require.NoError(t, err, err)
id := job.ID()
defer func() {
err := jobsAPI.Delete(id)
13 changes: 6 additions & 7 deletions compute/resource_instance_pool.go
@@ -29,17 +29,16 @@ func (a InstancePoolsAPI) Create(instancePool InstancePool) (InstancePoolAndStat
}

// Update edits the configuration of a instance pool to match the provided attributes and size
func (a InstancePoolsAPI) Update(instancePoolInfo InstancePool) error {
return a.client.Post(a.context, "/instance-pools/edit", instancePoolInfo, nil)
func (a InstancePoolsAPI) Update(ip InstancePool) error {
return a.client.Post(a.context, "/instance-pools/edit", ip, nil)
}

// Read retrieves the information for a instance pool given its identifier
func (a InstancePoolsAPI) Read(instancePoolID string) (InstancePoolAndStats, error) {
var instancePoolInfo InstancePoolAndStats
err := a.client.Get(a.context, "/instance-pools/get", map[string]string{
func (a InstancePoolsAPI) Read(instancePoolID string) (ip InstancePool, err error) {
err = a.client.Get(a.context, "/instance-pools/get", map[string]string{
"instance_pool_id": instancePoolID,
}, &instancePoolInfo)
return instancePoolInfo, err
}, &ip)
return
}

// List retrieves the list of existing instance pools
27 changes: 3 additions & 24 deletions compute/resource_instance_pool_test.go
@@ -58,30 +58,9 @@ func TestAccInstancePools(t *testing.T) {
assert.Equal(t, pool.NodeTypeID, poolReadInfo.NodeTypeID)
assert.Equal(t, pool.IdleInstanceAutoTerminationMinutes, poolReadInfo.IdleInstanceAutoTerminationMinutes)

u := InstancePool{
InstancePoolID: poolReadInfo.InstancePoolID,
InstancePoolName: "Terraform Integration Test Updated",
MinIdleInstances: 0,
MaxCapacity: 20,
NodeTypeID: nodeType,
IdleInstanceAutoTerminationMinutes: 20,
PreloadedSparkVersions: []string{
sparkVersion,
},
}
if !client.IsAzure() {
u.DiskSpec = &InstancePoolDiskSpec{
DiskType: &InstancePoolDiskType{
EbsVolumeType: EbsVolumeTypeGeneralPurposeSsd,
},
DiskCount: 1,
DiskSize: 32,
}
u.AwsAttributes = &InstancePoolAwsAttributes{
Availability: AwsAvailabilitySpot,
}
}
err = NewInstancePoolsAPI(context.Background(), client).Update(u)
poolReadInfo.InstancePoolName = "Terraform Integration Test Updated"
poolReadInfo.MaxCapacity = 20
err = NewInstancePoolsAPI(context.Background(), client).Update(poolReadInfo)
assert.NoError(t, err, err)

poolReadInfo, err = NewInstancePoolsAPI(context.Background(), client).Read(poolInfo.InstancePoolID)
2 changes: 0 additions & 2 deletions docs/data-sources/notebook.md
@@ -7,14 +7,12 @@ This data source allows to export a notebook from workspace
```hcl
data "databricks_notebook" "features" {
path = "/Production/Features"
format = "SOURCE"
}
```

## Argument Reference

* `path` - (Required) Notebook path on the workspace
* `format` - (Required) One of `DBC`, `SOURCE` or `HTML` to define format of exported content

## Attribute Reference

56 changes: 54 additions & 2 deletions docs/guides/migration-0.3.x.md
@@ -1,7 +1,59 @@
# Migration from 0.2.x to 0.3.x

Certain resources undergone changes in order to ensure consistency with
Certain resources have undergone changes to ensure consistency with the REST API and standard expected Terraform behavior.

## provider

* Rewrite the `basic_auth` block with `username` and `password` fields, as specified in the [main document](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs#authenticating-with-hostname-username-and-password) and sketched below.
* Rewrite `azure_auth` block with appropriate [Azure configuration](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs#special-configurations-for-azure).
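
For illustration, a minimal sketch of the `basic_auth` rewrite, assuming username/password authentication (the host URL and variable names below are hypothetical):

```hcl
# 0.2.x (removed):
# provider "databricks" {
#   host = "https://abc-12345.cloud.databricks.com" # hypothetical workspace URL
#   basic_auth {
#     username = var.databricks_username
#     password = var.databricks_password
#   }
# }

# 0.3.x equivalent:
provider "databricks" {
  host     = "https://abc-12345.cloud.databricks.com" # hypothetical workspace URL
  username = var.databricks_username
  password = var.databricks_password
}
```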

## databricks_job

* Rewrite `spark_submit_parameters` with [spark_submit_task](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/job#spark_submit_task-configuration-block) configuration block.
* Rewrite `python_file` and `python_parameters` with [spark_python_task](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/job#spark_python_task-configuration-block) configuration block.
* Rewrite `jar_uri`, `jar_main_class_name`, and `jar_parameters` with [spark_jar_task](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/job#spark_jar_task-configuration-block) configuration block.
* Rewrite `notebook_path` and `notebook_base_parameters` with [notebook_task](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/job#notebook_task-configuration-block) configuration block.
* Rewrite `library_jar`, `library_egg`, `library_whl`, `library_pypi`, `library_cran`, and `library_maven` with [library](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/cluster#library-configuration-block) configuration block.
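
As an illustration of the `notebook_task` item above, a hedged before/after sketch (the notebook path and parameter names are hypothetical; check the linked job documentation for the authoritative schema):

```hcl
# 0.2.x (removed):
# resource "databricks_job" "this" {
#   # ... cluster and library settings ...
#   notebook_path            = "/Production/MakeFeatures" # hypothetical notebook
#   notebook_base_parameters = { dry_run = "true" }
# }

# 0.3.x equivalent:
resource "databricks_job" "this" {
  # ... cluster and library settings ...
  notebook_task {
    notebook_path   = "/Production/MakeFeatures" # hypothetical notebook
    base_parameters = { dry_run = "true" }
  }
}
```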

## databricks_notebook

* Rename `content` to `content_base64`, as this more closely represents the actual data in the field and simplifies internal code reuse.
* Remove `format` attribute. Starting from v0.3.0 it behaves as if it is set to `SOURCE`.
* Remove `overwrite` attribute. Starting from v0.3.0 it behaves as if it is set to `true`.
* Remove `mkdirs` attribute. Starting from v0.3.0 it behaves as if it is set to `true`.
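
Putting the items above together, a minimal before/after sketch (the path and notebook content are hypothetical):

```hcl
# 0.2.x (removed attributes shown commented out):
# resource "databricks_notebook" "this" {
#   path      = "/Shared/Demo" # hypothetical path
#   content   = base64encode("display(spark.range(10))")
#   language  = "PYTHON"
#   format    = "SOURCE"
#   overwrite = true
#   mkdirs    = true
# }

# 0.3.x equivalent:
resource "databricks_notebook" "this" {
  path           = "/Shared/Demo" # hypothetical path
  content_base64 = base64encode("display(spark.range(10))")
  language       = "PYTHON"
}
```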

## databricks_cluster

* Rewrite `library_jar`, `library_egg`, `library_whl`, `library_pypi`, `library_cran`, and `library_maven` with [library](https://registry.terraform.io/providers/databrickslabs/databricks/latest/docs/resources/cluster#library-configuration-block) configuration block.
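
For example, a legacy PyPI library attribute maps onto the nested `library` block roughly as follows (the package is hypothetical, and the exact pre-0.3.0 syntax may have differed in your configuration):

```hcl
resource "databricks_cluster" "this" {
  # ... cluster settings ...

  # 0.3.x: one `library` block per library, replacing the removed
  # library_jar/library_egg/library_whl/library_pypi/library_cran/library_maven attributes.
  library {
    pypi {
      package = "fbprophet==0.6" # hypothetical package
    }
  }
}
```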

## databricks_dbfs_file

* Rename `content` to `content_base64`, as this more closely represents the actual data in the field and simplifies internal code reuse.
* Remove `overwrite` attribute. Starting from v0.3.0 it behaves as if it is set to `true`.
* Remove `mkdirs` attribute. Starting from v0.3.0 it behaves as if it is set to `true`.
* Remove `validate_remote_file` attribute. For performance reasons, starting from v0.3.0 it doesn't fetch the contents of the remote file to verify the checksum.
* If you've relied on the internal `content_b64_md5` attribute, please remove it. Starting from v0.3.0 its behavior is internalized.

## databricks_instance_profile

* Remove `skip_validation` from all `databricks_instance_profile` resources. To ensure consistency, all AWS EC2 instance profiles are now validated to work before state is returned to the main Terraform process.

## databricks_mws_workspaces

* Remove `verify_workspace_runnning` attributes from all `databricks_mws_workspaces` resources. All workspaces are verified to be running automatically as of [this change](https://github.com/databrickslabs/terraform-provider-databricks/commit/ef64b5d26daa23ff2532f1076a0db01864e4f73c).
* Remove `verify_workspace_runnning` attribute from all `databricks_mws_workspaces` resources. All workspaces are verified to be running automatically as of [this change](https://github.com/databrickslabs/terraform-provider-databricks/commit/ef64b5d26daa23ff2532f1076a0db01864e4f73c).

## databricks_instance_pool

* Remove `default_tags`.
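
Per the changelog entry for this release, `disk_spec` also gained a nested `disk_type` block containing `azure_disk_volume_type` or `ebs_volume_type`. A hedged sketch of the new layout, with illustrative values (verify attribute names against the resource documentation):

```hcl
resource "databricks_instance_pool" "this" {
  instance_pool_name                    = "Smallest Nodes" # hypothetical name
  min_idle_instances                    = 0
  max_capacity                          = 10
  node_type_id                          = "i3.xlarge"      # hypothetical node type
  idle_instance_autotermination_minutes = 10

  disk_spec {
    disk_type {
      ebs_volume_type = "GENERAL_PURPOSE_SSD"
    }
    disk_count = 1
    disk_size  = 32
  }
}
```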

## databricks_scim_user

* This resource was removed as deprecated. Please rewrite using [databricks_user](../resources/user.md).

## databricks_scim_group

* This resource was removed as deprecated. Please rewrite using [databricks_group](../resources/group.md).
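
A minimal sketch of the replacement resource (the group name is hypothetical):

```hcl
resource "databricks_group" "data_scientists" {
  display_name = "Data Scientists" # hypothetical group name
}
```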

## databricks_default_user_roles

* This data source was removed as deprecated. Please use [databricks_group](../data-sources/group.md) data source for performing equivalent tasks.
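
A minimal sketch of the equivalent lookup via the group data source (the group name is hypothetical; see its attribute reference for the exported fields):

```hcl
data "databricks_group" "admins" {
  display_name = "admins" # hypothetical group name
}
```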
7 changes: 3 additions & 4 deletions docs/index.md
@@ -176,9 +176,8 @@ provider "databricks" {
azure_tenant_id = var.tenant_id
}

resource "databricks_scim_user" "my-user" {
user_name = "test-user@databricks.com"
display_name = "Test User"
resource "databricks_user" "my-user" {
user_name = "test-user@databricks.com"
}
```

@@ -202,7 +201,7 @@ provider "databricks" {
azure_workspace_resource_id = azurerm_databricks_workspace.this.id
}

resource "databricks_scim_user" "my-user" {
resource "databricks_user" "my-user" {
user_name = "test-user@databricks.com"
display_name = "Test User"
}
1 change: 0 additions & 1 deletion docs/resources/aws_s3_mount.md
@@ -112,7 +112,6 @@ resource "aws_iam_instance_profile" "this" {
// Step 13: Register instance profile at Databricks
resource "databricks_instance_profile" "ds" {
instance_profile_arn = aws_iam_instance_profile.this.arn
skip_validation = false
}

// Step 14: now you can do `%fs ls /mnt/experiments` in notebooks
4 changes: 3 additions & 1 deletion docs/resources/cluster.md
@@ -197,7 +197,9 @@ Attributes are the same as for the `cluster_log_conf` configuration block.

## aws_attributes

`aws_attributes` optional configuration block contains attributes related to [clusters running on Amazon Web Services](https://docs.databricks.com/clusters/configure.html#aws-configurations). If not specified at cluster creation, a set of default values will be used.
`aws_attributes` optional configuration block contains attributes related to [clusters running on Amazon Web Services](https://docs.databricks.com/clusters/configure.html#aws-configurations).

-> **Note** *(AWS only)* Please specify an empty configuration block (`aws_attributes {}`), even if you're not setting any custom values. This prevents resource update issues.
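
A minimal sketch of that recommendation (other attributes omitted):

```hcl
resource "databricks_cluster" "this" {
  # ... other cluster attributes ...

  # Empty block keeps AWS defaults while avoiding spurious resource updates.
  aws_attributes {}
}
```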

Here is an example of a shared autoscaling cluster with some of the AWS options set:

1 change: 0 additions & 1 deletion docs/resources/group_instance_profile.md
@@ -9,7 +9,6 @@ This resource allows you to attach instance profiles to groups created by the [g
```hcl
resource "databricks_instance_profile" "instance_profile" {
instance_profile_arn = "my_instance_profile_arn"
skip_validation = true
}

resource "databricks_group" "my_group" {