Azure Data Factory

brown9804 · brown9804 · commit cd1e14e90c62 · 2026-02-10T19:39:49.000-06:00
diff --git a/5_analytics-bigdata/README.md b/5_analytics-bigdata/README.md
@@ -12,6 +12,10 @@ Last updated: 2026-02-09
 > [!IMPORTANT]
 > This folder contains sample Terraform templates for Azure analytics and big data services. These templates are starting points and should be customized based on your application needs.
 
+## Templates available
+
+- [Azure Data Factory](./data-factory)
+
 <!-- START BADGE -->
 <div align="center">
   <img src="https://img.shields.io/badge/Total%20views-1706-limegreen" alt="Total views">
diff --git a/5_analytics-bigdata/data-factory/README.md b/5_analytics-bigdata/data-factory/README.md
@@ -0,0 +1,80 @@
+# Terraform Template - Azure Data Factory
+
+Costa Rica
+
+[![GitHub](https://img.shields.io/badge/--181717?logo=github&logoColor=ffffff)](https://github.com/)
+[brown9804](https://github.com/brown9804)
+
+Last updated: 2026-02-11
+
+------------------------------------------
+
+> This template contains Terraform configurations to create an Azure Data Factory instance with a system-assigned managed identity.
+
+## File Descriptions
+
+- **main.tf**: Creates the Resource Group, a random name suffix, and the Azure Data Factory.
+- **variables.tf**: Defines the input variables used in the Terraform configuration.
+- **provider.tf**: Configures the Azure provider to interact with Azure resources.
+- **terraform.tfvars**: Provides example values for the variables defined in `variables.tf`.
+- **outputs.tf**: Defines outputs such as the Data Factory ID and managed identity principal ID.
+
+## Variables
+
+| Variable Name | Description | Type | Example Value |
+| --- | --- | --- | --- |
+| `resource_group_name` | Resource Group name to create/deploy into. | string | `"rg-analytics-dev"` |
+| `location` | Azure region for the deployment. | string | `"eastus"` |
+| `data_factory_name` | Base Azure Data Factory name. If random suffix is enabled, final name is `<base>-<suffix>`. | string | `"adf-analytics-dev"` |
+| `append_random_suffix` | Append a random suffix to avoid global name collisions. | bool | `true` |
+| `random_suffix_length` | Length of the random suffix when enabled. | number | `6` |
+| `public_network_enabled` | Enable/disable public network access for Data Factory. | bool | `true` |
+| `tags` | Tags applied to resources. | map(string) | `{ "env": "dev" }` |
+
+## Usage
+
+1. Authenticate:
+
+   ```sh
+   az login
+   ```
+
+2. Confirm the active subscription:
+
+   ```sh
+   az account show
+   # If needed:
+   az account set --subscription "<subscription-id-or-name>"
+   ```
+
+3. Initialize:
+
+   ```sh
+   terraform init -upgrade
+   ```
+
+4. Validate and plan:
+
+   ```sh
+   terraform validate
+   terraform plan
+   ```
+
+5. Apply:
+
+   ```sh
+   terraform apply -auto-approve
+   ```
+
+> [!NOTE]
+> This template creates the Resource Group for you.
+
+> [!NOTE]
+> Azure Data Factory names are globally unique. If you disable `append_random_suffix`, you may hit `DataFactoryNameInUse` and need to change `data_factory_name`.
+
+<!-- START BADGE -->
+<div align="center">
+  <img src="https://img.shields.io/badge/Total%20views-1706-limegreen" alt="Total views">
+  <p>Refresh Date: 2026-02-11</p>
+</div>
+<!-- END BADGE -->
+
+````
diff --git a/5_analytics-bigdata/data-factory/main.tf b/5_analytics-bigdata/data-factory/main.tf
@@ -0,0 +1,45 @@
+# main.tf
+# This file contains the main configuration for creating an Azure Data Factory.
+# It creates a Resource Group and an Azure Data Factory with a system-assigned managed identity.
+
+# Resource Group that holds the Data Factory deployed by this template.
+resource "azurerm_resource_group" "example" {
+  location = var.location
+  name     = var.resource_group_name
+  tags     = var.tags
+}
+
+# Random suffix (no uppercase letters, no special characters) used to make the
+# Data Factory name unique.
+resource "random_string" "suffix" {
+  # A new suffix is generated whenever any of these values changes.
+  keepers = {
+    base_name           = var.data_factory_name
+    location            = var.location
+    resource_group_name = var.resource_group_name
+  }
+
+  length  = var.random_suffix_length
+  numeric = true
+  special = false
+  upper   = false
+}
+
+locals {
+  # Final Data Factory name: "<base>-<suffix>" when the random suffix is
+  # enabled, otherwise the base name unchanged.
+  data_factory_name_final = (
+    var.append_random_suffix
+    ? format("%s-%s", var.data_factory_name, random_string.suffix.result)
+    : var.data_factory_name
+  )
+}
+
+# Azure Data Factory with a system-assigned managed identity.
+resource "azurerm_data_factory" "example" {
+  name = local.data_factory_name_final
+
+  # Referencing the resource group's attributes creates an implicit dependency,
+  # so Terraform creates the group first; no explicit depends_on is needed.
+  location            = azurerm_resource_group.example.location
+  resource_group_name = azurerm_resource_group.example.name
+
+  public_network_enabled = var.public_network_enabled
+
+  identity {
+    type = "SystemAssigned"
+  }
+
+  tags = var.tags
+}
diff --git a/5_analytics-bigdata/data-factory/outputs.tf b/5_analytics-bigdata/data-factory/outputs.tf
@@ -0,0 +1,21 @@
+# outputs.tf
+
+# Identifier of the Resource Group created by this template.
+output "resource_group_id" {
+  value       = azurerm_resource_group.example.id
+  description = "The ID of the resource group."
+}
+
+# Identifier of the Data Factory.
+output "data_factory_id" {
+  value       = azurerm_data_factory.example.id
+  description = "The resource ID of the Azure Data Factory."
+}
+
+# Name of the Data Factory as deployed (includes the random suffix when enabled).
+output "data_factory_name" {
+  value       = azurerm_data_factory.example.name
+  description = "The name of the Azure Data Factory."
+}
+
+# Principal ID read from the first (and only) identity block of the Data Factory.
+output "data_factory_principal_id" {
+  value       = azurerm_data_factory.example.identity[0].principal_id
+  description = "The system-assigned managed identity principalId for the Data Factory."
+}
diff --git a/5_analytics-bigdata/data-factory/provider.tf b/5_analytics-bigdata/data-factory/provider.tf
@@ -0,0 +1,30 @@
+# provider.tf
+# This file configures the Azure provider to interact with Azure resources.
+# It specifies the required provider and its version, along with provider-specific configurations.
+
+terraform {
+  required_providers {
+    # Generates the random suffix for the Data Factory name.
+    random = {
+      version = "~> 3.6"
+      source  = "hashicorp/random"
+    }
+
+    # Manages the Resource Group and the Data Factory.
+    azurerm = {
+      version = "~> 3.116"
+      source  = "hashicorp/azurerm"
+    }
+  }
+
+  # Terraform CLI versions this configuration accepts.
+  required_version = ">= 1.8, < 2.0"
+}
+
+provider "azurerm" {
+  features {
+    resource_group {
+      # Do not block deletion of a resource group that still contains resources.
+      prevent_deletion_if_contains_resources = false
+    }
+  }
+
+  # No credentials are configured in this block; the template is meant to be
+  # run from the current Azure CLI context (az login + az account set).
+  # NOTE(review): false keeps the provider's own resource provider
+  # registration enabled — confirm this is intended for restricted subscriptions.
+  skip_provider_registration = false
+}
diff --git a/5_analytics-bigdata/data-factory/terraform.tfvars b/5_analytics-bigdata/data-factory/terraform.tfvars
@@ -0,0 +1,17 @@
+# Example values for the variables declared in variables.tf.
+# Resource Group name and Azure region for the deployment.
+resource_group_name = "rg-analytics-dev"
+location            = "eastus"
+
+# Data Factory name must be globally unique.
+# This template appends a random suffix by default to reduce collisions.
+data_factory_name = "adf-analytics-dev"
+
+# With these values the final name is "adf-analytics-dev-<6-character suffix>".
+append_random_suffix = true
+random_suffix_length = 6
+
+# Public network access setting for the Data Factory.
+public_network_enabled = true
+
+# Tags applied to both the Resource Group and the Data Factory.
+tags = {
+  env  = "dev"
+  area = "analytics-bigdata"
+  iac  = "terraform"
+}
diff --git a/5_analytics-bigdata/data-factory/variables.tf b/5_analytics-bigdata/data-factory/variables.tf
@@ -0,0 +1,61 @@
+# variables.tf
+# This file defines the input variables used in the Terraform configuration.
+
+# Name of the Resource Group that this template creates.
+variable "resource_group_name" {
+  type        = string
+  description = "The name of the Azure Resource Group to create and deploy the Data Factory into."
+
+  validation {
+    # Reject empty and whitespace-only values.
+    condition     = trimspace(var.resource_group_name) != ""
+    error_message = "resource_group_name must not be empty."
+  }
+}
+
+# Azure region shared by the Resource Group and the Data Factory.
+variable "location" {
+  type        = string
+  description = "The Azure region where the Resource Group and Data Factory will be created."
+
+  validation {
+    # Reject empty and whitespace-only values.
+    condition     = trimspace(var.location) != ""
+    error_message = "location must not be empty."
+  }
+}
+
+# Base name of the Data Factory. When append_random_suffix is true, main.tf
+# appends "-<suffix>" to this value to build the final name.
+variable "data_factory_name" {
+  description = "The base name of the Azure Data Factory instance. If append_random_suffix is true, the final name will be '<base>-<suffix>'."
+  type        = string
+
+  validation {
+    condition     = length(trimspace(var.data_factory_name)) > 0
+    error_message = "data_factory_name must not be empty."
+  }
+
+  # Catch names that Azure Data Factory rejects before any resource is created:
+  # only letters, numbers, and hyphens; every hyphen must sit between two
+  # alphanumeric characters; at most 63 characters. The 3-character minimum is
+  # not enforced here because the random suffix may lengthen a short base name.
+  # The final name (base plus "-" and suffix) must also stay within 63 characters.
+  validation {
+    condition     = can(regex("^[A-Za-z0-9]+(-[A-Za-z0-9]+)*$", var.data_factory_name)) && length(var.data_factory_name) <= 63
+    error_message = "data_factory_name may contain only letters, numbers, and single hyphens, must start and end with a letter or number, and must be at most 63 characters long."
+  }
+}
+
+# Switch that decides whether the random suffix is added to the Data Factory name.
+variable "append_random_suffix" {
+  type        = bool
+  default     = true
+  description = "Whether to append a random suffix to the Data Factory name to avoid global name collisions."
+}
+
+# Number of characters in the random suffix generated in main.tf.
+variable "random_suffix_length" {
+  description = "Length of the random suffix appended to the Data Factory name when append_random_suffix is true."
+  type        = number
+  default     = 6
+
+  validation {
+    # The number type also accepts fractional values, so require a whole
+    # number in addition to the 4-16 range.
+    condition = (
+      var.random_suffix_length >= 4 &&
+      var.random_suffix_length <= 16 &&
+      floor(var.random_suffix_length) == var.random_suffix_length
+    )
+    error_message = "random_suffix_length must be a whole number between 4 and 16."
+  }
+}
+
+# Passed straight through to the Data Factory's public_network_enabled argument.
+variable "public_network_enabled" {
+  type        = bool
+  default     = true
+  description = "Whether public network access is enabled for the Data Factory."
+}
+
+# Tags applied to both the Resource Group and the Data Factory; empty by default.
+variable "tags" {
+  type        = map(string)
+  default     = {}
+  description = "A map of tags to assign to the resources."
+}
diff --git a/README.md b/README.md
@@ -105,6 +105,7 @@ Last updated: 2026-02-11
 <summary><b> Analytics and Big Data </b> (Click to expand) </summary>
 
 - [Analytics and Big Data](./5_analytics-bigdata)
+  - [Azure Data Factory](./5_analytics-bigdata/data-factory)
 
 </details>