Skip to content

Commit dc18ac3

Browse files
docs(samples): Add Dataflow "Getting started" flex template (#8897)
* Add a basic flex template sample, for the Dataflow flex templates tutorial. * Use unique image tag in test
1 parent 27ee41e commit dc18ac3

5 files changed

Lines changed: 512 additions & 0 deletions

File tree

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Dataflow flex template: Getting started sample
2+
3+
## Before you begin
4+
5+
Make sure you have followed the
6+
[Dataflow setup instructions](../../README.md).
7+
8+
## Create a Cloud Storage bucket
9+
10+
```sh
11+
export BUCKET="your-bucket"
12+
gcloud storage buckets create gs://$BUCKET
13+
```
14+
15+
## Create an Artifact Registry repository
16+
17+
```sh
18+
export REGION="us-central1"
19+
export REPOSITORY="your-repository"
20+
21+
gcloud artifacts repositories create $REPOSITORY \
22+
--repository-format=docker \
23+
--location=$REGION
24+
```
25+
26+
## Build the JAR file
27+
28+
```sh
29+
mvn clean package
30+
```
31+
32+
## Build the template
33+
34+
```sh
35+
export PROJECT="project-id"
36+
37+
gcloud dataflow flex-template build gs://$BUCKET/getting_started_java.json \
38+
--image-gcr-path "$REGION-docker.pkg.dev/$PROJECT/$REPOSITORY/getting-started-java:latest" \
39+
--sdk-language "JAVA" \
40+
--flex-template-base-image JAVA11 \
41+
--metadata-file "metadata.json" \
42+
--jar "target/flex-template-getting-started-1.0.jar" \
43+
--env FLEX_TEMPLATE_JAVA_MAIN_CLASS="com.example.dataflow.FlexTemplateGettingStarted"
44+
```
45+
46+
## Run the template
47+
48+
```sh
49+
50+
gcloud dataflow flex-template run "flex-`date +%Y%m%d-%H%M%S`" \
51+
--template-file-gcs-location "gs://$BUCKET/getting_started_java.json" \
52+
--region $REGION \
53+
--parameters output="gs://$BUCKET/output-"
54+
```
55+
56+
## Clean up
57+
58+
To delete the resources that you created:
59+
60+
```sh
61+
gcloud artifacts repositories delete $REPOSITORY --location $REGION --quiet
62+
gcloud storage rm gs://$BUCKET --recursive
63+
```
64+
65+
66+
## What's next?
67+
68+
For more information about building and running flex templates, see
69+
📝 [Use Flex Templates](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates).
70+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"name": "Getting started Java flex template",
3+
"description": "An example flex template for Java.",
4+
"parameters": [
5+
{
6+
"name": "output",
7+
"label": "Output destination",
8+
"helpText": "The path and filename prefix for writing output files.",
9+
"regexes": [
10+
"^gs:\\/\\/[^\\n\\r]+$"
11+
]
12+
}
13+
]
14+
}
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Licensed to the Apache Software Foundation (ASF) under one or more
4+
contributor license agreements. See the NOTICE file distributed with
5+
this work for additional information regarding copyright ownership.
6+
The ASF licenses this file to You under the Apache License, Version 2.0
7+
(the "License"); you may not use this file except in compliance with
8+
the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
-->
18+
<project xmlns="http://maven.apache.org/POM/4.0.0"
19+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
20+
<modelVersion>4.0.0</modelVersion>
21+
22+
<parent>
23+
<groupId>com.google.cloud.samples</groupId>
24+
<artifactId>shared-configuration</artifactId>
25+
<version>1.2.0</version>
26+
</parent>
27+
28+
<groupId>com.example.dataflow</groupId>
29+
<artifactId>flex-template-getting-started</artifactId>
30+
<version>1.0</version>
31+
32+
<properties>
33+
<maven.compiler.source>11</maven.compiler.source>
34+
<maven.compiler.target>11</maven.compiler.target>
35+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
36+
<beam.version>2.49.0</beam.version>
37+
38+
<maven-enforcer-plugin.version>3.4.1</maven-enforcer-plugin.version>
39+
<maven-compiler-plugin.version>3.11.0</maven-compiler-plugin.version>
40+
<maven-shade-plugin.version>3.5.0</maven-shade-plugin.version>
41+
<maven-exec-plugin.version>3.1.0</maven-exec-plugin.version>
42+
<slf4j.version>2.0.8</slf4j.version>
43+
</properties>
44+
45+
<repositories>
46+
<repository>
47+
<id>apache.snapshots</id>
48+
<name>Apache Development Snapshot Repository</name>
49+
<url>https://repository.apache.org/content/repositories/snapshots/</url>
50+
<releases>
51+
<enabled>false</enabled>
52+
</releases>
53+
</repository>
54+
</repositories>
55+
56+
<build>
57+
<plugins>
58+
<plugin>
59+
<groupId>org.apache.maven.plugins</groupId>
60+
<artifactId>maven-enforcer-plugin</artifactId>
61+
<version>${maven-enforcer-plugin.version}</version>
62+
<executions>
63+
<execution>
64+
<id>enforce-maven</id>
65+
<goals>
66+
<goal>enforce</goal>
67+
</goals>
68+
<configuration>
69+
<rules>
70+
<requireMavenVersion>
71+
<version>3.0.5</version>
72+
</requireMavenVersion>
73+
</rules>
74+
</configuration>
75+
</execution>
76+
</executions>
77+
</plugin>
78+
79+
<plugin>
80+
<groupId>org.apache.maven.plugins</groupId>
81+
<artifactId>maven-compiler-plugin</artifactId>
82+
<version>${maven-compiler-plugin.version}</version>
83+
</plugin>
84+
85+
<!-- The maven shade plugin is used to create an uber-jar with all the
86+
dependencies needed to run as a standalone jar.
87+
Do not minimize the jar since that removes some of the required
88+
classes for the runners. -->
89+
<plugin>
90+
<groupId>org.apache.maven.plugins</groupId>
91+
<artifactId>maven-shade-plugin</artifactId>
92+
<version>${maven-shade-plugin.version}</version>
93+
<executions>
94+
<execution>
95+
<phase>package</phase>
96+
<goals>
97+
<goal>shade</goal>
98+
</goals>
99+
<configuration>
100+
<transformers>
101+
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
102+
</transformers>
103+
<filters>
104+
<filter>
105+
<artifact>*:*</artifact>
106+
<excludes>
107+
<exclude>META-INF/*.SF</exclude>
108+
<exclude>META-INF/*.DSA</exclude>
109+
<exclude>META-INF/*.RSA</exclude>
110+
</excludes>
111+
</filter>
112+
</filters>
113+
</configuration>
114+
</execution>
115+
</executions>
116+
</plugin>
117+
</plugins>
118+
119+
<pluginManagement>
120+
<plugins>
121+
<plugin>
122+
<groupId>org.codehaus.mojo</groupId>
123+
<artifactId>exec-maven-plugin</artifactId>
124+
<version>${maven-exec-plugin.version}</version>
125+
<configuration>
126+
<cleanupDaemonThreads>false</cleanupDaemonThreads>
127+
</configuration>
128+
</plugin>
129+
</plugins>
130+
</pluginManagement>
131+
</build>
132+
133+
<dependencies>
134+
<dependency>
135+
<groupId>org.slf4j</groupId>
136+
<artifactId>slf4j-api</artifactId>
137+
<version>${slf4j.version}</version>
138+
</dependency>
139+
<dependency>
140+
<groupId>org.slf4j</groupId>
141+
<artifactId>slf4j-jdk14</artifactId>
142+
<version>${slf4j.version}</version>
143+
<scope>runtime</scope>
144+
</dependency>
145+
146+
<!-- Apache Beam
147+
To run on another of the Beam runners, add its module to this pom.xml
148+
according to the runner-specific setup instructions on the Beam website:
149+
https://beam.apache.org/documentation/#runners
150+
-->
151+
<dependency>
152+
<groupId>org.apache.beam</groupId>
153+
<artifactId>beam-sdks-java-core</artifactId>
154+
<version>${beam.version}</version>
155+
</dependency>
156+
157+
<!-- Direct Runner -->
158+
<dependency>
159+
<groupId>org.apache.beam</groupId>
160+
<artifactId>beam-runners-direct-java</artifactId>
161+
<version>${beam.version}</version>
162+
<scope>runtime</scope>
163+
</dependency>
164+
165+
<!-- Dataflow Runner -->
166+
<dependency>
167+
<groupId>org.apache.beam</groupId>
168+
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
169+
<version>${beam.version}</version>
170+
<scope>runtime</scope>
171+
</dependency>
172+
173+
<!-- Google Cloud I/O -->
174+
<dependency>
175+
<groupId>org.apache.beam</groupId>
176+
<artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
177+
<version>${beam.version}</version>
178+
</dependency>
179+
<dependency>
180+
<groupId>com.google.cloud</groupId>
181+
<artifactId>google-cloud-dataflow</artifactId>
182+
<version>0.34.0</version>
183+
<scope>test</scope>
184+
</dependency>
185+
<dependency>
186+
<groupId>com.google.cloud</groupId>
187+
<artifactId>google-cloud-artifact-registry</artifactId>
188+
<version>1.29.0</version>
189+
<scope>test</scope>
190+
</dependency>
191+
<dependency>
192+
<groupId>com.google.cloud</groupId>
193+
<artifactId>google-cloud-storage</artifactId>
194+
<version>2.29.1</version>
195+
</dependency>
196+
197+
<dependency>
198+
<groupId>junit</groupId>
199+
<artifactId>junit</artifactId>
200+
<version>4.13.2</version>
201+
<scope>test</scope>
202+
</dependency>
203+
</dependencies>
204+
</project>
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.example.dataflow;
16+
17+
import java.util.Arrays;
18+
import java.util.List;
19+
import org.apache.beam.sdk.Pipeline;
20+
import org.apache.beam.sdk.io.TextIO;
21+
import org.apache.beam.sdk.options.Description;
22+
import org.apache.beam.sdk.options.PipelineOptions;
23+
import org.apache.beam.sdk.options.PipelineOptionsFactory;
24+
import org.apache.beam.sdk.options.Validation;
25+
import org.apache.beam.sdk.transforms.Create;
26+
27+
/**
28+
* An Apache Beam batch pipeline that writes data to Cloud Storage.
29+
*/
30+
public class FlexTemplateGettingStarted {
31+
32+
public interface Options extends PipelineOptions {
33+
@Description("The Cloud Storage bucket to write to")
34+
@Validation.Required
35+
String getOutput();
36+
37+
void setOutput(String value);
38+
}
39+
40+
// Write text data to Cloud Storage.
41+
public static void main(String[] args) {
42+
final List<String> wordsList = Arrays.asList("1", "2", "3", "4");
43+
44+
var options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
45+
var pipeline = Pipeline.create(options);
46+
pipeline
47+
.apply(Create.of(wordsList))
48+
.apply(TextIO
49+
.write()
50+
.to(options.getOutput())
51+
.withSuffix(".txt")
52+
);
53+
54+
// For a Dataflow Flex Template, do NOT call waitUntilFinish().
55+
pipeline.run();
56+
}
57+
}

0 commit comments

Comments
 (0)