diff --git a/.github/workflows/build-report.yml b/.github/workflows/build-report.yml new file mode 100644 index 000000000..22f7ea248 --- /dev/null +++ b/.github/workflows/build-report.yml @@ -0,0 +1,57 @@ +# Copyright © 2024 Cask Data, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +# This workflow publishes the unit test report of a Java project built with Maven +# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven +# Note: Any changes to this workflow would be used only after merging into develop +name: Build Unit Tests Report + +on: + workflow_run: + workflows: + - Build with unit tests + types: + - completed + +permissions: + actions: read # Allows reading workflow run information + statuses: write # Required if the action updates commit statuses + checks: write # Required if it updates GitHub Checks API + + +jobs: + build: + runs-on: ubuntu-latest + + if: ${{ github.event.workflow_run.conclusion != 'skipped' }} + + steps: + # Pinned 1.0.0 version + - uses: marocchino/action-workflow_run-status@54b6e87d6cb552fc5f36dbe9a722a6048725917a + + - name: Download artifact + uses: actions/download-artifact@v4 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + run-id: ${{ github.event.workflow_run.id }} + path: artifacts/ + + - name: Surefire Report + # Pinned 3.5.2 version + uses: mikepenz/action-junit-report@16a9560bd02f11e7e3bf6b3e2ef6bba6c9d07c32 + if: always() + with: + report_paths: '**/target/surefire-reports/TEST-*.xml' 
+ github_token: ${{ secrets.GITHUB_TOKEN }} + detailed_summary: true + commit: ${{ github.event.workflow_run.head_sha }} + check_name: Build Test Report + diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6ce0eb526..55cd4617e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,28 +15,34 @@ name: Build with unit tests on: - workflow_run: - workflows: - - Trigger build - types: - - completed + push: + branches: [ develop, release/** ] + pull_request: + branches: [ develop, release/** ] + types: [opened, synchronize, reopened, labeled] jobs: build: runs-on: k8s-runner-build - if: ${{ github.event.workflow_run.conclusion != 'skipped' }} - + # We allow builds: + # 1) When it's a merge into a branch + # 2) For PRs that are labeled as build and + # - It's a code change + # - A build label was just added + # A bit complex, but prevents builds when other labels are manipulated + if: > + github.event_name == 'push' + || (contains(github.event.pull_request.labels.*.name, 'build') + && (github.event.action != 'labeled' || github.event.label.name == 'build') + ) steps: - # Pinned 1.0.0 version - - uses: haya14busa/action-workflow_run-status@967ed83efa565c257675ed70cfe5231f062ddd94 - - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.workflow_run.head_sha }} - name: Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ github.workflow }}-${{ hashFiles('**/pom.xml') }} @@ -47,21 +53,12 @@ jobs: run: mvn clean test -fae -T 2 -B -V -Dmaven.wagon.http.retryHandler.count=3 -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 - name: Archive build artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: always() with: - name: Build debug files + name: reports-${{ github.run_id }} path: | **/target/rat.txt **/target/surefire-reports/* - - name: Surefire Report - # Pinned 3.5.2 version - uses: 
mikepenz/action-junit-report@16a9560bd02f11e7e3bf6b3e2ef6bba6c9d07c32 - if: always() - with: - report_paths: '**/target/surefire-reports/TEST-*.xml' - github_token: ${{ secrets.GITHUB_TOKEN }} - detailed_summary: true - commit: ${{ github.event.workflow_run.head_sha }} - check_name: Test Report \ No newline at end of file + diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 1ced579cb..c710ef929 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -16,9 +16,9 @@ name: Build e2e tests on: push: - branches: [ develop ] + branches: [ develop, release/** ] pull_request: - branches: [ develop ] + branches: [ develop, release/** ] types: [ opened, synchronize, reopened, labeled ] workflow_dispatch: @@ -45,7 +45,7 @@ jobs: steps: # Pinned 1.0.0 version - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: path: plugin submodules: 'recursive' @@ -61,13 +61,13 @@ jobs: - '${{ matrix.module }}/**/e2e-test/**' - name: Checkout e2e test repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: cdapio/cdap-e2e-tests path: e2e - name: Cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ github.workflow }}-${{ hashFiles('**/pom.xml') }} @@ -156,24 +156,21 @@ jobs: CLOUDSQL_MYSQL_PASSWORD: ${{ steps.secrets.outputs.CLOUDSQL_MYSQL_PASSWORD }} CLOUDSQL_MYSQL_CONNECTION_NAME: ${{ steps.secrets.outputs.CLOUDSQL_MYSQL_CONNECTION_NAME }} - - name: Upload report - uses: actions/upload-artifact@v3 - if: always() - with: - name: Cucumber report - ${{ matrix.module }} - path: ./**/target/cucumber-reports - - name: Upload debug files - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: always() with: name: Debug files - ${{ matrix.module }} path: ./**/target/e2e-debug - name: Upload files to GCS - uses: google-github-actions/upload-cloud-storage@v0 + uses: google-github-actions/upload-cloud-storage@v2 if: always() with: path: ./plugin 
destination: e2e-tests-cucumber-reports/${{ github.event.repository.name }}/${{ github.ref }} glob: '**/target/cucumber-reports/**' + + - name: Cucumber Report URL + if: always() + run: echo "https://storage.googleapis.com/e2e-tests-cucumber-reports/${{ github.event.repository.name }}/${{ github.ref }}/plugin/${{ matrix.module }}/target/cucumber-reports/advanced-reports/cucumber-html-reports/overview-features.html" diff --git a/.github/workflows/trigger.yml b/.github/workflows/trigger.yml deleted file mode 100644 index 11db8ac25..000000000 --- a/.github/workflows/trigger.yml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright © 2022 Cask Data, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# This workflow will trigger build.yml only when needed. 
-# This way we don't flood main workflow run list -# Note that build.yml from develop will be used even for PR builds -# Also it will have access to the proper GITHUB_SECRET - -name: Trigger build - -on: - push: - branches: [ develop, release/** ] - pull_request: - branches: [ develop, release/** ] - types: [opened, synchronize, reopened, labeled] - workflow_dispatch: - -jobs: - trigger: - runs-on: ubuntu-latest - - # We allow builds: - # 1) When triggered manually - # 2) When it's a merge into a branch - # 3) For PRs that are labeled as build and - # - It's a code change - # - A build label was just added - # A bit complex, but prevents builds when other labels are manipulated - if: > - github.event_name == 'workflow_dispatch' - || github.event_name == 'push' - || (contains(github.event.pull_request.labels.*.name, 'build') - && (github.event.action != 'labeled' || github.event.label.name == 'build') - ) - steps: - - name: Trigger build - run: echo Maven build will be triggered now \ No newline at end of file diff --git a/amazon-redshift-plugin/docs/Redshift-batchsource.md b/amazon-redshift-plugin/docs/Redshift-batchsource.md new file mode 100644 index 000000000..38873b15a --- /dev/null +++ b/amazon-redshift-plugin/docs/Redshift-batchsource.md @@ -0,0 +1,102 @@ +# Amazon Redshift Batch Source + +Description +----------- +Reads from an Amazon Redshift database using a configurable SQL query. +Outputs one record for each row returned by the query. + + +Use Case +-------- +The source is used whenever you need to read from an Amazon Redshift database. For example, you may want +to create daily snapshots of a database table by using this source and writing to +a TimePartitionedFileSet. + + +Properties +---------- +**Reference Name:** Name used to uniquely identify this source for lineage, annotating metadata, etc. + +**JDBC Driver name:** Name of the JDBC driver to use. + +**Host:** Host URL of the current master instance of Redshift cluster. 
+ +**Port:** Port that Redshift master instance is listening to. + +**Database:** Redshift database name. + +**Import Query:** The SELECT query to use to import data from the specified table. +You can specify an arbitrary number of columns to import, or import all columns using \*. The Query should +contain the '$CONDITIONS' string. For example, 'SELECT * FROM table WHERE $CONDITIONS'. +The '$CONDITIONS' string will be replaced by 'splitBy' field limits specified by the bounding query. +The '$CONDITIONS' string is not required if numSplits is set to one. + +**Bounding Query:** Bounding Query should return the min and max of the values of the 'splitBy' field. +For example, 'SELECT MIN(id),MAX(id) FROM table'. Not required if numSplits is set to one. + +**Split-By Field Name:** Field Name which will be used to generate splits. Not required if numSplits is set to one. + +**Number of Splits to Generate:** Number of splits to generate. + +**Username:** User identity for connecting to the specified database. + +**Password:** Password to use to connect to the specified database. + +**Connection Arguments:** A list of arbitrary string key/value pairs as connection arguments. These arguments +will be passed to the JDBC driver as connection arguments for JDBC drivers that may need additional configurations. + +**Schema:** The schema of records output by the source. This will be used in place of whatever schema comes +back from the query. However, it must match the schema that comes back from the query, +except it can mark fields as nullable and can contain a subset of the fields. + +**Fetch Size:** The number of rows to fetch at a time per split. Larger fetch size can result in faster import, +with the tradeoff of higher memory usage. + +Example +------ +Suppose you want to read data from an Amazon Redshift database named "prod" that is running on +"redshift.xyz.eu-central-1.redshift.amazonaws.com", port 5439, as "sa" user with "Test11" password. 
+Ensure that the driver for Redshift is installed (you can also provide driver name for some specific driver, +otherwise "redshift" will be used), then configure the plugin with: + +``` +Reference Name: "src1" +Driver Name: "redshift" +Host: "redshift.xyz.eu-central-1.redshift.amazonaws.com" +Port: 5439 +Database: "prod" +Import Query: "select id, name, email, phone from users;" +Number of Splits to Generate: 1 +Username: "sa" +Password: "Test11" +``` + +Data Types Mapping +------------------ + +Mapping of Redshift types to CDAP schema: + +| Redshift Data Type | CDAP Schema Data Type | Comment | +|-----------------------------------------------------|-----------------------|----------------------------------| +| bigint | long | | +| boolean | boolean | | +| character | string | | +| character varying | string | | +| double precision | double | | +| integer | int | | +| numeric(precision, scale)/decimal(precision, scale) | decimal | | +| numeric(with 0 precision) | string | | +| real | float | | +| smallint | int | | +| smallserial | int | | +| text | string | | +| date | date | | +| time [ (p) ] [ without time zone ] | time | | +| time [ (p) ] with time zone | string | | +| timestamp [ (p) ] [ without time zone ] | timestamp | | +| timestamp [ (p) ] with time zone | timestamp | stored in UTC format in database | +| xml | string | | +| json | string | | +| super | string | | +| geometry | bytes | | +| hllsketch | string | | diff --git a/amazon-redshift-plugin/docs/Redshift-connector.md b/amazon-redshift-plugin/docs/Redshift-connector.md new file mode 100644 index 000000000..368d9e09f --- /dev/null +++ b/amazon-redshift-plugin/docs/Redshift-connector.md @@ -0,0 +1,26 @@ +# Amazon Redshift Connection + +Description +----------- +Use this connection to access data in an Amazon Redshift database using JDBC. + +Properties +---------- +**Name:** Name of the connection. Connection names must be unique in a namespace. 
+ +**Description:** Description of the connection. + +**JDBC Driver name:** Name of the JDBC driver to use. + +**Host:** Host of the current master instance of Redshift cluster. + +**Port:** Port that Redshift master instance is listening to. + +**Database:** Redshift database name. + +**Username:** User identity for connecting to the specified database. + +**Password:** Password to use to connect to the specified database. + +**Connection Arguments:** A list of arbitrary string key/value pairs as connection arguments. These arguments +will be passed to the JDBC driver as connection arguments for JDBC drivers that may need additional configurations. diff --git a/amazon-redshift-plugin/icons/Redshift-batchsource.png b/amazon-redshift-plugin/icons/Redshift-batchsource.png new file mode 100644 index 000000000..11c334799 Binary files /dev/null and b/amazon-redshift-plugin/icons/Redshift-batchsource.png differ diff --git a/amazon-redshift-plugin/pom.xml b/amazon-redshift-plugin/pom.xml new file mode 100644 index 000000000..d66e009fc --- /dev/null +++ b/amazon-redshift-plugin/pom.xml @@ -0,0 +1,139 @@ + + + + + database-plugins-parent + io.cdap.plugin + 1.11.8 + + + Amazon Redshift plugin + amazon-redshift-plugin + 4.0.0 + + + 2.1.0.18 + + + + + redshift + http://redshift-maven-repository.s3-website-us-east-1.amazonaws.com/release + + + + + + io.cdap.cdap + cdap-etl-api + + + io.cdap.plugin + database-commons + ${project.version} + + + io.cdap.plugin + hydrator-common + + + com.google.guava + guava + + + + + com.amazon.redshift + redshift-jdbc42 + ${redshift-jdbc.version} + test + + + io.cdap.plugin + database-commons + ${project.version} + test-jar + test + + + io.cdap.cdap + hydrator-test + + + io.cdap.cdap + cdap-data-pipeline3_2.12 + + + junit + junit + + + org.mockito + mockito-core + test + + + io.cdap.cdap + cdap-api + provided + + + org.jetbrains + annotations + RELEASE + compile + + + + + + io.cdap + cdap-maven-plugin + + + org.apache.felix + maven-bundle-plugin 
+ 5.1.2 + true + + + <_exportcontents> + io.cdap.plugin.amazon.redshift.*; + io.cdap.plugin.db.source.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* + + *;inline=false;scope=compile + true + lib + + + + + package + + bundle + + + + + + + diff --git a/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConnector.java b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConnector.java new file mode 100644 index 000000000..fb8cac4a7 --- /dev/null +++ b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConnector.java @@ -0,0 +1,117 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.plugin.amazon.redshift; + +import io.cdap.cdap.api.annotation.Category; +import io.cdap.cdap.api.annotation.Description; +import io.cdap.cdap.api.annotation.Name; +import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.api.data.format.StructuredRecord; +import io.cdap.cdap.etl.api.batch.BatchSource; +import io.cdap.cdap.etl.api.connector.Connector; +import io.cdap.cdap.etl.api.connector.ConnectorSpec; +import io.cdap.cdap.etl.api.connector.ConnectorSpecRequest; +import io.cdap.cdap.etl.api.connector.PluginSpec; +import io.cdap.plugin.common.Constants; +import io.cdap.plugin.common.ReferenceNames; +import io.cdap.plugin.common.db.DBConnectorPath; +import io.cdap.plugin.common.db.DBPath; +import io.cdap.plugin.db.SchemaReader; +import io.cdap.plugin.db.connector.AbstractDBSpecificConnector; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Amazon Redshift Database Connector that connects to Amazon Redshift database via JDBC. 
+ */ +@Plugin(type = Connector.PLUGIN_TYPE) +@Name(RedshiftConnector.NAME) +@Description("Connection to access data in Amazon Redshift using JDBC.") +@Category("Database") +public class RedshiftConnector extends AbstractDBSpecificConnector { + public static final String NAME = RedshiftConstants.PLUGIN_NAME; + private final RedshiftConnectorConfig config; + + public RedshiftConnector(RedshiftConnectorConfig config) { + super(config); + this.config = config; + } + + @Override + protected DBConnectorPath getDBConnectorPath(String path) throws IOException { + return new DBPath(path, true); + } + + @Override + public boolean supportSchema() { + return true; + } + + @Override + protected Class getDBRecordType() { + return RedshiftDBRecord.class; + } + + @Override + public StructuredRecord transform(LongWritable longWritable, RedshiftDBRecord redshiftDBRecord) { + return redshiftDBRecord.getRecord(); + } + + @Override + protected SchemaReader getSchemaReader(String sessionID) { + return new RedshiftSchemaReader(sessionID); + } + + @Override + protected String getTableName(String database, String schema, String table) { + return String.format("\"%s\".\"%s\"", schema, table); + } + + @Override + protected String getRandomQuery(String tableName, int limit) { + return String.format("SELECT * FROM %s\n" + + "TABLESAMPLE BERNOULLI (100.0 * %d / (SELECT COUNT(*) FROM %s))", + tableName, limit, tableName); + } + + @Override + protected void setConnectorSpec(ConnectorSpecRequest request, DBConnectorPath path, + ConnectorSpec.Builder builder) { + Map sourceProperties = new HashMap<>(); + setConnectionProperties(sourceProperties, request); + builder + .addRelatedPlugin(new PluginSpec(RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, sourceProperties)); + + String schema = path.getSchema(); + sourceProperties.put(RedshiftSource.RedshiftSourceConfig.NUM_SPLITS, "1"); + sourceProperties.put(RedshiftSource.RedshiftSourceConfig.FETCH_SIZE, + 
RedshiftSource.RedshiftSourceConfig.DEFAULT_FETCH_SIZE); + String table = path.getTable(); + if (table == null) { + return; + } + sourceProperties.put(RedshiftSource.RedshiftSourceConfig.IMPORT_QUERY, + getTableQuery(path.getDatabase(), schema, table)); + sourceProperties.put(Constants.Reference.REFERENCE_NAME, ReferenceNames.cleanseReferenceName(table)); + } + +} diff --git a/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorConfig.java b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorConfig.java new file mode 100644 index 000000000..f05f26d10 --- /dev/null +++ b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorConfig.java @@ -0,0 +1,87 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.plugin.amazon.redshift; + +import io.cdap.cdap.api.annotation.Description; +import io.cdap.cdap.api.annotation.Macro; +import io.cdap.cdap.api.annotation.Name; +import io.cdap.plugin.db.ConnectionConfig; +import io.cdap.plugin.db.connector.AbstractDBConnectorConfig; + +import javax.annotation.Nullable; + +/** + * Configuration for Redshift connector + */ +public class RedshiftConnectorConfig extends AbstractDBConnectorConfig { + + @Name(ConnectionConfig.HOST) + @Description( + "The endpoint of the Amazon Redshift cluster.") + @Macro + private String host; + + @Name(ConnectionConfig.PORT) + @Description("Database port number") + @Macro + @Nullable + private Integer port; + + @Name(ConnectionConfig.DATABASE) + @Description("Database name to connect to") + @Macro + private String database; + + public RedshiftConnectorConfig(String username, String password, String jdbcPluginName, + String connectionArguments, String host, + String database, @Nullable Integer port) { + this.user = username; + this.password = password; + this.jdbcPluginName = jdbcPluginName; + this.connectionArguments = connectionArguments; + this.host = host; + this.database = database; + this.port = port; + } + + public String getDatabase() { + return database; + } + + public String getHost() { + return host; + } + + public int getPort() { + return port == null ? 
5439 : port; + } + + @Override + public String getConnectionString() { + return String.format( + RedshiftConstants.REDSHIFT_CONNECTION_STRING_FORMAT, + host, + getPort(), + database); + } + + @Override + public boolean canConnect() { + return super.canConnect() && !containsMacro(ConnectionConfig.HOST) && + !containsMacro(ConnectionConfig.PORT) && !containsMacro(ConnectionConfig.DATABASE); + } +} diff --git a/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConstants.java b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConstants.java new file mode 100644 index 000000000..081052fb1 --- /dev/null +++ b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftConstants.java @@ -0,0 +1,27 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.amazon.redshift; + +/** Amazon Redshift constants. 
*/ +public final class RedshiftConstants { + + private RedshiftConstants() { + } + + public static final String PLUGIN_NAME = "Redshift"; + public static final String REDSHIFT_CONNECTION_STRING_FORMAT = "jdbc:redshift://%s:%s/%s"; +} diff --git a/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftDBRecord.java b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftDBRecord.java new file mode 100644 index 000000000..38e9140d8 --- /dev/null +++ b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftDBRecord.java @@ -0,0 +1,129 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.amazon.redshift; + +import io.cdap.cdap.api.data.format.StructuredRecord; +import io.cdap.cdap.api.data.schema.Schema; +import io.cdap.plugin.db.DBRecord; +import io.cdap.plugin.db.SchemaReader; +import io.cdap.plugin.util.DBUtils; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.sql.Types; +import java.time.OffsetDateTime; +import java.time.ZoneId; +import java.time.ZonedDateTime; + +/** + * Writable class for Redshift Source + */ +public class RedshiftDBRecord extends DBRecord { + + /** + * Used in map-reduce. Do not remove. 
+ */ + @SuppressWarnings("unused") + public RedshiftDBRecord() { + } + + @Override + protected void handleField(ResultSet resultSet, StructuredRecord.Builder recordBuilder, Schema.Field field, + int columnIndex, int sqlType, int sqlPrecision, int sqlScale) throws SQLException { + ResultSetMetaData metadata = resultSet.getMetaData(); + String columnTypeName = metadata.getColumnTypeName(columnIndex); + if (isUseSchema(metadata, columnIndex)) { + setFieldAccordingToSchema(resultSet, recordBuilder, field, columnIndex); + return; + } + + // HandleTimestamp + if (sqlType == Types.TIMESTAMP && columnTypeName.equalsIgnoreCase("timestamp")) { + Timestamp timestamp = resultSet.getTimestamp(columnIndex, DBUtils.PURE_GREGORIAN_CALENDAR); + if (timestamp != null) { + ZonedDateTime zonedDateTime = OffsetDateTime.of(timestamp.toLocalDateTime(), OffsetDateTime.now().getOffset()) + .atZoneSameInstant(ZoneId.of("UTC")); + Schema nonNullableSchema = field.getSchema().isNullable() ? + field.getSchema().getNonNullable() : field.getSchema(); + setZonedDateTimeBasedOnOutputSchema(recordBuilder, nonNullableSchema.getLogicalType(), + field.getName(), zonedDateTime); + } else { + recordBuilder.set(field.getName(), null); + } + return; + } + + // HandleTimestampTZ + if (sqlType == Types.TIMESTAMP && columnTypeName.equalsIgnoreCase("timestamptz")) { + OffsetDateTime timestamp = resultSet.getObject(columnIndex, OffsetDateTime.class); + if (timestamp != null) { + recordBuilder.setTimestamp(field.getName(), timestamp.atZoneSameInstant(ZoneId.of("UTC"))); + } else { + recordBuilder.set(field.getName(), null); + } + return; + } + + // HandleNumeric + int columnType = metadata.getColumnType(columnIndex); + if (columnType == Types.NUMERIC) { + Schema nonNullableSchema = field.getSchema().isNullable() ? 
+ field.getSchema().getNonNullable() : field.getSchema(); + int precision = metadata.getPrecision(columnIndex); + if (precision == 0 && Schema.Type.STRING.equals(nonNullableSchema.getType())) { + // When output schema is set to String for precision less numbers + recordBuilder.set(field.getName(), resultSet.getString(columnIndex)); + } else if (Schema.LogicalType.DECIMAL.equals(nonNullableSchema.getLogicalType())) { + BigDecimal originalDecimalValue = resultSet.getBigDecimal(columnIndex); + if (originalDecimalValue != null) { + BigDecimal newDecimalValue = new BigDecimal(originalDecimalValue.toPlainString()) + .setScale(nonNullableSchema.getScale(), RoundingMode.HALF_EVEN); + recordBuilder.setDecimal(field.getName(), newDecimalValue); + } + } + return; + } + setField(resultSet, recordBuilder, field, columnIndex, sqlType, sqlPrecision, sqlScale); + } + + private void setZonedDateTimeBasedOnOutputSchema(StructuredRecord.Builder recordBuilder, + Schema.LogicalType logicalType, + String fieldName, + ZonedDateTime zonedDateTime) { + if (Schema.LogicalType.DATETIME.equals(logicalType)) { + recordBuilder.setDateTime(fieldName, zonedDateTime.toLocalDateTime()); + } else if (Schema.LogicalType.TIMESTAMP_MICROS.equals(logicalType)) { + recordBuilder.setTimestamp(fieldName, zonedDateTime); + } + } + + private static boolean isUseSchema(ResultSetMetaData metadata, int columnIndex) throws SQLException { + String columnTypeName = metadata.getColumnTypeName(columnIndex); + // If the column Type Name is present in the String mapped Redshift types then return true. 
+ return RedshiftSchemaReader.STRING_MAPPED_REDSHIFT_TYPES_NAMES.contains(columnTypeName); + } + + @Override + protected SchemaReader getSchemaReader() { + return new RedshiftSchemaReader(); + } + +} diff --git a/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftSchemaReader.java b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftSchemaReader.java new file mode 100644 index 000000000..df9938a45 --- /dev/null +++ b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftSchemaReader.java @@ -0,0 +1,117 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */
+
+package io.cdap.plugin.amazon.redshift;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import io.cdap.cdap.api.data.schema.Schema;
+import io.cdap.plugin.db.CommonSchemaReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Schema reader for Amazon Redshift result sets.
+ *
+ * <p>Handles a few Redshift-specific column types itself and delegates every other type to
+ * {@link CommonSchemaReader}.
+ */
+public class RedshiftSchemaReader extends CommonSchemaReader {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RedshiftSchemaReader.class);
+
+  /**
+   * Column type names that are always mapped to a CDAP STRING. Membership is checked
+   * case-sensitively against the type name reported by the result set metadata.
+   */
+  public static final Set<String> STRING_MAPPED_REDSHIFT_TYPES_NAMES = ImmutableSet.of(
+    "timetz", "money"
+  );
+
+  // When non-null, the columns named "c_<sessionID>" and "sqn_<sessionID>" are left out of the schema.
+  private final String sessionID;
+
+  /**
+   * Creates a reader without a session id; such a reader never ignores a column.
+   */
+  public RedshiftSchemaReader() {
+    this(null);
+  }
+
+  /**
+   * Creates a reader that ignores the two columns derived from the given session id.
+   *
+   * @param sessionID suffix of the "c_" and "sqn_" columns to ignore, or {@code null} to ignore nothing
+   */
+  public RedshiftSchemaReader(String sessionID) {
+    super();
+    this.sessionID = sessionID;
+  }
+
+  /**
+   * Returns the CDAP schema for one result set column. The rules are applied in this order:
+   * <ol>
+   *   <li>type name listed in {@link #STRING_MAPPED_REDSHIFT_TYPES_NAMES}: STRING</li>
+   *   <li>type name "INT" (any case): INT</li>
+   *   <li>type name "BIGINT" (any case): LONG</li>
+   *   <li>JDBC type NUMERIC with a reported precision of 0: STRING</li>
+   *   <li>type name "timestamp" (any case): DATETIME</li>
+   *   <li>anything else: whatever {@link CommonSchemaReader} returns</li>
+   * </ol>
+   *
+   * @param metadata metadata of the result set being read
+   * @param index 1-based column index
+   * @return the schema of the column, not wrapped as nullable
+   * @throws SQLException if the metadata cannot be read
+   */
+  @Override
+  public Schema getSchema(ResultSetMetaData metadata, int index) throws SQLException {
+    String typeName = metadata.getColumnTypeName(index);
+    int columnType = metadata.getColumnType(index);
+
+    if (STRING_MAPPED_REDSHIFT_TYPES_NAMES.contains(typeName)) {
+      return Schema.of(Schema.Type.STRING);
+    }
+    // Constant-first comparisons: a null type name falls through to the parent instead of throwing.
+    if ("INT".equalsIgnoreCase(typeName)) {
+      return Schema.of(Schema.Type.INT);
+    }
+    if ("BIGINT".equalsIgnoreCase(typeName)) {
+      return Schema.of(Schema.Type.LONG);
+    }
+
+    // If it is a numeric type without precision then use the Schema of String to avoid any precision loss
+    if (Types.NUMERIC == columnType && metadata.getPrecision(index) == 0) {
+      LOG.warn("Field '{}' is a {} type without precision and scale, "
+                 + "converting into STRING type to avoid any precision loss.",
+               metadata.getColumnName(index), typeName);
+      return Schema.of(Schema.Type.STRING);
+    }
+
+    if ("timestamp".equalsIgnoreCase(typeName)) {
+      return Schema.of(Schema.LogicalType.DATETIME);
+    }
+
+    return super.getSchema(metadata, index);
+  }
+
+  /**
+   * Tells whether a column is one of the two session columns and must be left out of the schema.
+   *
+   * @param metadata metadata of the result set being read
+   * @param index 1-based column index
+   * @return {@code true} only if a session id is set and the column is named
+   *   "c_&lt;sessionID&gt;" or "sqn_&lt;sessionID&gt;"
+   * @throws SQLException if the column name cannot be read
+   */
+  @Override
+  public boolean shouldIgnoreColumn(ResultSetMetaData metadata, int index) throws SQLException {
+    if (sessionID == null) {
+      return false;
+    }
+    String columnName = metadata.getColumnName(index);
+    return ("c_" + sessionID).equals(columnName) || ("sqn_" + sessionID).equals(columnName);
+  }
+
+  /**
+   * Builds one schema field per column of the result set, skipping the columns for which
+   * {@link #shouldIgnoreColumn(ResultSetMetaData, int)} returns {@code true}.
+   *
+   * @param resultSet result set whose metadata is inspected; no rows are read
+   * @return the fields in column order, each with a nullable schema
+   * @throws SQLException if the metadata cannot be read
+   */
+  @Override
+  public List getSchemaFields(ResultSet resultSet) throws SQLException {
+    List<Schema.Field> schemaFields = Lists.newArrayList();
+    ResultSetMetaData metadata = resultSet.getMetaData();
+    int columnCount = metadata.getColumnCount();
+    // ResultSetMetadata columns are numbered starting with 1
+    for (int i = 1; i <= columnCount; i++) {
+      if (shouldIgnoreColumn(metadata, i)) {
+        continue;
+      }
+      // Every column is wrapped as nullable: per the original author's note the (cdata) driver
+      // doesn't provide proper information about isNullable.
+      Schema columnSchema = Schema.nullableOf(getSchema(metadata, i));
+      schemaFields.add(Schema.Field.of(metadata.getColumnName(i), columnSchema));
+    }
+    return schemaFields;
+  }
+
+}
diff --git a/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftSource.java b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftSource.java
new file mode 100644
index 000000000..1b5894de9
--- /dev/null
+++ b/amazon-redshift-plugin/src/main/java/io/cdap/plugin/amazon/redshift/RedshiftSource.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.amazon.redshift;
+
+import io.cdap.cdap.api.annotation.Description;
+import io.cdap.cdap.api.annotation.Macro;
+import io.cdap.cdap.api.annotation.Metadata;
+import io.cdap.cdap.api.annotation.MetadataProperty;
+import io.cdap.cdap.api.annotation.Name;
+import io.cdap.cdap.api.annotation.Plugin;
+import io.cdap.cdap.etl.api.FailureCollector;
+import io.cdap.cdap.etl.api.batch.BatchSource;
+import io.cdap.cdap.etl.api.batch.BatchSourceContext;
+import io.cdap.cdap.etl.api.connector.Connector;
+import io.cdap.plugin.common.Asset;
+import io.cdap.plugin.common.ConfigUtil;
+import io.cdap.plugin.common.LineageRecorder;
+import io.cdap.plugin.db.SchemaReader;
+import io.cdap.plugin.db.config.AbstractDBSpecificSourceConfig;
+import io.cdap.plugin.db.source.AbstractDBSource;
+import io.cdap.plugin.util.DBUtils;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import java.util.Collections;
+import java.util.Map;
+import javax.annotation.Nullable;
+
+/**
+ * Batch source to read from an Amazon Redshift database.
+ *
+ * <p>Supplies the Redshift-specific pieces to {@link AbstractDBSource}: the schema reader, the
+ * record type, the JDBC connection string and the lineage recorder.
+ */
+@Plugin(type = BatchSource.PLUGIN_TYPE)
+@Name(RedshiftConstants.PLUGIN_NAME)
+@Description(
+  "Reads from a Amazon Redshift database table(s) using a configurable SQL query."
+    + " Outputs one record for each row returned by the query.")
+@Metadata(properties = {@MetadataProperty(key = Connector.PLUGIN_TYPE, value = RedshiftConnector.NAME)})
+public class RedshiftSource
+  extends AbstractDBSource {
+
+  private final RedshiftSourceConfig redshiftSourceConfig;
+
+  /**
+   * @param redshiftSourceConfig plugin configuration; also handed to the parent source
+   */
+  public RedshiftSource(RedshiftSourceConfig redshiftSourceConfig) {
+    super(redshiftSourceConfig);
+    this.redshiftSourceConfig = redshiftSourceConfig;
+  }
+
+  /**
+   * @return a new {@link RedshiftSchemaReader} created without a session id
+   */
+  @Override
+  protected SchemaReader getSchemaReader() {
+    return new RedshiftSchemaReader();
+  }
+
+  /**
+   * @return {@link RedshiftDBRecord}, the record class used for rows of this source
+   */
+  @Override
+  protected Class getDBRecordType() {
+    return RedshiftDBRecord.class;
+  }
+
+  /**
+   * Formats the JDBC connection string from the host, port and database of the configured
+   * connection, using {@link RedshiftConstants#REDSHIFT_CONNECTION_STRING_FORMAT}.
+   *
+   * @return the connection string
+   */
+  @Override
+  protected String createConnectionString() {
+    // Go through the accessor, as getLineageRecorder does, instead of reading the field directly.
+    RedshiftConnectorConfig connectorConfig = redshiftSourceConfig.getConnection();
+    return String.format(
+      RedshiftConstants.REDSHIFT_CONNECTION_STRING_FORMAT,
+      connectorConfig.getHost(),
+      connectorConfig.getPort(),
+      connectorConfig.getDatabase());
+  }
+
+  /**
+   * Creates the lineage recorder for an asset named after the reference name, with a fully
+   * qualified name built by {@link DBUtils#constructFQN} from "redshift", host, port, database
+   * and reference name.
+   *
+   * @param context context of the batch source run
+   * @return the lineage recorder for this source
+   */
+  @Override
+  protected LineageRecorder getLineageRecorder(BatchSourceContext context) {
+    RedshiftConnectorConfig connectorConfig = redshiftSourceConfig.getConnection();
+    String referenceName = redshiftSourceConfig.getReferenceName();
+    String fqn = DBUtils.constructFQN("redshift", connectorConfig.getHost(),
+                                      connectorConfig.getPort(),
+                                      connectorConfig.getDatabase(),
+                                      referenceName);
+    Asset.Builder assetBuilder = Asset.builder(redshiftSourceConfig.getReferenceName()).setFqn(fqn);
+    return new LineageRecorder(context, assetBuilder.build());
+  }
+
+  /**
+   * Redshift source config.
+   */
+  public static class RedshiftSourceConfig extends AbstractDBSpecificSourceConfig {
+
+    @Name(ConfigUtil.NAME_USE_CONNECTION)
+    @Nullable
+    @Description("Whether to use an existing connection.")
+    private Boolean useConnection;
+
+    @Name(ConfigUtil.NAME_CONNECTION)
+    @Macro
+    @Nullable
+    @Description("The existing connection to use.")
+    private RedshiftConnectorConfig connection;
+
+    /**
+     * @return an empty map; this source adds no database-specific arguments
+     */
+    @Override
+    public Map getDBSpecificArguments() {
+      return Collections.emptyMap();
+    }
+
+    /**
+     * @return the fetch size from the parent config, or the parsed DEFAULT_FETCH_SIZE when the
+     *   parent returns {@code null}
+     */
+    @Override
+    public Integer getFetchSize() {
+      Integer fetchSize = super.getFetchSize();
+      return fetchSize == null ? Integer.parseInt(DEFAULT_FETCH_SIZE) : fetchSize;
+    }
+
+    /**
+     * @return the configured connection; the field is declared nullable
+     */
+    @Override
+    protected RedshiftConnectorConfig getConnection() {
+      return connection;
+    }
+
+    /**
+     * Validates the connection settings first, then runs the parent validation.
+     *
+     * @param collector receives the validation failures
+     */
+    @Override
+    public void validate(FailureCollector collector) {
+      ConfigUtil.validateConnection(this, useConnection, connection, collector);
+      super.validate(collector);
+    }
+  }
+}
diff --git a/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorTest.java b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorTest.java
new file mode 100644
index 000000000..a43eb4302
--- /dev/null
+++ b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorTest.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.amazon.redshift;
+
+import io.cdap.plugin.db.connector.DBSpecificConnectorBaseTest;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * Runs the connector test inherited from {@link DBSpecificConnectorBaseTest} against a
+ * {@link RedshiftConnector}.
+ */
+public class RedshiftConnectorTest extends DBSpecificConnectorBaseTest {
+
+  private static final String JDBC_DRIVER_CLASS_NAME = "com.amazon.redshift.Driver";
+
+  /**
+   * Builds a connector from connection settings that are not declared in this class
+   * (presumably supplied by the base test) and hands it to the three-argument {@code test} overload.
+   */
+  @Test
+  public void test() throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+    RedshiftConnectorConfig connectorConfig = new RedshiftConnectorConfig(
+      username, password, JDBC_PLUGIN_NAME, connectionArguments, host, database, port);
+    RedshiftConnector connector = new RedshiftConnector(connectorConfig);
+    test(connector, JDBC_DRIVER_CLASS_NAME, RedshiftConstants.PLUGIN_NAME);
+  }
+}
+
diff --git a/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorUnitTest.java b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorUnitTest.java
new file mode 100644
index 000000000..39579cb60
--- /dev/null
+++ b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftConnectorUnitTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.amazon.redshift;
+
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/**
+ * Unit tests for {@link RedshiftConnector}
+ */
+public class RedshiftConnectorUnitTest {
+  @Rule
+  public ExpectedException expectedEx = ExpectedException.none();
+
+  // Built with a null config; the methods under test are called without one.
+  private static final RedshiftConnector CONNECTOR = new RedshiftConnector(null);
+
+  /**
+   * Unit test for getTableName(): the result is the quoted schema and table joined by a dot;
+   * the database argument does not appear in it.
+   */
+  @Test
+  public void getTableNameTest() {
+    Assert.assertEquals("\"schema\".\"table\"",
+                        CONNECTOR.getTableName("db", "schema", "table"));
+  }
+
+  /**
+   * Unit test for getRandomQuery(): the sampling query must select from the given table with a
+   * BERNOULLI table sample whose percentage is derived from the limit and the table's row count.
+   */
+  @Test
+  public void getTableQueryTest() {
+    String tableName = CONNECTOR.getTableName("db", "schema", "table");
+    int limit = 100;
+
+    // random query
+    String expectedQuery = String.format("SELECT * FROM %s\n" +
+                                           "TABLESAMPLE BERNOULLI (100.0 * %d / (SELECT COUNT(*) FROM %s))",
+                                         tableName, limit, tableName);
+    Assert.assertEquals(expectedQuery, CONNECTOR.getRandomQuery(tableName, limit));
+  }
+}
diff --git a/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftDBRecordUnitTest.java b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftDBRecordUnitTest.java
new file mode 100644
index 000000000..4d11004e4
--- /dev/null
+++ b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftDBRecordUnitTest.java
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.amazon.redshift;
+
+import io.cdap.cdap.api.data.format.StructuredRecord;
+import io.cdap.cdap.api.data.schema.Schema;
+import io.cdap.plugin.util.DBUtils;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mockito;
+import org.mockito.junit.MockitoJUnitRunner;
+
+import java.math.BigDecimal;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.time.OffsetDateTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@link RedshiftDBRecord}.
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class RedshiftDBRecordUnitTest {
+
+  private static final int DEFAULT_PRECISION = 38;
+
+  /**
+   * Validate the precision less Numbers handling against following use cases.
+   * 1. Ensure that the numeric type with [p,s] set as [38,4] is detected as BigDecimal(38,4) in cdap.
+   * 2. Ensure that the numeric type without [p,s] is detected as String type in cdap.
+   *
+   * @throws Exception if the mocked result set or the record handling fails
+   */
+  @Test
+  public void validatePrecisionLessDecimalParsing() throws Exception {
+    Schema.Field field1 = Schema.Field.of("ID1", Schema.decimalOf(DEFAULT_PRECISION, 4));
+    Schema.Field field2 = Schema.Field.of("ID2", Schema.of(Schema.Type.STRING));
+
+    Schema schema = Schema.recordOf(
+      "dbRecord",
+      field1,
+      field2
+    );
+
+    // Column 1 is a NUMERIC with precision, column 2 a NUMERIC that reports precision 0.
+    ResultSetMetaData resultSetMetaData = Mockito.mock(ResultSetMetaData.class);
+    when(resultSetMetaData.getColumnType(eq(1))).thenReturn(Types.NUMERIC);
+    when(resultSetMetaData.getPrecision(eq(1))).thenReturn(DEFAULT_PRECISION);
+    when(resultSetMetaData.getColumnType(eq(2))).thenReturn(Types.NUMERIC);
+    when(resultSetMetaData.getPrecision(eq(2))).thenReturn(0);
+
+    ResultSet resultSet = Mockito.mock(ResultSet.class);
+
+    when(resultSet.getMetaData()).thenReturn(resultSetMetaData);
+    when(resultSet.getBigDecimal(eq(1))).thenReturn(BigDecimal.valueOf(123.4568));
+    when(resultSet.getString(eq(2))).thenReturn("123.4568");
+
+    StructuredRecord.Builder builder = StructuredRecord.builder(schema);
+    RedshiftDBRecord dbRecord = new RedshiftDBRecord();
+    dbRecord.handleField(resultSet, builder, field1, 1, Types.NUMERIC, DEFAULT_PRECISION, 4);
+    dbRecord.handleField(resultSet, builder, field2, 2, Types.NUMERIC, 0, -127);
+
+    StructuredRecord record = builder.build();
+    Assert.assertTrue(record.getDecimal("ID1") instanceof BigDecimal);
+    Assert.assertEquals(BigDecimal.valueOf(123.4568), record.getDecimal("ID1"));
+    Assert.assertTrue(record.get("ID2") instanceof String);
+    Assert.assertEquals("123.4568", record.get("ID2"));
+  }
+
+  /**
+   * A column whose type name is "timestamp" must be readable both into a DATETIME field and,
+   * for backward compatibility, into a TIMESTAMP_MICROS field.
+   *
+   * @throws SQLException if the mocked result set or the record handling fails
+   */
+  @Test
+  public void validateTimestampType() throws SQLException {
+    OffsetDateTime offsetDateTime = OffsetDateTime.of(2023, 1, 1, 1, 0, 0, 0, ZoneOffset.UTC);
+    // The mocks are keyed on column index 0; the same index is passed to handleField below.
+    ResultSetMetaData metaData = Mockito.mock(ResultSetMetaData.class);
+    when(metaData.getColumnTypeName(eq(0))).thenReturn("timestamp");
+
+    ResultSet resultSet = Mockito.mock(ResultSet.class);
+    when(resultSet.getMetaData()).thenReturn(metaData);
+    when(resultSet.getTimestamp(eq(0), eq(DBUtils.PURE_GREGORIAN_CALENDAR)))
+      .thenReturn(Timestamp.from(offsetDateTime.toInstant()));
+
+    Schema.Field field1 = Schema.Field.of("field1", Schema.of(Schema.LogicalType.DATETIME));
+    Schema schema = Schema.recordOf(
+      "dbRecord",
+      field1
+    );
+    StructuredRecord.Builder builder = StructuredRecord.builder(schema);
+
+    RedshiftDBRecord dbRecord = new RedshiftDBRecord();
+    dbRecord.handleField(resultSet, builder, field1, 0, Types.TIMESTAMP, 0, 0);
+    StructuredRecord record = builder.build();
+    Assert.assertNotNull(record);
+    Assert.assertNotNull(record.getDateTime("field1"));
+    Assert.assertEquals(offsetDateTime.toInstant(), record.getDateTime("field1").toInstant(ZoneOffset.UTC));
+
+    // Validate backward compatibility
+
+    field1 = Schema.Field.of("field1", Schema.of(Schema.LogicalType.TIMESTAMP_MICROS));
+    schema = Schema.recordOf(
+      "dbRecord",
+      field1
+    );
+    builder = StructuredRecord.builder(schema);
+    dbRecord.handleField(resultSet, builder, field1, 0, Types.TIMESTAMP, 0, 0);
+    record = builder.build();
+    Assert.assertNotNull(record);
+    Assert.assertNotNull(record.getTimestamp("field1"));
+    Assert.assertEquals(offsetDateTime.toInstant(), record.getTimestamp("field1").toInstant());
+  }
+
+  /**
+   * A column whose type name is "timestamptz" is read as an {@link OffsetDateTime} and stored
+   * in a TIMESTAMP_MICROS field.
+   *
+   * @throws SQLException if the mocked result set or the record handling fails
+   */
+  @Test
+  public void validateTimestampTZType() throws SQLException {
+    OffsetDateTime offsetDateTime = OffsetDateTime.of(2023, 1, 1, 1, 0, 0, 0, ZoneOffset.UTC);
+    ResultSetMetaData metaData = Mockito.mock(ResultSetMetaData.class);
+    when(metaData.getColumnTypeName(eq(0))).thenReturn("timestamptz");
+
+    ResultSet resultSet = Mockito.mock(ResultSet.class);
+    when(resultSet.getMetaData()).thenReturn(metaData);
+    when(resultSet.getObject(eq(0), eq(OffsetDateTime.class))).thenReturn(offsetDateTime);
+
+    Schema.Field field1 = Schema.Field.of("field1", Schema.of(Schema.LogicalType.TIMESTAMP_MICROS));
+    Schema schema = Schema.recordOf(
+      "dbRecord",
+      field1
+    );
+    StructuredRecord.Builder builder = StructuredRecord.builder(schema);
+
+    RedshiftDBRecord dbRecord = new RedshiftDBRecord();
+    dbRecord.handleField(resultSet, builder, field1, 0, Types.TIMESTAMP, 0, 0);
+    StructuredRecord record = builder.build();
+    Assert.assertNotNull(record);
+    Assert.assertNotNull(record.getTimestamp("field1", ZoneId.of("UTC")));
+    Assert.assertEquals(offsetDateTime.toInstant(), record.getTimestamp("field1", ZoneId.of("UTC")).toInstant());
+  }
+}
diff --git a/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftFailedConnectionTest.java b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftFailedConnectionTest.java
new file mode 100644
index 000000000..2d21c4478
--- /dev/null
+++ b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftFailedConnectionTest.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.amazon.redshift;
+
+import io.cdap.plugin.db.connector.DBSpecificFailedConnectionTest;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * Checks the error message reported when a {@link RedshiftConnector} points at
+ * localhost:5432 and the connection attempt is refused.
+ */
+public class RedshiftFailedConnectionTest extends DBSpecificFailedConnectionTest {
+  private static final String JDBC_DRIVER_CLASS_NAME = "com.amazon.redshift.Driver";
+
+  /**
+   * Passes the connector and the expected failure message to the inherited test.
+   */
+  @Test
+  public void test() throws ClassNotFoundException, IOException {
+    RedshiftConnectorConfig connectorConfig =
+      new RedshiftConnectorConfig("username", "password", "jdbc", "", "localhost", "db", 5432);
+    RedshiftConnector connector = new RedshiftConnector(connectorConfig);
+
+    String expectedMessage = "Failed to create connection to database via connection string: " +
+      "jdbc:redshift://localhost:5432/db and arguments: " +
+      "{user=username}. Error: ConnectException: Connection refused " +
+      "(Connection refused).";
+
+    super.test(JDBC_DRIVER_CLASS_NAME, connector, expectedMessage);
+  }
+}
diff --git a/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftPluginTestBase.java b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftPluginTestBase.java
new file mode 100644
index 000000000..5df4fb300
--- /dev/null
+++ b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftPluginTestBase.java
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */ + +package io.cdap.plugin.amazon.redshift; + +import com.google.common.base.Charsets; +import com.google.common.base.Throwables; +import com.google.common.collect.Sets; +import io.cdap.cdap.api.artifact.ArtifactSummary; +import io.cdap.cdap.api.plugin.PluginClass; +import io.cdap.cdap.datapipeline.DataPipelineApp; +import io.cdap.cdap.proto.id.ArtifactId; +import io.cdap.cdap.proto.id.NamespaceId; +import io.cdap.plugin.db.ConnectionConfig; +import io.cdap.plugin.db.DBRecord; +import io.cdap.plugin.db.batch.DatabasePluginTestBase; +import io.cdap.plugin.db.sink.ETLDBOutputFormat; +import io.cdap.plugin.db.source.DataDrivenETLDBInputFormat; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.sql.Connection; +import java.sql.Date; +import java.sql.Driver; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Time; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +/** + * Base test class for Redshift plugins. 
+ *
+ * <p>{@link #setupTest()} reads the connection properties, registers the plugin artifacts and
+ * creates and fills the test tables once; {@link #tearDownDB()} drops those tables again.
+ */
+public abstract class RedshiftPluginTestBase extends DatabasePluginTestBase {
+  private static final Logger LOGGER = LoggerFactory.getLogger(RedshiftPluginTestBase.class);
+  protected static final ArtifactId DATAPIPELINE_ARTIFACT_ID = NamespaceId.DEFAULT.artifact("data-pipeline", "3.2.0");
+  protected static final ArtifactSummary DATAPIPELINE_ARTIFACT = new ArtifactSummary("data-pipeline", "3.2.0");
+  protected static final long CURRENT_TS = System.currentTimeMillis();
+
+  protected static final String JDBC_DRIVER_NAME = "redshift";
+  // Connection properties shared by the tests; filled by getProperties().
+  protected static final Map<String, String> BASE_PROPS = new HashMap<>();
+
+  protected static String connectionUrl;
+  protected static int year;
+  protected static final int PRECISION = 10;
+  protected static final int SCALE = 6;
+  // Counts setupTest() invocations so that the setup body runs only on the first one.
+  private static int startCount;
+
+  /**
+   * One-time setup: loads the connection properties, deploys the data pipeline and plugin
+   * artifacts, switches the default time zone to UTC and creates and populates the test tables.
+   *
+   * @throws Exception if artifact deployment or any database statement fails
+   */
+  @BeforeClass
+  public static void setupTest() throws Exception {
+    if (startCount++ > 0) {
+      return;
+    }
+
+    getProperties();
+
+    Calendar calendar = Calendar.getInstance();
+    calendar.setTime(new Date(CURRENT_TS));
+    year = calendar.get(Calendar.YEAR);
+
+    setupBatchArtifacts(DATAPIPELINE_ARTIFACT_ID, DataPipelineApp.class);
+
+    addPluginArtifact(NamespaceId.DEFAULT.artifact(JDBC_DRIVER_NAME, "1.0.0"),
+                      DATAPIPELINE_ARTIFACT_ID,
+                      RedshiftSource.class, DBRecord.class,
+                      ETLDBOutputFormat.class, DataDrivenETLDBInputFormat.class, DBRecord.class);
+
+    // add the redshift JDBC driver as a 3rd party plugin
+    // NOTE(review): Driver resolves to java.sql.Driver through this file's imports - confirm that
+    // the Redshift driver class was not intended here.
+    PluginClass redshiftDriver = new PluginClass(ConnectionConfig.JDBC_PLUGIN_TYPE, JDBC_DRIVER_NAME,
+                                                 "redshift driver class", Driver.class.getName(),
+                                                 null, Collections.emptyMap());
+    addPluginArtifact(NamespaceId.DEFAULT.artifact("redshift-jdbc-connector", "1.0.0"),
+                      DATAPIPELINE_ARTIFACT_ID,
+                      Sets.newHashSet(redshiftDriver), Driver.class);
+
+    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
+
+    connectionUrl = "jdbc:redshift://" + BASE_PROPS.get(ConnectionConfig.HOST) + ":" +
+      BASE_PROPS.get(ConnectionConfig.PORT) + "/" + BASE_PROPS.get(ConnectionConfig.DATABASE);
+    // Close the setup connection once the tables are ready instead of leaking it.
+    try (Connection conn = createConnection()) {
+      createTestTables(conn);
+      prepareTestData(conn);
+    }
+  }
+
+  /**
+   * Fills {@link #BASE_PROPS} from the "redshift.*" properties through getPropertyOrSkip, plus
+   * the JDBC plugin name.
+   */
+  private static void getProperties() {
+    BASE_PROPS.put(ConnectionConfig.HOST, getPropertyOrSkip("redshift.clusterEndpoint"));
+    BASE_PROPS.put(ConnectionConfig.PORT, getPropertyOrSkip("redshift.port"));
+    BASE_PROPS.put(ConnectionConfig.DATABASE, getPropertyOrSkip("redshift.database"));
+    BASE_PROPS.put(ConnectionConfig.USER, getPropertyOrSkip("redshift.username"));
+    BASE_PROPS.put(ConnectionConfig.PASSWORD, getPropertyOrSkip("redshift.password"));
+    BASE_PROPS.put(ConnectionConfig.JDBC_PLUGIN_NAME, JDBC_DRIVER_NAME);
+  }
+
+  /**
+   * Creates the action tables, {@code my_table} and the two tables copied from it.
+   *
+   * @param conn open connection to the test database
+   * @throws SQLException if a CREATE TABLE statement fails
+   */
+  protected static void createTestTables(Connection conn) throws SQLException {
+    try (Statement stmt = conn.createStatement()) {
+      // create a table that the action will truncate at the end of the run
+      stmt.execute("CREATE TABLE \"dbActionTest\" (x int, day varchar(10))");
+      // create a table that the post action will truncate at the end of the run
+      stmt.execute("CREATE TABLE \"postActionTest\" (x int, day varchar(10))");
+
+      stmt.execute("CREATE TABLE my_table" +
+                     "(" +
+                     "\"ID\" INT NOT NULL," +
+                     "\"NAME\" VARCHAR(40) NOT NULL," +
+                     "\"SCORE\" REAL," +
+                     "\"GRADUATED\" BOOLEAN," +
+                     "\"NOT_IMPORTED\" VARCHAR(30)," +
+                     "\"SMALLINT_COL\" SMALLINT," +
+                     "\"BIG\" BIGINT," +
+                     "\"NUMERIC_COL\" NUMERIC(" + PRECISION + "," + SCALE + ")," +
+                     "\"DECIMAL_COL\" DECIMAL(" + PRECISION + "," + SCALE + ")," +
+                     "\"DOUBLE_PREC_COL\" DOUBLE PRECISION," +
+                     "\"DATE_COL\" DATE," +
+                     "\"TIME_COL\" TIME," +
+                     "\"TIMESTAMP_COL\" TIMESTAMP(3)," +
+                     "\"TEXT_COL\" TEXT," +
+                     "\"CHAR_COL\" CHAR(100)," +
+                     "\"BYTEA_COL\" BYTEA" +
+                     ")");
+      stmt.execute("CREATE TABLE \"MY_DEST_TABLE\" AS " +
+                     "SELECT * FROM my_table");
+      stmt.execute("CREATE TABLE your_table AS " +
+                     "SELECT * FROM my_table");
+    }
+  }
+
+  /**
+   * Inserts one row into each action table and five rows into {@code my_table} and
+   * {@code your_table}.
+   *
+   * @param conn open connection to the test database
+   * @throws SQLException if an insert fails
+   */
+  protected static void prepareTestData(Connection conn) throws SQLException {
+    try (
+      Statement stmt = conn.createStatement();
+      PreparedStatement pStmt1 =
+        conn.prepareStatement("INSERT INTO my_table " +
+                                "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?," +
+                                " ?, ?, ?, ?, ?, ?)");
+      PreparedStatement pStmt2 =
+        conn.prepareStatement("INSERT INTO your_table " +
+                                "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?," +
+                                " ?, ?, ?, ?, ?, ?)")) {
+
+      stmt.execute("insert into \"dbActionTest\" values (1, '1970-01-01')");
+      stmt.execute("insert into \"postActionTest\" values (1, '1970-01-01')");
+
+      populateData(pStmt1, pStmt2);
+    }
+  }
+
+  /**
+   * Executes every statement five times with the same generated values for rows 1 to 5.
+   *
+   * @param stmts prepared INSERT statements with sixteen parameters each
+   * @throws SQLException if binding or executing a statement fails
+   */
+  private static void populateData(PreparedStatement... stmts) throws SQLException {
+    // insert the same data into both tables: my_table and your_table
+    for (PreparedStatement pStmt : stmts) {
+      for (int i = 1; i <= 5; i++) {
+        String name = "user" + i;
+        pStmt.setInt(1, i);
+        pStmt.setString(2, name);
+        pStmt.setDouble(3, 123.45 + i);
+        pStmt.setBoolean(4, (i % 2 == 0));
+        pStmt.setString(5, "random" + i);
+        pStmt.setShort(6, (short) i);
+        pStmt.setLong(7, (long) i);
+        pStmt.setBigDecimal(8, new BigDecimal("123.45").add(new BigDecimal(i)));
+        pStmt.setBigDecimal(9, new BigDecimal("123.45").add(new BigDecimal(i)));
+        pStmt.setDouble(10, 123.45 + i);
+        pStmt.setDate(11, new Date(CURRENT_TS));
+        pStmt.setTime(12, new Time(CURRENT_TS));
+        pStmt.setTimestamp(13, new Timestamp(CURRENT_TS));
+        pStmt.setString(14, name);
+        pStmt.setString(15, "char" + i);
+        pStmt.setBytes(16, name.getBytes(Charsets.UTF_8));
+        pStmt.executeUpdate();
+      }
+    }
+  }
+
+  /**
+   * Opens a new connection to {@link #connectionUrl} with the user and password from
+   * {@link #BASE_PROPS}. The caller is responsible for closing it.
+   *
+   * @return the open connection
+   * @throws RuntimeException wrapping any failure, as produced by {@code Throwables.propagate}
+   */
+  public static Connection createConnection() {
+    try {
+      Class.forName(Driver.class.getCanonicalName());
+      return DriverManager.getConnection(connectionUrl, BASE_PROPS.get(ConnectionConfig.USER),
+                                         BASE_PROPS.get(ConnectionConfig.PASSWORD));
+    } catch (Exception e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  /**
+   * Drops the test tables. Failures are logged and never fail the test run.
+   */
+  @AfterClass
+  public static void tearDownDB() {
+    try (Connection conn = createConnection();
+         Statement stmt = conn.createStatement()) {
+      // Names that were created quoted are dropped quoted as well, so that they also match when
+      // the cluster treats quoted identifiers as case-sensitive.
+      executeCleanup(Arrays.asList(() -> stmt.execute("DROP TABLE my_table"),
+                                   () -> stmt.execute("DROP TABLE your_table"),
+                                   () -> stmt.execute("DROP TABLE \"postActionTest\""),
+                                   () -> stmt.execute("DROP TABLE \"dbActionTest\""),
+                                   () -> stmt.execute("DROP TABLE \"MY_DEST_TABLE\"")), LOGGER);
+    } catch (Exception e) {
+      LOGGER.warn("Fail to tear down.", e);
+    }
+  }
+}
diff --git a/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftPluginTestSuite.java b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftPluginTestSuite.java
new file mode 100644
index 000000000..95ad0938b
--- /dev/null
+++ b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftPluginTestSuite.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.amazon.redshift;
+
+import io.cdap.cdap.common.test.TestSuite;
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+
+/**
+ * This is a test suite that runs all the tests for Redshift plugins.
+ *
+ * <p>The suite runs with the {@link TestSuite} runner and currently lists a single class,
+ * {@link RedshiftSourceTestRun}.
+ */
+@RunWith(TestSuite.class)
+@Suite.SuiteClasses({RedshiftSourceTestRun.class})
+public class RedshiftPluginTestSuite extends RedshiftPluginTestBase {
+}
diff --git a/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftSourceTestRun.java b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftSourceTestRun.java
new file mode 100644
index 000000000..1ac41bcd0
--- /dev/null
+++ b/amazon-redshift-plugin/src/test/java/io/cdap/plugin/amazon/redshift/RedshiftSourceTestRun.java
@@ -0,0 +1,332 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */ + +package io.cdap.plugin.amazon.redshift; + +import com.google.common.collect.ImmutableMap; +import io.cdap.cdap.api.common.Bytes; +import io.cdap.cdap.api.data.format.StructuredRecord; +import io.cdap.cdap.api.dataset.table.Table; +import io.cdap.cdap.etl.api.batch.BatchSource; +import io.cdap.cdap.etl.mock.batch.MockSink; +import io.cdap.cdap.etl.proto.v2.ETLBatchConfig; +import io.cdap.cdap.etl.proto.v2.ETLPlugin; +import io.cdap.cdap.etl.proto.v2.ETLStage; +import io.cdap.cdap.proto.artifact.AppRequest; +import io.cdap.cdap.proto.id.ApplicationId; +import io.cdap.cdap.proto.id.NamespaceId; +import io.cdap.cdap.test.ApplicationManager; +import io.cdap.cdap.test.DataSetManager; +import io.cdap.plugin.common.Constants; +import io.cdap.plugin.db.ConnectionConfig; +import io.cdap.plugin.db.DBConfig; +import io.cdap.plugin.db.source.AbstractDBSource; +import org.junit.Assert; +import org.junit.Test; + +import java.math.BigDecimal; +import java.math.MathContext; +import java.nio.ByteBuffer; +import java.sql.Date; +import java.sql.Time; +import java.text.SimpleDateFormat; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Test for Redshift source plugin. 
+ */ +public class RedshiftSourceTestRun extends RedshiftPluginTestBase { + + @Test + @SuppressWarnings("ConstantConditions") + public void testDBMacroSupport() throws Exception { + String importQuery = "SELECT * FROM my_table WHERE \"DATE_COL\" <= '${logicalStartTime(yyyy-MM-dd,1d)}' " + + "AND $CONDITIONS"; + String boundingQuery = "SELECT MIN(ID),MAX(ID) from my_table"; + String splitBy = "ID"; + + ImmutableMap sourceProps = ImmutableMap.builder() + .putAll(BASE_PROPS) + .put(AbstractDBSource.DBSourceConfig.IMPORT_QUERY, importQuery) + .put(AbstractDBSource.DBSourceConfig.BOUNDING_QUERY, boundingQuery) + .put(AbstractDBSource.DBSourceConfig.SPLIT_BY, splitBy) + .put(Constants.Reference.REFERENCE_NAME, "DBTestSource").build(); + + ETLPlugin sourceConfig = new ETLPlugin( + RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, + sourceProps + ); + + ETLPlugin sinkConfig = MockSink.getPlugin("macroOutputTable"); + + ApplicationManager appManager = deployETL(sourceConfig, sinkConfig, + DATAPIPELINE_ARTIFACT, "testDBMacro"); + runETLOnce(appManager, ImmutableMap.of("logical.start.time", String.valueOf(CURRENT_TS))); + + DataSetManager outputManager = getDataset("macroOutputTable"); + Assert.assertTrue(MockSink.readOutput(outputManager).isEmpty()); + } + + @Test + @SuppressWarnings("ConstantConditions") + public void testDBSource() throws Exception { + String importQuery = "SELECT \"ID\", \"NAME\", \"SCORE\", \"GRADUATED\", \"SMALLINT_COL\", \"BIG\", " + + "\"NUMERIC_COL\", \"CHAR_COL\", \"DECIMAL_COL\", \"BYTEA_COL\", \"DATE_COL\", \"TIME_COL\", \"TIMESTAMP_COL\", " + + "\"TEXT_COL\", \"DOUBLE_PREC_COL\" FROM my_table " + + "WHERE \"ID\" < 3 AND $CONDITIONS"; + String boundingQuery = "SELECT MIN(\"ID\"),MAX(\"ID\") from my_table"; + String splitBy = "ID"; + ETLPlugin sourceConfig = new ETLPlugin( + RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, + ImmutableMap.builder() + .putAll(BASE_PROPS) + .put(AbstractDBSource.DBSourceConfig.IMPORT_QUERY, 
importQuery) + .put(AbstractDBSource.DBSourceConfig.BOUNDING_QUERY, boundingQuery) + .put(AbstractDBSource.DBSourceConfig.SPLIT_BY, splitBy) + .put(Constants.Reference.REFERENCE_NAME, "DBSourceTest") + .build(), + null + ); + + String outputDatasetName = "output-dbsourcetest"; + ETLPlugin sinkConfig = MockSink.getPlugin(outputDatasetName); + + ApplicationManager appManager = deployETL(sourceConfig, sinkConfig, + DATAPIPELINE_ARTIFACT, "testDBSource"); + runETLOnce(appManager); + + DataSetManager
outputManager = getDataset(outputDatasetName); + List outputRecords = MockSink.readOutput(outputManager); + + Assert.assertEquals(2, outputRecords.size()); + String userid = outputRecords.get(0).get("NAME"); + StructuredRecord row1 = "user1".equals(userid) ? outputRecords.get(0) : outputRecords.get(1); + StructuredRecord row2 = "user1".equals(userid) ? outputRecords.get(1) : outputRecords.get(0); + + // Verify data + Assert.assertEquals("user1", row1.get("NAME")); + Assert.assertEquals("user2", row2.get("NAME")); + Assert.assertEquals("user1", row1.get("TEXT_COL")); + Assert.assertEquals("user2", row2.get("TEXT_COL")); + Assert.assertEquals("char1", ((String) row1.get("CHAR_COL")).trim()); + Assert.assertEquals("char2", ((String) row2.get("CHAR_COL")).trim()); + Assert.assertEquals(124.45f, ((Float) row1.get("SCORE")).doubleValue(), 0.000001); + Assert.assertEquals(125.45f, ((Float) row2.get("SCORE")).doubleValue(), 0.000001); + Assert.assertEquals(false, row1.get("GRADUATED")); + Assert.assertEquals(true, row2.get("GRADUATED")); + Assert.assertNull(row1.get("NOT_IMPORTED")); + Assert.assertNull(row2.get("NOT_IMPORTED")); + + Assert.assertEquals(1, (int) row1.get("SMALLINT_COL")); + Assert.assertEquals(2, (int) row2.get("SMALLINT_COL")); + Assert.assertEquals(1, (long) row1.get("BIG")); + Assert.assertEquals(2, (long) row2.get("BIG")); + + Assert.assertEquals(new BigDecimal("124.45", new MathContext(PRECISION)).setScale(SCALE), + row1.getDecimal("NUMERIC_COL")); + Assert.assertEquals(new BigDecimal("125.45", new MathContext(PRECISION)).setScale(SCALE), + row2.getDecimal("NUMERIC_COL")); + Assert.assertEquals(new BigDecimal("124.45", new MathContext(PRECISION)).setScale(SCALE), + row1.getDecimal("DECIMAL_COL")); + + Assert.assertEquals(124.45, (double) row1.get("DOUBLE_PREC_COL"), 0.000001); + Assert.assertEquals(125.45, (double) row2.get("DOUBLE_PREC_COL"), 0.000001); + // Verify time columns + java.util.Date date = new java.util.Date(CURRENT_TS); + 
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); + LocalDate expectedDate = Date.valueOf(sdf.format(date)).toLocalDate(); + sdf = new SimpleDateFormat("H:mm:ss"); + LocalTime expectedTime = Time.valueOf(sdf.format(date)).toLocalTime(); + ZonedDateTime expectedTs = date.toInstant().atZone(ZoneId.ofOffset("UTC", ZoneOffset.UTC)); + Assert.assertEquals(expectedDate, row1.getDate("DATE_COL")); + Assert.assertEquals(expectedTime, row1.getTime("TIME_COL")); + Assert.assertEquals(expectedTs, row1.getTimestamp("TIMESTAMP_COL", ZoneId.ofOffset("UTC", ZoneOffset.UTC))); + + // verify binary columns + Assert.assertEquals("user1", Bytes.toString(((ByteBuffer) row1.get("BYTEA_COL")).array(), 0, 5)); + Assert.assertEquals("user2", Bytes.toString(((ByteBuffer) row2.get("BYTEA_COL")).array(), 0, 5)); + } + + @Test + public void testDbSourceMultipleTables() throws Exception { + String importQuery = "SELECT \"my_table\".\"ID\", \"your_table\".\"NAME\" FROM \"my_table\", \"your_table\"" + + " WHERE \"my_table\".\"ID\" < 3 and \"my_table\".\"ID\" = \"your_table\".\"ID\" and $CONDITIONS"; + String boundingQuery = "SELECT MIN(MIN(\"my_table\".\"ID\"), MIN(\"your_table\".\"ID\")), " + + "MAX(MAX(\"my_table\".\"ID\"), MAX(\"your_table\".\"ID\"))"; + String splitBy = "\"my_table\".\"ID\""; + ETLPlugin sourceConfig = new ETLPlugin( + RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, + ImmutableMap.builder() + .putAll(BASE_PROPS) + .put(AbstractDBSource.DBSourceConfig.IMPORT_QUERY, importQuery) + .put(AbstractDBSource.DBSourceConfig.BOUNDING_QUERY, boundingQuery) + .put(AbstractDBSource.DBSourceConfig.SPLIT_BY, splitBy) + .put(Constants.Reference.REFERENCE_NAME, "DBMultipleTest") + .build(), + null + ); + + String outputDatasetName = "output-multitabletest"; + ETLPlugin sinkConfig = MockSink.getPlugin(outputDatasetName); + + ApplicationManager appManager = deployETL(sourceConfig, sinkConfig, + DATAPIPELINE_ARTIFACT, "testDBSourceWithMultipleTables"); +
runETLOnce(appManager); + + // records should be written + DataSetManager
outputManager = getDataset(outputDatasetName); + List outputRecords = MockSink.readOutput(outputManager); + Assert.assertEquals(2, outputRecords.size()); + String userid = outputRecords.get(0).get("NAME"); + StructuredRecord row1 = "user1".equals(userid) ? outputRecords.get(0) : outputRecords.get(1); + StructuredRecord row2 = "user1".equals(userid) ? outputRecords.get(1) : outputRecords.get(0); + // Verify data + Assert.assertEquals("user1", row1.get("NAME")); + Assert.assertEquals("user2", row2.get("NAME")); + Assert.assertEquals(1, row1.get("ID").intValue()); + Assert.assertEquals(2, row2.get("ID").intValue()); + } + + @Test + public void testUserNamePasswordCombinations() throws Exception { + String importQuery = "SELECT * FROM \"my_table\" WHERE $CONDITIONS"; + String boundingQuery = "SELECT MIN(\"ID\"),MAX(\"ID\") from \"my_table\""; + String splitBy = "\"ID\""; + + ETLPlugin sinkConfig = MockSink.getPlugin("outputTable"); + + Map baseSourceProps = ImmutableMap.builder() + .put(ConnectionConfig.HOST, BASE_PROPS.get(ConnectionConfig.HOST)) + .put(ConnectionConfig.PORT, BASE_PROPS.get(ConnectionConfig.PORT)) + .put(ConnectionConfig.DATABASE, BASE_PROPS.get(ConnectionConfig.DATABASE)) + .put(ConnectionConfig.JDBC_PLUGIN_NAME, JDBC_DRIVER_NAME) + .put(AbstractDBSource.DBSourceConfig.IMPORT_QUERY, importQuery) + .put(AbstractDBSource.DBSourceConfig.BOUNDING_QUERY, boundingQuery) + .put(AbstractDBSource.DBSourceConfig.SPLIT_BY, splitBy) + .put(Constants.Reference.REFERENCE_NAME, "UserPassDBTest") + .build(); + + ApplicationId appId = NamespaceId.DEFAULT.app("dbTest"); + + // null user name, null password. Should succeed. 
+ // as source + ETLPlugin dbConfig = new ETLPlugin(RedshiftConstants.PLUGIN_NAME, BatchSource.PLUGIN_TYPE, + baseSourceProps, null); + ETLStage table = new ETLStage("uniqueTableSink", sinkConfig); + ETLStage database = new ETLStage("databaseSource", dbConfig); + ETLBatchConfig etlConfig = ETLBatchConfig.builder() + .addStage(database) + .addStage(table) + .addConnection(database.getName(), table.getName()) + .build(); + AppRequest appRequest = new AppRequest<>(DATAPIPELINE_ARTIFACT, etlConfig); + deployApplication(appId, appRequest); + + // null user name, non-null password. Should fail. + // as source + Map noUser = new HashMap<>(baseSourceProps); + noUser.put(DBConfig.PASSWORD, "password"); + database = new ETLStage("databaseSource", new ETLPlugin(RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, noUser, null)); + etlConfig = ETLBatchConfig.builder() + .addStage(database) + .addStage(table) + .addConnection(database.getName(), table.getName()) + .build(); + assertDeploymentFailure(appId, etlConfig, DATAPIPELINE_ARTIFACT, + "Deploying DB Source with null username but non-null password should have failed."); + + // non-null username, non-null, but empty password. Should succeed. 
+ // as source + Map emptyPassword = new HashMap<>(baseSourceProps); + emptyPassword.put(DBConfig.USER, "root"); + emptyPassword.put(DBConfig.PASSWORD, ""); + database = new ETLStage("databaseSource", new ETLPlugin(RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, emptyPassword, null)); + etlConfig = ETLBatchConfig.builder() + .addStage(database) + .addStage(table) + .addConnection(database.getName(), table.getName()) + .build(); + appRequest = new AppRequest<>(DATAPIPELINE_ARTIFACT, etlConfig); + deployApplication(appId, appRequest); + } + + @Test + public void testNonExistentDBTable() throws Exception { + // source + String importQuery = "SELECT \"ID\", \"NAME\" FROM \"dummy\" WHERE ID < 3 AND $CONDITIONS"; + String boundingQuery = "SELECT MIN(\"ID\"),MAX(\"ID\") FROM \"dummy\""; + String splitBy = "\"ID\""; + ETLPlugin sinkConfig = MockSink.getPlugin("table"); + ETLPlugin sourceBadNameConfig = new ETLPlugin( + RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, + ImmutableMap.builder() + .putAll(BASE_PROPS) + .put(AbstractDBSource.DBSourceConfig.IMPORT_QUERY, importQuery) + .put(AbstractDBSource.DBSourceConfig.BOUNDING_QUERY, boundingQuery) + .put(AbstractDBSource.DBSourceConfig.SPLIT_BY, splitBy) + .put(Constants.Reference.REFERENCE_NAME, "DBNonExistentTest") + .build(), + null); + ETLStage sink = new ETLStage("sink", sinkConfig); + ETLStage sourceBadName = new ETLStage("sourceBadName", sourceBadNameConfig); + + ETLBatchConfig etlConfig = ETLBatchConfig.builder() + .addStage(sourceBadName) + .addStage(sink) + .addConnection(sourceBadName.getName(), sink.getName()) + .build(); + ApplicationId appId = NamespaceId.DEFAULT.app("dbSourceNonExistingTest"); + assertDeployAppFailure(appId, etlConfig, DATAPIPELINE_ARTIFACT); + + // Bad connection + ETLPlugin sourceBadConnConfig = new ETLPlugin( + RedshiftConstants.PLUGIN_NAME, + BatchSource.PLUGIN_TYPE, + ImmutableMap.builder() + .put(ConnectionConfig.HOST, BASE_PROPS.get(ConnectionConfig.HOST)) + 
.put(ConnectionConfig.PORT, BASE_PROPS.get(ConnectionConfig.PORT)) + .put(ConnectionConfig.DATABASE, "dumDB") + .put(ConnectionConfig.USER, BASE_PROPS.get(ConnectionConfig.USER)) + .put(ConnectionConfig.PASSWORD, BASE_PROPS.get(ConnectionConfig.PASSWORD)) + .put(ConnectionConfig.JDBC_PLUGIN_NAME, JDBC_DRIVER_NAME) + .put(AbstractDBSource.DBSourceConfig.IMPORT_QUERY, importQuery) + .put(AbstractDBSource.DBSourceConfig.BOUNDING_QUERY, boundingQuery) + .put(AbstractDBSource.DBSourceConfig.SPLIT_BY, splitBy) + .put(Constants.Reference.REFERENCE_NAME, "RedshiftTest") + .build(), + null); + ETLStage sourceBadConn = new ETLStage("sourceBadConn", sourceBadConnConfig); + etlConfig = ETLBatchConfig.builder() + .addStage(sourceBadConn) + .addStage(sink) + .addConnection(sourceBadConn.getName(), sink.getName()) + .build(); + assertDeployAppFailure(appId, etlConfig, DATAPIPELINE_ARTIFACT); + } +} diff --git a/amazon-redshift-plugin/widgets/Redshift-batchsource.json b/amazon-redshift-plugin/widgets/Redshift-batchsource.json new file mode 100644 index 000000000..91e860ee9 --- /dev/null +++ b/amazon-redshift-plugin/widgets/Redshift-batchsource.json @@ -0,0 +1,240 @@ +{ + "metadata": { + "spec-version": "1.5" + }, + "display-name": "Redshift", + "configuration-groups": [ + { + "label": "Connection", + "properties": [ + { + "widget-type": "toggle", + "label": "Use connection", + "name": "useConnection", + "widget-attributes": { + "on": { + "value": "true", + "label": "YES" + }, + "off": { + "value": "false", + "label": "NO" + }, + "default": "false" + } + }, + { + "widget-type": "connection-select", + "label": "Connection", + "name": "connection", + "widget-attributes": { + "connectionType": "Redshift" + } + }, + { + "widget-type": "plugin-list", + "label": "JDBC Driver name", + "name": "jdbcPluginName", + "widget-attributes": { + "plugin-type": "jdbc" + } + }, + { + "widget-type": "textbox", + "label": "Host", + "name": "host", + "widget-attributes": { + "placeholder": "Redshift 
endpoint host name." + } + }, + { + "widget-type": "number", + "label": "Port", + "name": "port", + "widget-attributes": { + "default": "5439" + } + }, + { + "widget-type": "textbox", + "label": "Username", + "name": "user" + }, + { + "widget-type": "password", + "label": "Password", + "name": "password" + }, + { + "widget-type": "keyvalue", + "label": "Connection Arguments", + "name": "connectionArguments", + "widget-attributes": { + "showDelimiter": "false", + "key-placeholder": "Key", + "value-placeholder": "Value", + "kv-delimiter" : "=", + "delimiter" : ";" + } + } + ] + }, + { + "label": "Basic", + "properties": [ + { + "widget-type": "textbox", + "label": "Reference Name", + "name": "referenceName", + "widget-attributes": { + "placeholder": "Name used to identify this source for lineage. Typically, the name of the table/view." + } + }, + { + "widget-type": "textbox", + "label": "Database", + "name": "database" + }, + { + "widget-type": "connection-browser", + "widget-category": "plugin", + "widget-attributes": { + "connectionType": "Redshift", + "label": "Browse Database" + } + } + ] + }, + { + "label": "SQL Query", + "properties": [ + { + "widget-type": "textarea", + "label": "Import Query", + "name": "importQuery", + "widget-attributes": { + "rows": "4" + } + }, + { + "widget-type": "get-schema", + "widget-category": "plugin" + } + ] + }, + { + "label": "Advanced", + "properties": [ + { + "widget-type": "textarea", + "label": "Bounding Query", + "name": "boundingQuery", + "widget-attributes": { + "rows": "4" + } + }, + { + "widget-type": "textbox", + "label": "Split Column", + "name": "splitBy" + }, + { + "widget-type": "textbox", + "label": "Number of Splits", + "name": "numSplits", + "widget-attributes": { + "default": "1" + } + }, + { + "widget-type": "number", + "label": "Fetch Size", + "name": "fetchSize", + "widget-attributes": { + "default": "1000", + "minimum": "0" + } + } + ] + } + ], + "outputs": [ + { + "name": "schema", + "widget-type": 
"schema", + "widget-attributes": { + "schema-types": [ + "boolean", + "int", + "long", + "float", + "double", + "bytes", + "string" + ], + "schema-default-type": "string" + } + } + ], + "filters": [ + { + "name": "showConnectionProperties", + "condition": { + "expression": "useConnection == false" + }, + "show": [ + { + "type": "property", + "name": "jdbcPluginName" + }, + { + "type": "property", + "name": "instanceType" + }, + { + "type": "property", + "name": "host" + }, + { + "type": "property", + "name": "port" + }, + { + "type": "property", + "name": "user" + }, + { + "type": "property", + "name": "password" + }, + { + "type": "property", + "name": "database" + }, + { + "type": "property", + "name": "connectionArguments" + } + ] + }, + { + "name": "showConnectionId", + "condition": { + "expression": "useConnection == true" + }, + "show": [ + { + "type": "property", + "name": "connection" + } + ] + } + ], + "jump-config": { + "datasets": [ + { + "ref-property-name": "referenceName" + } + ] + } +} diff --git a/amazon-redshift-plugin/widgets/Redshift-connector.json b/amazon-redshift-plugin/widgets/Redshift-connector.json new file mode 100644 index 000000000..3a2af8e01 --- /dev/null +++ b/amazon-redshift-plugin/widgets/Redshift-connector.json @@ -0,0 +1,75 @@ +{ + "metadata": { + "spec-version": "1.0" + }, + "display-name": "Redshift", + "configuration-groups": [ + { + "label": "Basic", + "properties": [ + { + "widget-type": "plugin-list", + "label": "JDBC Driver name", + "name": "jdbcPluginName", + "widget-attributes": { + "plugin-type": "jdbc" + } + }, + { + "widget-type": "textbox", + "label": "Host", + "name": "host", + "widget-attributes": { + "default": "localhost" + } + }, + { + "widget-type": "number", + "label": "Port", + "name": "port", + "widget-attributes": { + "default": "5439" + } + }, + { + "widget-type": "textbox", + "label": "Database", + "name": "database" + } + ] + }, + { + "label": "Credentials", + "properties": [ + { + "widget-type":
"textbox", + "label": "Username", + "name": "user" + }, + { + "widget-type": "password", + "label": "Password", + "name": "password" + } + ] + }, + { + "label": "Advanced", + "properties": [ + { + "widget-type": "keyvalue", + "label": "Connection Arguments", + "name": "connectionArguments", + "widget-attributes": { + "showDelimiter": "false", + "key-placeholder": "Key", + "value-placeholder": "Value", + "kv-delimiter": "=", + "delimiter": ";" + } + } + ] + } + ], + "outputs": [] +} diff --git a/aurora-mysql-plugin/pom.xml b/aurora-mysql-plugin/pom.xml index 51ff6fb06..d552ce1b4 100644 --- a/aurora-mysql-plugin/pom.xml +++ b/aurora-mysql-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Aurora DB MySQL plugin diff --git a/aurora-postgresql-plugin/pom.xml b/aurora-postgresql-plugin/pom.xml index 28de0db21..316f05f7c 100644 --- a/aurora-postgresql-plugin/pom.xml +++ b/aurora-postgresql-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Aurora DB PostgreSQL plugin @@ -103,13 +103,13 @@ <_exportcontents> - io.cdap.plugin.auroradb.postgres.*; - io.cdap.plugin.postgres.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.auroradb.postgres.*; + io.cdap.plugin.postgres.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/cloudsql-mysql-plugin/pom.xml b/cloudsql-mysql-plugin/pom.xml index d05e592f5..fd9b742c2 100644 --- a/cloudsql-mysql-plugin/pom.xml +++ b/cloudsql-mysql-plugin/pom.xml @@ -20,13 +20,13 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 CloudSQL MySQL plugin cloudsql-mysql-plugin 4.0.0 - + io.cdap.cdap @@ -45,7 +45,7 @@ io.cdap.plugin mysql-plugin - 1.11.0-SNAPSHOT + ${project.version} @@ -99,7 +99,7 
@@ test - + @@ -128,13 +128,13 @@ <_exportcontents> - io.cdap.plugin.cloudsql.mysql.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - io.cdap.plugin.mysql.*; - org.apache.commons.lang.*; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.cloudsql.mysql.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + io.cdap.plugin.mysql.*; + org.apache.commons.lang.*; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnector.java b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnector.java index a5ee68787..b4b87c81b 100644 --- a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnector.java +++ b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnector.java @@ -16,6 +16,7 @@ package io.cdap.plugin.cloudsql.mysql; +import com.google.common.collect.Maps; import io.cdap.cdap.api.annotation.Category; import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; @@ -75,7 +76,7 @@ public StructuredRecord transform(LongWritable longWritable, MysqlDBRecord mysql @Override protected SchemaReader getSchemaReader(String sessionID) { - return new MysqlSchemaReader(sessionID); + return new MysqlSchemaReader(sessionID, Maps.fromProperties(config.getConnectionArgumentsProperties())); } @Override diff --git a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnectorConfig.java b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnectorConfig.java index 1e89d5a95..e763f6235 100644 --- a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnectorConfig.java +++ b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLConnectorConfig.java @@ -105,14 +105,14 @@ public String 
getConnectionString() { @Override public Properties getConnectionArgumentsProperties() { Properties properties = super.getConnectionArgumentsProperties(); - properties.put(JDBC_PROPERTY_CONNECT_TIMEOUT_MILLIS, "20000"); - properties.put(JDBC_PROPERTY_SOCKET_TIMEOUT_MILLIS, "20000"); + properties.putIfAbsent(JDBC_PROPERTY_CONNECT_TIMEOUT_MILLIS, "20000"); + properties.putIfAbsent(JDBC_PROPERTY_SOCKET_TIMEOUT_MILLIS, "20000"); return properties; } @Override public boolean canConnect() { return super.canConnect() && !containsMacro(CloudSQLUtil.CONNECTION_NAME) && - !containsMacro(ConnectionConfig.PORT) && !containsMacro(ConnectionConfig.DATABASE); + !containsMacro(ConnectionConfig.PORT) && !containsMacro(ConnectionConfig.DATABASE); } } diff --git a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSink.java b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSink.java index 271012f7e..6149c114b 100644 --- a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSink.java +++ b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSink.java @@ -16,6 +16,7 @@ package io.cdap.plugin.cloudsql.mysql; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import io.cdap.cdap.api.annotation.Description; @@ -25,6 +26,7 @@ import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; import io.cdap.cdap.api.data.format.StructuredRecord; +import io.cdap.cdap.api.data.schema.Schema; import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.cdap.etl.api.PipelineConfigurer; import io.cdap.cdap.etl.api.batch.BatchSink; @@ -40,7 +42,11 @@ import io.cdap.plugin.util.CloudSQLUtil; import io.cdap.plugin.util.DBUtils; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; +import java.util.StringJoiner; import 
javax.annotation.Nullable; /** Sink support for a CloudSQL MySQL database. */ @@ -52,6 +58,7 @@ public class CloudSQLMySQLSink extends AbstractDBSink { private final CloudSQLMySQLSinkConfig cloudsqlMysqlSinkConfig; + private static final Character ESCAPE_CHAR = '`'; public CloudSQLMySQLSink(CloudSQLMySQLSinkConfig cloudsqlMysqlSinkConfig) { super(cloudsqlMysqlSinkConfig); @@ -78,6 +85,24 @@ protected DBRecord getDBRecord(StructuredRecord output) { return new MysqlDBRecord(output, columnTypes); } + @Override + protected void setColumnsInfo(List fields) { + List columnsList = new ArrayList<>(); + StringJoiner columnsJoiner = new StringJoiner(","); + for (Schema.Field field : fields) { + columnsList.add(field.getName()); + columnsJoiner.add(ESCAPE_CHAR + field.getName() + ESCAPE_CHAR); + } + + super.columns = Collections.unmodifiableList(columnsList); + super.dbColumns = columnsJoiner.toString(); + } + + @VisibleForTesting + String getDbColumns() { + return dbColumns; + } + @Override protected LineageRecorder getLineageRecorder(BatchSinkContext context) { String host; diff --git a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSource.java b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSource.java index b8b6fbf27..b0bea9e7a 100644 --- a/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSource.java +++ b/cloudsql-mysql-plugin/src/main/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSource.java @@ -31,9 +31,11 @@ import io.cdap.plugin.common.Asset; import io.cdap.plugin.common.ConfigUtil; import io.cdap.plugin.common.LineageRecorder; +import io.cdap.plugin.db.SchemaReader; import io.cdap.plugin.db.config.AbstractDBSpecificSourceConfig; import io.cdap.plugin.db.source.AbstractDBSource; import io.cdap.plugin.mysql.MysqlDBRecord; +import io.cdap.plugin.mysql.MysqlSchemaReader; import io.cdap.plugin.util.CloudSQLUtil; import io.cdap.plugin.util.DBUtils; import 
org.apache.hadoop.mapreduce.lib.db.DBWritable; @@ -120,6 +122,11 @@ protected LineageRecorder getLineageRecorder(BatchSourceContext context) { return new LineageRecorder(context, assetBuilder.build()); } + @Override + protected SchemaReader getSchemaReader() { + return new MysqlSchemaReader(null, cloudsqlMysqlSourceConfig.getConnectionArguments()); + } + /** CloudSQL MySQL source config. */ public static class CloudSQLMySQLSourceConfig extends AbstractDBSpecificSourceConfig { diff --git a/cloudsql-mysql-plugin/src/test/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSinkTest.java b/cloudsql-mysql-plugin/src/test/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSinkTest.java new file mode 100644 index 000000000..65a14502e --- /dev/null +++ b/cloudsql-mysql-plugin/src/test/java/io/cdap/plugin/cloudsql/mysql/CloudSQLMySQLSinkTest.java @@ -0,0 +1,35 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.plugin.cloudsql.mysql; + +import io.cdap.cdap.api.data.schema.Schema; +import org.junit.Assert; +import org.junit.Test; + +public class CloudSQLMySQLSinkTest { + @Test + public void testSetColumnsInfo() { + Schema outputSchema = Schema.recordOf("output", + Schema.Field.of("id", Schema.of(Schema.Type.INT)), + Schema.Field.of("name", Schema.of(Schema.Type.STRING)), + Schema.Field.of("insert", Schema.of(Schema.Type.STRING))); + CloudSQLMySQLSink cloudSQLMySQLSink = new CloudSQLMySQLSink(new CloudSQLMySQLSink.CloudSQLMySQLSinkConfig()); + Assert.assertNotNull(outputSchema.getFields()); + cloudSQLMySQLSink.setColumnsInfo(outputSchema.getFields()); + Assert.assertEquals("`id`,`name`,`insert`", cloudSQLMySQLSink.getDbColumns()); + } +} diff --git a/cloudsql-postgresql-plugin/pom.xml b/cloudsql-postgresql-plugin/pom.xml index 2f974e854..b1217665e 100644 --- a/cloudsql-postgresql-plugin/pom.xml +++ b/cloudsql-postgresql-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 CloudSQL PostgreSQL plugin @@ -133,13 +133,13 @@ <_exportcontents> - io.cdap.plugin.cloudsql.postgres.*; - io.cdap.plugin.postgres.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang.*; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.cloudsql.postgres.*; + io.cdap.plugin.postgres.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang.*; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/database-commons/pom.xml b/database-commons/pom.xml index 0ecbfb445..0ceec28ca 100644 --- a/database-commons/pom.xml +++ b/database-commons/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Database Commons diff --git a/database-commons/src/main/java/io/cdap/plugin/db/ConnectionConfig.java b/database-commons/src/main/java/io/cdap/plugin/db/ConnectionConfig.java index 
588ed78b8..c5320e25e 100644 --- a/database-commons/src/main/java/io/cdap/plugin/db/ConnectionConfig.java +++ b/database-commons/src/main/java/io/cdap/plugin/db/ConnectionConfig.java @@ -45,6 +45,7 @@ public abstract class ConnectionConfig extends PluginConfig implements DatabaseC public static final String CONNECTION_ARGUMENTS = "connectionArguments"; public static final String JDBC_PLUGIN_NAME = "jdbcPluginName"; public static final String JDBC_PLUGIN_TYPE = "jdbc"; + public static final String TRANSACTION_ISOLATION_LEVEL = "transactionIsolationLevel"; @Name(JDBC_PLUGIN_NAME) @Description("Name of the JDBC driver to use. This is the value of the 'jdbcPluginName' key defined in the JSON " + diff --git a/database-commons/src/main/java/io/cdap/plugin/db/connector/AbstractDBSpecificConnectorConfig.java b/database-commons/src/main/java/io/cdap/plugin/db/connector/AbstractDBSpecificConnectorConfig.java index 5c6b08031..8de0e4d70 100644 --- a/database-commons/src/main/java/io/cdap/plugin/db/connector/AbstractDBSpecificConnectorConfig.java +++ b/database-commons/src/main/java/io/cdap/plugin/db/connector/AbstractDBSpecificConnectorConfig.java @@ -20,8 +20,9 @@ import io.cdap.cdap.api.annotation.Macro; import io.cdap.cdap.api.annotation.Name; import io.cdap.plugin.db.ConnectionConfig; +import io.cdap.plugin.db.TransactionIsolationLevel; -import java.util.Collections; +import java.util.HashMap; import java.util.Map; import javax.annotation.Nullable; @@ -42,6 +43,12 @@ public abstract class AbstractDBSpecificConnectorConfig extends AbstractDBConnec @Nullable protected Integer port; + @Name(ConnectionConfig.TRANSACTION_ISOLATION_LEVEL) + @Description("The transaction isolation level for the database session.") + @Macro + @Nullable + protected String transactionIsolationLevel; + public String getHost() { return host; } @@ -55,4 +62,21 @@ public int getPort() { public boolean canConnect() { return super.canConnect() && !containsMacro(ConnectionConfig.HOST) && 
!containsMacro(ConnectionConfig.PORT); } + + @Override + public Map getAdditionalArguments() { + Map additionalArguments = new HashMap<>(); + if (getTransactionIsolationLevel() != null) { + additionalArguments.put(TransactionIsolationLevel.CONF_KEY, getTransactionIsolationLevel()); + } + return additionalArguments; + } + + public String getTransactionIsolationLevel() { + if (transactionIsolationLevel == null) { + return null; + } + return TransactionIsolationLevel.Level.valueOf(transactionIsolationLevel).name(); + } } + diff --git a/database-commons/src/main/java/io/cdap/plugin/db/source/AbstractDBSource.java b/database-commons/src/main/java/io/cdap/plugin/db/source/AbstractDBSource.java index 987b5cc17..8eeb4a155 100644 --- a/database-commons/src/main/java/io/cdap/plugin/db/source/AbstractDBSource.java +++ b/database-commons/src/main/java/io/cdap/plugin/db/source/AbstractDBSource.java @@ -484,7 +484,7 @@ public void validateSchema(Schema actualSchema, FailureCollector collector) { } @VisibleForTesting - static void validateSchema(Schema actualSchema, Schema configSchema, FailureCollector collector) { + void validateSchema(Schema actualSchema, Schema configSchema, FailureCollector collector) { if (configSchema == null) { collector.addFailure("Schema should not be null or empty.", null) .withConfigProperty(SCHEMA); @@ -505,14 +505,20 @@ static void validateSchema(Schema actualSchema, Schema configSchema, FailureColl Schema expectedFieldSchema = field.getSchema().isNullable() ?
field.getSchema().getNonNullable() : field.getSchema(); - if (actualFieldSchema.getType() != expectedFieldSchema.getType() || - actualFieldSchema.getLogicalType() != expectedFieldSchema.getLogicalType()) { - collector.addFailure( - String.format("Schema field '%s' has type '%s but found '%s'.", - field.getName(), expectedFieldSchema.getDisplayName(), - actualFieldSchema.getDisplayName()), null) - .withOutputSchemaField(field.getName()); - } + validateField(collector, field, actualFieldSchema, expectedFieldSchema); + } + } + + protected void validateField(FailureCollector collector, Schema.Field field, Schema actualFieldSchema, + Schema expectedFieldSchema) { + if (actualFieldSchema.getType() != expectedFieldSchema.getType() || + actualFieldSchema.getLogicalType() != expectedFieldSchema.getLogicalType()) { + collector.addFailure( + String.format("Schema field '%s' is expected to have type '%s' but found '%s'.", field.getName(), + expectedFieldSchema.getDisplayName(), actualFieldSchema.getDisplayName()), + String.format("Change the data type of field %s to %s.", field.getName(), + actualFieldSchema.getDisplayName())) + .withOutputSchemaField(field.getName()); } } diff --git a/database-commons/src/test/java/io/cdap/plugin/db/source/AbstractDBSourceTest.java b/database-commons/src/test/java/io/cdap/plugin/db/source/AbstractDBSourceTest.java index 3dc7a2d1c..a8be38b46 100644 --- a/database-commons/src/test/java/io/cdap/plugin/db/source/AbstractDBSourceTest.java +++ b/database-commons/src/test/java/io/cdap/plugin/db/source/AbstractDBSourceTest.java @@ -43,11 +43,17 @@ public class AbstractDBSourceTest { Schema.Field.of("double_column", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))), Schema.Field.of("boolean_column", Schema.nullableOf(Schema.of(Schema.Type.BOOLEAN))) ); + private static final AbstractDBSource.DBSourceConfig TEST_CONFIG = new AbstractDBSource.DBSourceConfig() { + @Override + public String getConnectionString() { + return ""; + } + }; @Test public void
testValidateSourceSchemaCorrectSchema() { MockFailureCollector collector = new MockFailureCollector(MOCK_STAGE); - AbstractDBSource.DBSourceConfig.validateSchema(SCHEMA, SCHEMA, collector); + TEST_CONFIG.validateSchema(SCHEMA, SCHEMA, collector); Assert.assertEquals(0, collector.getValidationFailures().size()); } @@ -65,7 +71,7 @@ public void testValidateSourceSchemaMismatchFields() { ); MockFailureCollector collector = new MockFailureCollector(MOCK_STAGE); - AbstractDBSource.DBSourceConfig.validateSchema(actualSchema, SCHEMA, collector); + TEST_CONFIG.validateSchema(actualSchema, SCHEMA, collector); assertPropertyValidationFailed(collector, "boolean_column"); } @@ -84,7 +90,7 @@ public void testValidateSourceSchemaInvalidFieldType() { ); MockFailureCollector collector = new MockFailureCollector(MOCK_STAGE); - AbstractDBSource.DBSourceConfig.validateSchema(actualSchema, SCHEMA, collector); + TEST_CONFIG.validateSchema(actualSchema, SCHEMA, collector); assertPropertyValidationFailed(collector, "boolean_column"); } diff --git a/db2-plugin/pom.xml b/db2-plugin/pom.xml index a43bcb92e..85f2d2089 100644 --- a/db2-plugin/pom.xml +++ b/db2-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 IBM DB2 plugin @@ -98,12 +98,12 @@ <_exportcontents> - io.cdap.plugin.db2.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.db2.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/generic-database-plugin/pom.xml b/generic-database-plugin/pom.xml index dbcd46d47..d7696c84c 100644 --- a/generic-database-plugin/pom.xml +++ b/generic-database-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Generic database plugin @@ -97,9 +97,9 @@ 
<_exportcontents> - io.cdap.plugin.jdbc.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; + io.cdap.plugin.jdbc.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; *;inline=false;scope=compile true diff --git a/generic-db-argument-setter/pom.xml b/generic-db-argument-setter/pom.xml index 8a8dcd1c4..ad084bbce 100644 --- a/generic-db-argument-setter/pom.xml +++ b/generic-db-argument-setter/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Generic database argument setter plugin @@ -97,12 +97,12 @@ <_exportcontents> - io.cdap.plugin.jdbc.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.jdbc.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/mariadb-plugin/docs/Mariadb-batchsink.md b/mariadb-plugin/docs/Mariadb-batchsink.md index 11176c0db..e4541fe67 100644 --- a/mariadb-plugin/docs/Mariadb-batchsink.md +++ b/mariadb-plugin/docs/Mariadb-batchsink.md @@ -60,41 +60,39 @@ connections. 
Data Types Mapping ---------- - +--------------------------------+-----------------------+------------------------------------+ - | MariaDB Data Type | CDAP Schema Data Type | Comment | - +--------------------------------+-----------------------+------------------------------------+ - | TINYINT | int | | - | BOOLEAN, BOOL | boolean | | - | SMALLINT | int | | - | MEDIUMINT | int | | - | INT, INTEGER | int | | - | BIGINT | long | | - | DECIMAL, DEC, NUMERIC, FIXED | decimal | | - | FLOAT | float | | - | DOUBLE, DOUBLE PRECISION, REAL | decimal | | - | BIT | boolean | | - | CHAR | string | | - | VARCHAR | string | | - | BINARY | bytes | | - | CHAR BYTE | bytes | | - | VARBINARY | bytes | | - | TINYBLOB | bytes | | - | BLOB | bytes | | - | MEDIUMBLOB | bytes | | - | LONGBLOB | bytes | | - | TINYTEXT | string | | - | TEXT | string | | - | MEDIUMTEXT | string | | - | LONGTEXT | string | | - | JSON | string | In MariaDB it is alias to LONGTEXT | - | ENUM | string | Mapping to String by default | - | SET | string | | - | DATE | date | | - | TIME | time_micros | | - | DATETIME | timestamp_micros | | - | TIMESTAMP | timestamp_micros | | - | YEAR | date | | - +--------------------------------+-----------------------+------------------------------------+ + | MariaDB Data Type | CDAP Schema Data Type | Comment | + |--------------------------------|-----------------------|---------------------------------------------------------| + | TINYINT | int | | + | BOOLEAN, BOOL | boolean | | + | SMALLINT | int | | + | MEDIUMINT | int | | + | INT, INTEGER | int | | + | BIGINT | long | | + | DECIMAL, DEC, NUMERIC, FIXED | decimal | | + | FLOAT | float | | + | DOUBLE, DOUBLE PRECISION, REAL | decimal | | + | BIT | boolean | | + | CHAR | string | | + | VARCHAR | string | | + | BINARY | bytes | | + | CHAR BYTE | bytes | | + | VARBINARY | bytes | | + | TINYBLOB | bytes | | + | BLOB | bytes | | + | MEDIUMBLOB | bytes | | + | LONGBLOB | bytes | | + | TINYTEXT | string | | + | TEXT | string | | + 
| MEDIUMTEXT | string | | + | LONGTEXT | string | | + | JSON | string | In MariaDB it is alias to LONGTEXT | + | ENUM | string | Mapping to String by default | + | SET | string | | + | DATE | date | | + | TIME | time_micros | | + | DATETIME | timestamp_micros | | + | TIMESTAMP | timestamp_micros | | + | YEAR | int | Users can manually set output schema to map it to Date. | Example ------- diff --git a/mariadb-plugin/docs/Mariadb-batchsource.md b/mariadb-plugin/docs/Mariadb-batchsource.md index 2b1fe3944..713af2ee8 100644 --- a/mariadb-plugin/docs/Mariadb-batchsource.md +++ b/mariadb-plugin/docs/Mariadb-batchsource.md @@ -78,43 +78,39 @@ with the tradeoff of higher memory usage. Data Types Mapping ---------- - - +--------------------------------+-----------------------+------------------------------------+ - | MariaDB Data Type | CDAP Schema Data Type | Comment | - +--------------------------------+-----------------------+------------------------------------+ - | TINYINT | int | | - | BOOLEAN, BOOL | boolean | | - | SMALLINT | int | | - | MEDIUMINT | int | | - | INT, INTEGER | int | | - | BIGINT | long | | - | DECIMAL, DEC, NUMERIC, FIXED | decimal | | - | FLOAT | float | | - | DOUBLE, DOUBLE PRECISION, REAL | decimal | | - | BIT | boolean | | - | CHAR | string | | - | VARCHAR | string | | - | BINARY | bytes | | - | CHAR BYTE | bytes | | - | VARBINARY | bytes | | - | TINYBLOB | bytes | | - | BLOB | bytes | | - | MEDIUMBLOB | bytes | | - | LONGBLOB | bytes | | - | TINYTEXT | string | | - | TEXT | string | | - | MEDIUMTEXT | string | | - | LONGTEXT | string | | - | JSON | string | In MariaDB it is alias to LONGTEXT | - | ENUM | string | Mapping to String by default | - | SET | string | | - | DATE | date | | - | TIME | time_micros | | - | DATETIME | timestamp_micros | | - | TIMESTAMP | timestamp_micros | | - | YEAR | date | | - +--------------------------------+-----------------------+------------------------------------+ - + | MariaDB Data Type | CDAP Schema Data Type 
| Comment | + |--------------------------------|-----------------------|---------------------------------------------------------| + | TINYINT | int | | + | BOOLEAN, BOOL | boolean | | + | SMALLINT | int | | + | MEDIUMINT | int | | + | INT, INTEGER | int | | + | BIGINT | long | | + | DECIMAL, DEC, NUMERIC, FIXED | decimal | | + | FLOAT | float | | + | DOUBLE, DOUBLE PRECISION, REAL | decimal | | + | BIT | boolean | | + | CHAR | string | | + | VARCHAR | string | | + | BINARY | bytes | | + | CHAR BYTE | bytes | | + | VARBINARY | bytes | | + | TINYBLOB | bytes | | + | BLOB | bytes | | + | MEDIUMBLOB | bytes | | + | LONGBLOB | bytes | | + | TINYTEXT | string | | + | TEXT | string | | + | MEDIUMTEXT | string | | + | LONGTEXT | string | | + | JSON | string | In MariaDB it is alias to LONGTEXT | + | ENUM | string | Mapping to String by default | + | SET | string | | + | DATE | date | | + | TIME | time_micros | | + | DATETIME | timestamp_micros | | + | TIMESTAMP | timestamp_micros | | + | YEAR | int | Users can manually set output schema to map it to Date. 
| Example ------ diff --git a/mariadb-plugin/pom.xml b/mariadb-plugin/pom.xml index 0e9a09e02..bdb8cfb2f 100644 --- a/mariadb-plugin/pom.xml +++ b/mariadb-plugin/pom.xml @@ -17,108 +17,113 @@ - - database-plugins-parent - io.cdap.plugin - 1.11.0-SNAPSHOT - + + database-plugins-parent + io.cdap.plugin + 1.11.8 + - Maria DB plugin - mariadb-plugin - 4.0.0 + Maria DB plugin + mariadb-plugin + 4.0.0 - - - io.cdap.cdap - cdap-etl-api - - - io.cdap.plugin - database-commons - ${project.version} - - - io.cdap.plugin - hydrator-common - - - com.google.guava - guava - + + + io.cdap.cdap + cdap-etl-api + + + io.cdap.plugin + database-commons + ${project.version} + + + io.cdap.plugin + hydrator-common + + + com.google.guava + guava + - - - io.cdap.plugin - database-commons - ${project.version} - test-jar - test - - - io.cdap.cdap - hydrator-test - - - io.cdap.cdap - cdap-data-pipeline3_2.12 - - - junit - junit - - - io.cdap.cdap - cdap-api - provided - - - org.mariadb.jdbc - mariadb-java-client - 2.7.3 - test - - - org.jetbrains - annotations - RELEASE - compile - - - - - - io.cdap - cdap-maven-plugin - - - org.apache.felix - maven-bundle-plugin - 5.1.2 - true - - - <_exportcontents> - io.cdap.plugin.mariadb.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* - - *;inline=false;scope=compile - true - lib - - - - - package - - bundle - - - - - - + + + io.cdap.plugin + database-commons + ${project.version} + test-jar + test + + + io.cdap.cdap + hydrator-test + + + io.cdap.cdap + cdap-data-pipeline3_2.12 + + + junit + junit + + + io.cdap.cdap + cdap-api + provided + + + org.mariadb.jdbc + mariadb-java-client + 2.7.3 + test + + + org.jetbrains + annotations + RELEASE + compile + + + io.cdap.plugin + mysql-plugin + ${project.version} + + + + + + io.cdap + cdap-maven-plugin + + + org.apache.felix + maven-bundle-plugin + 5.1.2 + true + + + <_exportcontents> + io.cdap.plugin.mariadb.*; + 
io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* + + *;inline=false;scope=compile + true + lib + + + + + package + + bundle + + + + + + diff --git a/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbDBRecord.java b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbDBRecord.java new file mode 100644 index 000000000..94498c787 --- /dev/null +++ b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbDBRecord.java @@ -0,0 +1,40 @@ +/* + * Copyright © 2025 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.mariadb; + +import io.cdap.cdap.api.data.format.StructuredRecord; +import io.cdap.plugin.db.ColumnType; +import io.cdap.plugin.mysql.MysqlDBRecord; +import java.util.List; + +/** + * Writable class for MariaDB Source/Sink. + */ +public class MariadbDBRecord extends MysqlDBRecord { + + /** + * Used in map-reduce. Do not remove. 
+ */ + @SuppressWarnings("unused") + public MariadbDBRecord() { + // Required by Hadoop DBRecordReader to create an instance + } + + public MariadbDBRecord(StructuredRecord record, List columnTypes) { + super(record, columnTypes); + } +} diff --git a/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbFieldsValidator.java b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbFieldsValidator.java new file mode 100644 index 000000000..71ccb0d06 --- /dev/null +++ b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbFieldsValidator.java @@ -0,0 +1,25 @@ +/* + * Copyright © 2025 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.mariadb; + +import io.cdap.plugin.mysql.MysqlFieldsValidator; + +/** + * Field validator for MariaDB + */ +public class MariadbFieldsValidator extends MysqlFieldsValidator { +} diff --git a/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSchemaReader.java b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSchemaReader.java new file mode 100644 index 000000000..37ac12a93 --- /dev/null +++ b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSchemaReader.java @@ -0,0 +1,36 @@ +/* + * Copyright © 2025 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License.
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.mariadb; + + +import io.cdap.plugin.mysql.MysqlSchemaReader; +import java.util.Map; + +/** + * Schema reader for mapping Maria DB type + */ +public class MariadbSchemaReader extends MysqlSchemaReader { + + public MariadbSchemaReader (String sessionID) { + super(sessionID); + } + + public MariadbSchemaReader (String sessionID, Map connectionArguments) { + super(sessionID, connectionArguments); + } + +} diff --git a/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSink.java b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSink.java index ab20f3c5d..57455cbdc 100644 --- a/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSink.java +++ b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSink.java @@ -19,9 +19,13 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.api.data.format.StructuredRecord; import io.cdap.cdap.etl.api.batch.BatchSink; +import io.cdap.plugin.db.DBRecord; +import io.cdap.plugin.db.SchemaReader; import io.cdap.plugin.db.config.DBSpecificSinkConfig; import io.cdap.plugin.db.sink.AbstractDBSink; +import io.cdap.plugin.db.sink.FieldsValidator; import java.util.Map; import javax.annotation.Nullable; @@ -45,6 +49,22 @@ public MariadbSink(MariadbSinkConfig mariadbSinkConfig) { this.mariadbSinkConfig = mariadbSinkConfig; } + @Override + protected DBRecord getDBRecord(StructuredRecord output) { + return new MariadbDBRecord(output, columnTypes); + } + + @Override + 
protected SchemaReader getSchemaReader() { + return new MariadbSchemaReader(null); + } + + + @Override + protected FieldsValidator getFieldsValidator() { + return new MariadbFieldsValidator(); + } + /** * MariaDB Sink Config. */ diff --git a/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSource.java b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSource.java index d5ffcb290..3a473dca7 100644 --- a/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSource.java +++ b/mariadb-plugin/src/main/java/io/cdap/plugin/mariadb/MariadbSource.java @@ -19,10 +19,19 @@ import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; +import io.cdap.cdap.api.data.schema.Schema; +import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.cdap.etl.api.batch.BatchSource; +import io.cdap.cdap.etl.api.batch.BatchSourceContext; +import io.cdap.plugin.common.Asset; +import io.cdap.plugin.common.LineageRecorder; +import io.cdap.plugin.db.SchemaReader; import io.cdap.plugin.db.config.DBSpecificSourceConfig; import io.cdap.plugin.db.source.AbstractDBSource; +import io.cdap.plugin.util.DBUtils; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; +import java.util.HashMap; import java.util.List; import java.util.Map; import javax.annotation.Nullable; @@ -53,10 +62,36 @@ protected String createConnectionString() { mariadbSourceConfig.host, mariadbSourceConfig.port, mariadbSourceConfig.database); } + @Override + protected Class getDBRecordType() { + return MariadbDBRecord.class; + } + + @Override + protected LineageRecorder getLineageRecorder(BatchSourceContext context) { + String fqn = DBUtils.constructFQN("mariadb", + mariadbSourceConfig.host, + mariadbSourceConfig.port, + mariadbSourceConfig.database, + mariadbSourceConfig.getReferenceName()); + Asset asset = Asset.builder(mariadbSourceConfig.getReferenceName()).setFqn(fqn).build(); + return new LineageRecorder(context, asset); + } + + 
@Override + protected SchemaReader getSchemaReader() { + return new MariadbSchemaReader(null, mariadbSourceConfig.getConnectionArguments()); + } + /** * MaraiDB source mariadbSourceConfig. */ public static class MariadbSourceConfig extends DBSpecificSourceConfig { + private static final String JDBC_PROPERTY_CONNECT_TIMEOUT = "connectTimeout"; + private static final String JDBC_PROPERTY_SOCKET_TIMEOUT = "socketTimeout"; + private static final String JDBC_REWRITE_BATCHED_STATEMENTS = "rewriteBatchedStatements"; + + private static final String MARIADB_TINYINT1_IS_BIT = "tinyInt1isBit"; @Name(MariadbConstants.AUTO_RECONNECT) @Description("Should the driver try to re-establish stale and/or dead connections") @@ -116,5 +151,43 @@ public Map getDBSpecificArguments() { public List getInitQueries() { return MariadbUtil.composeDbInitQueries(useAnsiQuotes); } + + @Override + public Map getConnectionArguments() { + Map arguments = new HashMap<>(super.getConnectionArguments()); + // the unit below is millisecond + arguments.putIfAbsent(JDBC_PROPERTY_CONNECT_TIMEOUT, "20000"); + arguments.putIfAbsent(JDBC_PROPERTY_SOCKET_TIMEOUT, "20000"); + arguments.putIfAbsent(JDBC_REWRITE_BATCHED_STATEMENTS, "true"); + // MariaDB property to ensure that TINYINT(1) type data is not converted to MariaDB Bit/Boolean type in the + // ResultSet. 
+ arguments.putIfAbsent(MARIADB_TINYINT1_IS_BIT, "false"); + return arguments; + } + + @Override + protected void validateField(FailureCollector collector, + Schema.Field field, + Schema actualFieldSchema, + Schema expectedFieldSchema) { + // Backward compatibility changes to support MySQL YEAR to Date type conversion + if (Schema.LogicalType.DATE.equals(expectedFieldSchema.getLogicalType()) + && Schema.Type.INT.equals(actualFieldSchema.getType())) { + return; + } + + // Backward compatibility change to support MySQL MEDIUMINT UNSIGNED to Long type conversion + if (Schema.Type.LONG.equals(expectedFieldSchema.getType()) + && Schema.Type.INT.equals(actualFieldSchema.getType())) { + return; + } + + // Backward compatibility change to support MySQL TINYINT(1) to Bool type conversion + if (Schema.Type.BOOLEAN.equals(expectedFieldSchema.getType()) + && Schema.Type.INT.equals(actualFieldSchema.getType())) { + return; + } + super.validateField(collector, field, actualFieldSchema, expectedFieldSchema); + } } } diff --git a/memsql-plugin/pom.xml b/memsql-plugin/pom.xml index 5c50a857e..f943e43d4 100644 --- a/memsql-plugin/pom.xml +++ b/memsql-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Memsql plugin @@ -95,12 +95,12 @@ <_exportcontents> - io.cdap.plugin.memsql.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.memsql.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/mssql-plugin/docs/SQL Server-connector.md b/mssql-plugin/docs/SQL Server-connector.md index cb72161f5..6f0038715 100644 --- a/mssql-plugin/docs/SQL Server-connector.md +++ b/mssql-plugin/docs/SQL Server-connector.md @@ -22,6 +22,14 @@ authentication. Optional for databases that do not require authentication. 
**Password:** Password to use to connect to the specified database. +**Transaction Isolation Level** The transaction isolation level of the database connection +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- TRANSACTION_READ_UNCOMMITTED: Allows dirty reads (reading uncommitted changes from other transactions). Non-repeatable reads and phantom reads are possible. + +For more details on the Transaction Isolation Levels supported in SQL Server, refer to the [SQL Server documentation](https://learn.microsoft.com/en-us/sql/t-sql/statements/set-transaction-isolation-level-transact-sql?view=sql-server-ver16) + **Authentication Type:** Indicates which authentication method will be used for the connection. Use 'SQL Login'. to connect to a SQL Server using username and password properties. Use 'Active Directory Password' to connect to an Azure SQL Database/Data Warehouse using an Azure AD principal name and password. diff --git a/mssql-plugin/docs/SqlServer-batchsink.md b/mssql-plugin/docs/SqlServer-batchsink.md index 5d10b4bb6..b4ca1cbc5 100644 --- a/mssql-plugin/docs/SqlServer-batchsink.md +++ b/mssql-plugin/docs/SqlServer-batchsink.md @@ -46,6 +46,14 @@ an Azure SQL Database/Data Warehouse using an Azure AD principal name and passwo **Password:** Password to use to connect to the specified database. +**Transaction Isolation Level** The transaction isolation level of the database connection +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. 
+- TRANSACTION_READ_UNCOMMITTED: Allows dirty reads (reading uncommitted changes from other transactions). Non-repeatable reads and phantom reads are possible. + +For more details on the Transaction Isolation Levels supported in SQL Server, refer to the [SQL Server documentation](https://learn.microsoft.com/en-us/sql/t-sql/statements/set-transaction-isolation-level-transact-sql?view=sql-server-ver16) + **Instance Name:** SQL Server instance name to connect to. When it is not specified, a connection is made to the default instance. For the case where both the instanceName and port are specified, see the notes for port. If you specify a Virtual Network Name in the Server connection property, you cannot diff --git a/mssql-plugin/docs/SqlServer-batchsource.md b/mssql-plugin/docs/SqlServer-batchsource.md index c8e30f77e..5c917621c 100644 --- a/mssql-plugin/docs/SqlServer-batchsource.md +++ b/mssql-plugin/docs/SqlServer-batchsource.md @@ -56,6 +56,14 @@ an Azure SQL Database/Data Warehouse using an Azure AD principal name and passwo **Password:** Password to use to connect to the specified database. +**Transaction Isolation Level** The transaction isolation level of the database connection +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- TRANSACTION_READ_UNCOMMITTED: Allows dirty reads (reading uncommitted changes from other transactions). Non-repeatable reads and phantom reads are possible. + +For more details on the Transaction Isolation Levels supported in SQL Server, refer to the [SQL Server documentation](https://learn.microsoft.com/en-us/sql/t-sql/statements/set-transaction-isolation-level-transact-sql?view=sql-server-ver16) + **Instance Name:** SQL Server instance name to connect to. 
When it is not specified, a connection is made to the default instance. For the case where both the instanceName and port are specified, see the notes for port. If you specify a Virtual Network Name in the Server connection property, you cannot diff --git a/mssql-plugin/pom.xml b/mssql-plugin/pom.xml index 45e2b9c03..6d4474256 100644 --- a/mssql-plugin/pom.xml +++ b/mssql-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Microsoft SQL Server plugin diff --git a/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSink.java b/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSink.java index 0fa8991c5..7b749cdc5 100644 --- a/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSink.java +++ b/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSink.java @@ -167,6 +167,11 @@ public Map getDBSpecificArguments() { packetSize, queryTimeout); } + @Override + public String getTransactionIsolationLevel() { + return connection.getTransactionIsolationLevel(); + } + @Override public String getConnectionString() { return String.format(SqlServerConstants.SQL_SERVER_CONNECTION_STRING_FORMAT, diff --git a/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSource.java b/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSource.java index 9603b24db..a76ed732d 100644 --- a/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSource.java +++ b/mssql-plugin/src/main/java/io/cdap/plugin/mssql/SqlServerSource.java @@ -188,6 +188,11 @@ public List getInitQueries() { return Collections.emptyList(); } + @Override + public String getTransactionIsolationLevel() { + return connection.getTransactionIsolationLevel(); + } + @Override public void validate(FailureCollector collector) { ConfigUtil.validateConnection(this, useConnection, connection, collector); diff --git a/mssql-plugin/widgets/SQL Server-connector.json b/mssql-plugin/widgets/SQL Server-connector.json index 171076295..c326cd81d 100644 --- 
a/mssql-plugin/widgets/SQL Server-connector.json +++ b/mssql-plugin/widgets/SQL Server-connector.json @@ -64,6 +64,20 @@ "widget-type": "password", "label": "Password", "name": "password" + }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_UNCOMMITTED", + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": "TRANSACTION_SERIALIZABLE" + } } ] }, diff --git a/mssql-plugin/widgets/SqlServer-batchsink.json b/mssql-plugin/widgets/SqlServer-batchsink.json index 260c66259..fb20cad9d 100644 --- a/mssql-plugin/widgets/SqlServer-batchsink.json +++ b/mssql-plugin/widgets/SqlServer-batchsink.json @@ -84,6 +84,20 @@ "label": "Password", "name": "password" }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_UNCOMMITTED", + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": "TRANSACTION_SERIALIZABLE" + } + }, { "widget-type": "keyvalue", "label": "Connection Arguments", @@ -280,6 +294,10 @@ { "type": "property", "name": "connectionArguments" + }, + { + "type": "property", + "name": "transactionIsolationLevel" } ] }, diff --git a/mssql-plugin/widgets/SqlServer-batchsource.json b/mssql-plugin/widgets/SqlServer-batchsource.json index dad5f4708..b3494e485 100644 --- a/mssql-plugin/widgets/SqlServer-batchsource.json +++ b/mssql-plugin/widgets/SqlServer-batchsource.json @@ -84,6 +84,20 @@ "label": "Password", "name": "password" }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_UNCOMMITTED", + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": 
"TRANSACTION_SERIALIZABLE" + } + }, { "widget-type": "keyvalue", "label": "Connection Arguments", @@ -316,6 +330,10 @@ { "type": "property", "name": "connectionArguments" + }, + { + "type": "property", + "name": "transactionIsolationLevel" } ] }, diff --git a/mysql-plugin/docs/MySQL-connector.md b/mysql-plugin/docs/MySQL-connector.md index fb5c1fbb8..f586084c1 100644 --- a/mysql-plugin/docs/MySQL-connector.md +++ b/mysql-plugin/docs/MySQL-connector.md @@ -22,6 +22,14 @@ authentication. Optional for databases that do not require authentication. **Password:** Password to use to connect to the specified database. +**Transaction Isolation Level** The transaction isolation level of the database connection +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- TRANSACTION_READ_UNCOMMITTED: Allows dirty reads (reading uncommitted changes from other transactions). Non-repeatable reads and phantom reads are possible. + +For more details on the Transaction Isolation Levels supported in MySQL, refer to the [MySQL documentation](https://dev.mysql.com/doc/refman/8.4/en/innodb-transaction-isolation-levels.html) + **Connection Arguments:** A list of arbitrary string tag/value pairs as connection arguments. These arguments will be passed to the JDBC driver, as connection arguments, for JDBC drivers that may need additional configurations.
This is a semicolon-separated list of key-value pairs, where each pair is separated by a equals '=' and specifies diff --git a/mysql-plugin/docs/Mysql-batchsink.md b/mysql-plugin/docs/Mysql-batchsink.md index b28a28618..46a763f9d 100644 --- a/mysql-plugin/docs/Mysql-batchsink.md +++ b/mysql-plugin/docs/Mysql-batchsink.md @@ -39,6 +39,14 @@ You also can use the macro function ${conn(connection-name)}. **Password:** Password to use to connect to the specified database. +**Transaction Isolation Level** The transaction isolation level of the database connection +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- TRANSACTION_READ_UNCOMMITTED: Allows dirty reads (reading uncommitted changes from other transactions). Non-repeatable reads and phantom reads are possible. + +For more details on the Transaction Isolation Levels supported in MySQL, refer to the [MySQL documentation](https://dev.mysql.com/doc/refman/8.4/en/innodb-transaction-isolation-levels.html) + **Connection Arguments:** A list of arbitrary string key/value pairs as connection arguments. These arguments will be passed to the JDBC driver as connection arguments for JDBC drivers that may need additional configurations. diff --git a/mysql-plugin/docs/Mysql-batchsource.md b/mysql-plugin/docs/Mysql-batchsource.md index 010e08216..552bb5504 100644 --- a/mysql-plugin/docs/Mysql-batchsource.md +++ b/mysql-plugin/docs/Mysql-batchsource.md @@ -49,6 +49,14 @@ For example, 'SELECT MIN(id),MAX(id) FROM table'. Not required if numSplits is s **Password:** Password to use to connect to the specified database. +**Transaction Isolation Level** The transaction isolation level of the database connection +- TRANSACTION_READ_COMMITTED: No dirty reads.
Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- TRANSACTION_READ_UNCOMMITTED: Allows dirty reads (reading uncommitted changes from other transactions). Non-repeatable reads and phantom reads are possible. + +For more details on the Transaction Isolation Levels supported in MySQL, refer to the [MySQL documentation](https://dev.mysql.com/doc/refman/8.4/en/innodb-transaction-isolation-levels.html) + **Connection Arguments:** A list of arbitrary string key/value pairs as connection arguments. These arguments will be passed to the JDBC driver as connection arguments for JDBC drivers that may need additional configurations. diff --git a/mysql-plugin/pom.xml b/mysql-plugin/pom.xml index f691a15f2..44f904dfe 100644 --- a/mysql-plugin/pom.xml +++ b/mysql-plugin/pom.xml @@ -20,13 +20,13 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Mysql plugin mysql-plugin 4.0.0 - + io.cdap.cdap diff --git a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnector.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnector.java index 3dede5d49..e7e935135 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnector.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnector.java @@ -16,6 +16,7 @@ package io.cdap.plugin.mysql; +import com.google.common.collect.Maps; import io.cdap.cdap.api.annotation.Category; import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Name; @@ -62,7 +63,7 @@ public boolean supportSchema() { @Override protected SchemaReader getSchemaReader(String sessionID) { - return new MysqlSchemaReader(sessionID); + return new MysqlSchemaReader(sessionID, Maps.fromProperties(config.getConnectionArgumentsProperties())); } @Override diff --git 
a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnectorConfig.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnectorConfig.java index 9b481e4fe..8c20798d3 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnectorConfig.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConnectorConfig.java @@ -57,9 +57,9 @@ public int getDefaultPort() { public Properties getConnectionArgumentsProperties() { Properties prop = super.getConnectionArgumentsProperties(); // the unit below is milli-second - prop.put(JDBC_PROPERTY_CONNECT_TIMEOUT, "20000"); - prop.put(JDBC_PROPERTY_SOCKET_TIMEOUT, "20000"); - prop.put(JDBC_REWRITE_BATCHED_STATEMENTS, "true"); + prop.putIfAbsent(JDBC_PROPERTY_CONNECT_TIMEOUT, "20000"); + prop.putIfAbsent(JDBC_PROPERTY_SOCKET_TIMEOUT, "20000"); + prop.putIfAbsent(JDBC_REWRITE_BATCHED_STATEMENTS, "true"); // MySQL property to ensure that TINYINT(1) type data is not converted to MySQL Bit/Boolean type in the ResultSet. prop.putIfAbsent(MYSQL_TINYINT1_IS_BIT, "false"); return prop; diff --git a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConstants.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConstants.java index 39c0b8d08..54593f580 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConstants.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlConstants.java @@ -39,6 +39,7 @@ private MysqlConstants() { public static final String TRUST_CERT_KEYSTORE_PASSWORD = "trustCertificateKeyStorePassword"; public static final String MYSQL_CONNECTION_STRING_FORMAT = "jdbc:mysql://%s:%s/%s"; public static final String USE_CURSOR_FETCH = "useCursorFetch"; + public static final String ZERO_DATE_TIME_BEHAVIOR = "zeroDateTimeBehavior"; /** * Query to set SQL_MODE system variable. 
diff --git a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlDBRecord.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlDBRecord.java index 0560b10c3..94b711786 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlDBRecord.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlDBRecord.java @@ -93,4 +93,13 @@ protected void writeNonNullToDB(PreparedStatement stmt, Schema fieldSchema, super.writeNonNullToDB(stmt, fieldSchema, fieldName, fieldIndex); } + + @Override + protected void insertOperation(PreparedStatement stmt) throws SQLException { + for (int fieldIndex = 0; fieldIndex < columnTypes.size(); fieldIndex++) { + ColumnType columnType = columnTypes.get(fieldIndex); + Schema.Field field = record.getSchema().getField(columnType.getName(), true); + writeToDB(stmt, field, fieldIndex); + } + } } diff --git a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSchemaReader.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSchemaReader.java index a842ba568..50907c063 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSchemaReader.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSchemaReader.java @@ -16,12 +16,16 @@ package io.cdap.plugin.mysql; +import com.google.common.collect.Lists; import io.cdap.cdap.api.data.schema.Schema; import io.cdap.plugin.db.CommonSchemaReader; +import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Types; +import java.util.List; +import java.util.Map; /** * Schema reader for mapping Mysql DB type @@ -31,12 +35,42 @@ public class MysqlSchemaReader extends CommonSchemaReader { public static final String YEAR_TYPE_NAME = "YEAR"; public static final String MEDIUMINT_UNSIGNED_TYPE_NAME = "MEDIUMINT UNSIGNED"; private final String sessionID; + private boolean zeroDateTimeToNull; public MysqlSchemaReader(String sessionID) { super(); this.sessionID = sessionID; } + public MysqlSchemaReader(String sessionID, Map 
connectionArguments) { + super(); + this.sessionID = sessionID; + this.zeroDateTimeToNull = MysqlUtil.isZeroDateTimeToNull(connectionArguments); + } + + @Override + public List getSchemaFields(ResultSet resultSet) throws SQLException { + List schemaFields = Lists.newArrayList(); + ResultSetMetaData metadata = resultSet.getMetaData(); + // ResultSetMetadata columns are numbered starting with 1 + for (int i = 1; i <= metadata.getColumnCount(); i++) { + if (shouldIgnoreColumn(metadata, i)) { + continue; + } + + String columnName = metadata.getColumnName(i); + Schema columnSchema = getSchema(metadata, i); + + if (ResultSetMetaData.columnNullable == metadata.isNullable(i) + || (zeroDateTimeToNull && MysqlUtil.isDateTimeLikeType(metadata.getColumnType(i)))) { + columnSchema = Schema.nullableOf(columnSchema); + } + Schema.Field field = Schema.Field.of(columnName, columnSchema); + schemaFields.add(field); + } + return schemaFields; + } + @Override public boolean shouldIgnoreColumn(ResultSetMetaData metadata, int index) throws SQLException { return metadata.getColumnName(index).equals("c_" + sessionID) || diff --git a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSink.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSink.java index c839cb12b..bf3b6fe5b 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSink.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSink.java @@ -16,6 +16,7 @@ package io.cdap.plugin.mysql; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; import io.cdap.cdap.api.annotation.Description; import io.cdap.cdap.api.annotation.Macro; @@ -24,6 +25,7 @@ import io.cdap.cdap.api.annotation.Name; import io.cdap.cdap.api.annotation.Plugin; import io.cdap.cdap.api.data.format.StructuredRecord; +import io.cdap.cdap.api.data.schema.Schema; import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.cdap.etl.api.batch.BatchSink; import 
io.cdap.cdap.etl.api.batch.BatchSinkContext; @@ -39,9 +41,12 @@ import io.cdap.plugin.db.sink.FieldsValidator; import io.cdap.plugin.util.DBUtils; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.StringJoiner; +import java.util.stream.Collectors; import javax.annotation.Nullable; /** @@ -54,6 +59,7 @@ public class MysqlSink extends AbstractDBSink { private final MysqlSinkConfig mysqlSinkConfig; + private static final Character ESCAPE_CHAR = '`'; public MysqlSink(MysqlSinkConfig mysqlSinkConfig) { super(mysqlSinkConfig); @@ -85,6 +91,24 @@ protected SchemaReader getSchemaReader() { return new MysqlSchemaReader(null); } + @Override + protected void setColumnsInfo(List fields) { + List columnsList = new ArrayList<>(); + StringJoiner columnsJoiner = new StringJoiner(","); + for (Schema.Field field : fields) { + columnsList.add(field.getName()); + columnsJoiner.add(ESCAPE_CHAR + field.getName() + ESCAPE_CHAR); + } + + super.columns = Collections.unmodifiableList(columnsList); + super.dbColumns = columnsJoiner.toString(); + } + + @VisibleForTesting + String getDbColumns() { + return dbColumns; + } + /** * MySQL action configuration. 
*/ @@ -160,6 +184,11 @@ public Map getDBSpecificArguments() { trustCertificateKeyStorePassword, false); } + @Override + public String getTransactionIsolationLevel() { + return connection.getTransactionIsolationLevel(); + } + @Override public MysqlConnectorConfig getConnection() { return connection; diff --git a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSource.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSource.java index 71f113436..b8fd3975c 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSource.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlSource.java @@ -81,7 +81,7 @@ protected LineageRecorder getLineageRecorder(BatchSourceContext context) { @Override protected SchemaReader getSchemaReader() { - return new MysqlSchemaReader(null); + return new MysqlSchemaReader(null, mysqlSourceConfig.getConnectionArguments()); } /** @@ -187,6 +187,11 @@ public MysqlConnectorConfig getConnection() { return connection; } + @Override + public String getTransactionIsolationLevel() { + return connection.getTransactionIsolationLevel(); + } + @Override public void validate(FailureCollector collector) { ConfigUtil.validateConnection(this, useConnection, connection, collector); diff --git a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlUtil.java b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlUtil.java index c1c770c06..abb4aa27b 100644 --- a/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlUtil.java +++ b/mysql-plugin/src/main/java/io/cdap/plugin/mysql/MysqlUtil.java @@ -18,6 +18,7 @@ import com.google.common.collect.ImmutableMap; +import java.sql.Types; import java.util.Map; /** @@ -91,4 +92,20 @@ public static Map composeDbSpecificArgumentsMap(Boolean autoReco public static String getConnectionString(String host, Integer port, String database) { return String.format(MysqlConstants.MYSQL_CONNECTION_STRING_FORMAT, host, port, database); } + + public static boolean isDateTimeLikeType(int columnType) { + int[] 
dateTimeLikeTypes = new int[]{Types.TIMESTAMP, Types.TIMESTAMP_WITH_TIMEZONE, Types.DATE}; + + for (int dttType : dateTimeLikeTypes) { + if (dttType == columnType) { + return true; + } + } + return false; + } + + public static boolean isZeroDateTimeToNull(Map connectionArguments) { + String argValue = connectionArguments.getOrDefault(MysqlConstants.ZERO_DATE_TIME_BEHAVIOR, ""); + return argValue.equals("CONVERT_TO_NULL") || argValue.equals("convertToNull"); + } } diff --git a/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlFailedConnectionTest.java b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlFailedConnectionTest.java index a1be6a754..5c4f35828 100644 --- a/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlFailedConnectionTest.java +++ b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlFailedConnectionTest.java @@ -31,10 +31,26 @@ public void test() throws ClassNotFoundException, IOException { new MysqlConnectorConfig("localhost", 3306, "username", "password", "jdbc", "")); super.test(JDBC_DRIVER_CLASS_NAME, connector, "Failed to create connection to database via connection string: " + - "jdbc:mysql://localhost:3306 and arguments: {user=username, " + - "rewriteBatchedStatements=true, " + - "connectTimeout=20000, tinyInt1isBit=false, " + - "socketTimeout=20000}. Error: " + - "ConnectException: Connection refused (Connection refused)."); + "jdbc:mysql://localhost:3306 and arguments: {user=username, " + + "rewriteBatchedStatements=true, " + + "connectTimeout=20000, tinyInt1isBit=false, " + + "socketTimeout=20000}. 
Error: " + + "ConnectException: Connection refused (Connection refused)."); } + + @Test + public void testWithUpdatedConnectionArguments() throws ClassNotFoundException, IOException { + + MysqlConnector connector = new MysqlConnector( + new MysqlConnectorConfig("localhost", 3306, "username", "password", "jdbc", + "connectTimeout=30000;socketTimeout=30000")); + + super.test(JDBC_DRIVER_CLASS_NAME, connector, "Failed to create connection to database via connection string: " + + "jdbc:mysql://localhost:3306 and arguments: {user=username, " + + "rewriteBatchedStatements=true, " + + "connectTimeout=30000, tinyInt1isBit=false, " + + "socketTimeout=30000}. Error: " + + "ConnectException: Connection refused (Connection refused)."); + } + } diff --git a/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlSchemaReaderUnitTest.java b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlSchemaReaderUnitTest.java index 28582bc3b..fa7029c8f 100644 --- a/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlSchemaReaderUnitTest.java +++ b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlSchemaReaderUnitTest.java @@ -21,9 +21,13 @@ import org.junit.Test; import org.mockito.Mockito; +import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Types; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class MysqlSchemaReaderUnitTest { @@ -37,4 +41,33 @@ public void validateYearTypeToStringTypeConversion() throws SQLException { Schema schema = schemaReader.getSchema(metadata, 1); Assert.assertTrue(Schema.of(Schema.Type.INT).equals(schema)); } + + @Test + public void validateZeroDateTimeBehavior() throws SQLException { + ResultSet resultSet = Mockito.mock(ResultSet.class); + ResultSetMetaData metadata = Mockito.mock(ResultSetMetaData.class); + Mockito.when(resultSet.getMetaData()).thenReturn(metadata); + + Mockito.when(metadata.getColumnCount()).thenReturn(1); + 
Mockito.when(metadata.getColumnName(Mockito.eq(1))).thenReturn("some_date"); + + Mockito.when(metadata.getColumnType(Mockito.eq(1))).thenReturn(Types.DATE); + Mockito.when(metadata.getColumnTypeName(Mockito.eq(1))).thenReturn(MysqlSchemaReader.YEAR_TYPE_NAME); + + // non-nullable column + Mockito.when(metadata.isNullable(Mockito.eq(1))).thenReturn(0); + + // test that non-nullable date remains non-nullable when no conn arg is present + MysqlSchemaReader schemaReader = new MysqlSchemaReader(null); + List schemaFields = schemaReader.getSchemaFields(resultSet); + Assert.assertFalse(schemaFields.get(0).getSchema().isNullable()); + + // test that it converts non-nullable date column to nullable when zeroDateTimeBehavior is convert to null + Map connectionArguments = new HashMap<>(); + connectionArguments.put("zeroDateTimeBehavior", "CONVERT_TO_NULL"); + + schemaReader = new MysqlSchemaReader(null, connectionArguments); + schemaFields = schemaReader.getSchemaFields(resultSet); + Assert.assertTrue(schemaFields.get(0).getSchema().isNullable()); + } } diff --git a/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlSinkTest.java b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlSinkTest.java new file mode 100644 index 000000000..1dd4e809e --- /dev/null +++ b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlSinkTest.java @@ -0,0 +1,35 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.plugin.mysql; + +import io.cdap.cdap.api.data.schema.Schema; +import org.junit.Assert; +import org.junit.Test; + +public class MysqlSinkTest { + @Test + public void testSetColumnsInfo() { + Schema outputSchema = Schema.recordOf("output", + Schema.Field.of("id", Schema.of(Schema.Type.INT)), + Schema.Field.of("name", Schema.of(Schema.Type.STRING)), + Schema.Field.of("insert", Schema.of(Schema.Type.STRING))); + MysqlSink mySQLSink = new MysqlSink(new MysqlSink.MysqlSinkConfig()); + Assert.assertNotNull(outputSchema.getFields()); + mySQLSink.setColumnsInfo(outputSchema.getFields()); + Assert.assertEquals("`id`,`name`,`insert`", mySQLSink.getDbColumns()); + } +} diff --git a/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlUtilUnitTest.java b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlUtilUnitTest.java new file mode 100644 index 000000000..9481068f1 --- /dev/null +++ b/mysql-plugin/src/test/java/io/cdap/plugin/mysql/MysqlUtilUnitTest.java @@ -0,0 +1,62 @@ + +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package io.cdap.plugin.mysql; + +import org.junit.Test; + +import java.sql.Types; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class MysqlUtilUnitTest { + + @Test + public void testIsZeroDateTimeToNull() { + Map connArgsMap = new HashMap<>(1); + + connArgsMap.put("zeroDateTimeBehavior", ""); + assertFalse(MysqlUtil.isZeroDateTimeToNull(connArgsMap)); + + connArgsMap.put("zeroDateTimeBehavior", "ROUND"); + assertFalse(MysqlUtil.isZeroDateTimeToNull(connArgsMap)); + + connArgsMap.put("zeroDateTimeBehavior", "CONVERT_TO_NULL"); + assertTrue(MysqlUtil.isZeroDateTimeToNull(connArgsMap)); + + connArgsMap.put("zeroDateTimeBehavior", "convertToNull"); + assertTrue(MysqlUtil.isZeroDateTimeToNull(connArgsMap)); + } + + @Test + public void testIsDateTimeLikeType() { + int dateType = Types.DATE; + int timestampType = Types.TIMESTAMP; + int timestampWithTimezoneType = Types.TIMESTAMP_WITH_TIMEZONE; + int timeType = Types.TIME; + int stringType = Types.VARCHAR; + + assertTrue(MysqlUtil.isDateTimeLikeType(dateType)); + assertTrue(MysqlUtil.isDateTimeLikeType(timestampType)); + assertTrue(MysqlUtil.isDateTimeLikeType(timestampWithTimezoneType)); + assertFalse(MysqlUtil.isDateTimeLikeType(timeType)); + assertFalse(MysqlUtil.isDateTimeLikeType(stringType)); + } +} diff --git a/mysql-plugin/widgets/MySQL-connector.json b/mysql-plugin/widgets/MySQL-connector.json index 9064d1bf6..f60f5526f 100644 --- a/mysql-plugin/widgets/MySQL-connector.json +++ b/mysql-plugin/widgets/MySQL-connector.json @@ -30,6 +30,20 @@ "widget-attributes": { "default": "3306" } + }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_UNCOMMITTED", + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": 
"TRANSACTION_SERIALIZABLE" + } } ] }, diff --git a/mysql-plugin/widgets/Mysql-batchsink.json b/mysql-plugin/widgets/Mysql-batchsink.json index c525ead40..58596aae2 100644 --- a/mysql-plugin/widgets/Mysql-batchsink.json +++ b/mysql-plugin/widgets/Mysql-batchsink.json @@ -65,6 +65,20 @@ "label": "Password", "name": "password" }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_UNCOMMITTED", + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": "TRANSACTION_SERIALIZABLE" + } + }, { "widget-type": "keyvalue", "label": "Connection Arguments", @@ -225,6 +239,10 @@ "type": "property", "name": "password" }, + { + "type": "property", + "name": "transactionIsolationLevel" + }, { "type": "property", "name": "host" diff --git a/mysql-plugin/widgets/Mysql-batchsource.json b/mysql-plugin/widgets/Mysql-batchsource.json index 9175bd5ed..506e837f7 100644 --- a/mysql-plugin/widgets/Mysql-batchsource.json +++ b/mysql-plugin/widgets/Mysql-batchsource.json @@ -65,6 +65,20 @@ "label": "Password", "name": "password" }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_UNCOMMITTED", + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": "TRANSACTION_SERIALIZABLE" + } + }, { "widget-type": "keyvalue", "label": "Connection Arguments", @@ -277,6 +291,10 @@ "type": "property", "name": "password" }, + { + "type": "property", + "name": "transactionIsolationLevel" + }, { "type": "property", "name": "host" diff --git a/netezza-plugin/pom.xml b/netezza-plugin/pom.xml index 900e430fe..1bc303e9a 100644 --- a/netezza-plugin/pom.xml +++ b/netezza-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 
Netezza plugin @@ -92,12 +92,12 @@ <_exportcontents> - io.cdap.plugin.netezza.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.netezza.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/oracle-plugin/pom.xml b/oracle-plugin/pom.xml index e0ed7ff50..c12bb2fde 100644 --- a/oracle-plugin/pom.xml +++ b/oracle-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 Oracle plugin @@ -113,12 +113,12 @@ <_exportcontents> - io.cdap.plugin.oracle.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.oracle.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/oracle-plugin/src/e2e-test/features/sink/OracleRunTime.feature b/oracle-plugin/src/e2e-test/features/sink/OracleRunTime.feature index c2b56e8b7..70b1bdba6 100644 --- a/oracle-plugin/src/e2e-test/features/sink/OracleRunTime.feature +++ b/oracle-plugin/src/e2e-test/features/sink/OracleRunTime.feature @@ -117,3 +117,104 @@ Feature: Oracle - Verify data transfer from BigQuery source to Oracle sink Then Verify the pipeline status is "Succeeded" Then Validate records transferred to target table with record counts of BigQuery table Then Validate the values of records transferred to target Oracle table is equal to the values from source BigQuery table + + @BQ_SOURCE_TEST_SMALL_CASE @ORACLE_TEST_TABLE + Scenario: To verify data is getting transferred from BigQuery source to Oracle sink successfully when schema is coming in small case + Given Open Datafusion Project to configure pipeline + When 
Expand Plugin group in the LHS plugins list: "Source" + When Select plugin: "BigQuery" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "Oracle" from the plugins list as: "Sink" + Then Connect plugins: "BigQuery" and "Oracle" to establish connection + Then Navigate to the properties page of plugin: "BigQuery" + Then Replace input plugin property: "project" with value: "projectId" + Then Enter input plugin property: "datasetProject" with value: "projectId" + Then Enter input plugin property: "referenceName" with value: "BQReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Verify the Output Schema matches the Expected Schema: "bqOutputDatatypesSchemaSmallCase" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "Oracle" + Then Select dropdown plugin property: "select-jdbcPluginName" with option value: "driverName" + Then Replace input plugin property: "host" with value: "host" for Credentials and Authorization related fields + Then Replace input plugin property: "port" with value: "port" for Credentials and Authorization related fields + Then Replace input plugin property: "user" with value: "username" for Credentials and Authorization related fields + Then Replace input plugin property: "password" with value: "password" for Credentials and Authorization related fields + Then Select radio button plugin property: "connectionType" with value: "service" + Then Select radio button plugin property: "role" with value: "normal" + Then Enter input plugin property: "referenceName" with value: "sourceRef" + Then Replace input plugin property: "database" with value: "databaseName" + Then Replace input plugin property: "tableName" with value: "targetTable" + Then Replace input plugin 
property: "dbSchemaName" with value: "schema" + Then Replace input plugin property: "user" with value: "username" for Credentials and Authorization related fields + Then Replace input plugin property: "password" with value: "password" for Credentials and Authorization related fields + Then Enter input plugin property: "referenceName" with value: "targetRef" + Then Select radio button plugin property: "connectionType" with value: "service" + Then Select radio button plugin property: "role" with value: "normal" + Then Validate "Oracle" plugin properties + Then Close the Plugin Properties page + Then Save the pipeline + Then Preview and run the pipeline + Then Verify the preview of pipeline is "success" + Then Click on preview data for Oracle sink + Then Close the preview data + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Validate records transferred to target table with record counts of BigQuery table + Then Validate the values of records transferred to target Oracle table is equal to the values from source BigQuery table with case + + + @BQ_SOURCE_TEST_DATE @ORACLE_DATE_TABLE + Scenario: To verify data is getting transferred from BigQuery source to Oracle sink successfully when schema is having date and timestamp fields + Given Open Datafusion Project to configure pipeline + When Expand Plugin group in the LHS plugins list: "Source" + When Select plugin: "BigQuery" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "Oracle" from the plugins list as: "Sink" + Then Connect plugins: "BigQuery" and "Oracle" to establish connection + Then Navigate to the properties page of plugin: "BigQuery" + Then Replace input plugin property: "project" with value: "projectId" + Then Enter input plugin property: "datasetProject" with value: "projectId" + Then Enter input plugin 
property: "referenceName" with value: "BQReferenceName" + Then Enter input plugin property: "dataset" with value: "dataset" + Then Enter input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Verify the Output Schema matches the Expected Schema: "outputDatatypesDateTimeSchema" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "Oracle" + Then Select dropdown plugin property: "select-jdbcPluginName" with option value: "driverName" + Then Replace input plugin property: "host" with value: "host" for Credentials and Authorization related fields + Then Replace input plugin property: "port" with value: "port" for Credentials and Authorization related fields + Then Replace input plugin property: "user" with value: "username" for Credentials and Authorization related fields + Then Replace input plugin property: "password" with value: "password" for Credentials and Authorization related fields + Then Select radio button plugin property: "connectionType" with value: "service" + Then Select radio button plugin property: "role" with value: "normal" + Then Enter input plugin property: "referenceName" with value: "sourceRef" + Then Replace input plugin property: "database" with value: "databaseName" + Then Replace input plugin property: "tableName" with value: "targetTable" + Then Replace input plugin property: "dbSchemaName" with value: "schema" + Then Replace input plugin property: "user" with value: "username" for Credentials and Authorization related fields + Then Replace input plugin property: "password" with value: "password" for Credentials and Authorization related fields + Then Enter input plugin property: "referenceName" with value: "targetRef" + Then Select radio button plugin property: "connectionType" with value: "service" + Then Select radio button plugin property: "role" with value: "normal" + Then Validate "Oracle" plugin properties 
+ Then Close the Plugin Properties page + Then Save the pipeline + Then Preview and run the pipeline + Then Verify the preview of pipeline is "success" + Then Click on preview data for Oracle sink + Then Close the preview data + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Validate records transferred to target table with record counts of BigQuery table + Then Validate the values of records transferred to target Oracle table is equal to the values from source BigQuery table diff --git a/oracle-plugin/src/e2e-test/java/io.cdap.plugin/BQValidation.java b/oracle-plugin/src/e2e-test/java/io.cdap.plugin/BQValidation.java index 6edfcc8fd..b5a82e420 100644 --- a/oracle-plugin/src/e2e-test/java/io.cdap.plugin/BQValidation.java +++ b/oracle-plugin/src/e2e-test/java/io.cdap.plugin/BQValidation.java @@ -33,7 +33,12 @@ import java.sql.Types; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.LocalDateTime; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Base64; import java.util.Date; import java.util.List; @@ -44,6 +49,13 @@ public class BQValidation { + private static final List TIMESTAMP_DATE_FORMATS = Arrays.asList( + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"), /* HH = 24-hour clock; hh would parse 12:xx as 00:xx */ + new SimpleDateFormat("yyyy-MM-dd")); + private static final List TIMESTAMP_TZ_DATE_FORMATS = Arrays.asList( + DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX"), + DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX")); + /** * Extracts entire data from source and target tables.
* @@ -68,11 +80,12 @@ public static boolean validateDBToBQRecordValues(String schema, String sourceTab ResultSet.HOLD_CURSORS_OVER_COMMIT); ResultSet rsSource = statement1.executeQuery(getSourceQuery); - return compareResultSetAndJsonData(rsSource, jsonResponse); + return compareResultSetAndJsonData(rsSource, jsonResponse, false); } } - public static boolean validateBQToDBRecordValues(String schema, String sourceTable, String targetTable) + public static boolean validateBQToDBRecordValues(String schema, String sourceTable, String targetTable, + boolean isSchemaSmallCase) throws SQLException, ClassNotFoundException, ParseException, IOException, InterruptedException { List jsonResponse = new ArrayList<>(); List bigQueryRows = new ArrayList<>(); @@ -88,7 +101,7 @@ public static boolean validateBQToDBRecordValues(String schema, String sourceTab ResultSet.HOLD_CURSORS_OVER_COMMIT); ResultSet rsTarget = statement1.executeQuery(getTargetQuery); - return compareResultSetAndJsonData(rsTarget, jsonResponse); + return compareResultSetAndJsonData(rsTarget, jsonResponse, isSchemaSmallCase); } } @@ -119,7 +132,8 @@ private static void getBigQueryTableData(String table, List bigQueryRows * @throws ParseException If an error occurs while parsing the data. 
*/ - public static boolean compareResultSetAndJsonData(ResultSet rsSource, List bigQueryData) + public static boolean compareResultSetAndJsonData(ResultSet rsSource, List bigQueryData, + boolean isSchemaSmallCase) throws SQLException, ParseException { ResultSetMetaData mdSource = rsSource.getMetaData(); boolean result = false; @@ -146,7 +160,8 @@ public static boolean compareResultSetAndJsonData(ResultSet rsSource, List getDBSpecificArguments() { return ImmutableMap.of(OracleConstants.DEFAULT_BATCH_VALUE, String.valueOf(defaultBatchValue)); diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnector.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnector.java index fde72c8ad..16371d5c1 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnector.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnector.java @@ -112,7 +112,8 @@ protected DBConnectorPath getDBConnectorPath(String path) { @Override protected SchemaReader getSchemaReader(String sessionID) { - return new OracleSourceSchemaReader(sessionID); + return new OracleSourceSchemaReader(sessionID, config.getTreatAsOldTimestamp(), + config.getTreatPrecisionlessNumAsDeci()); } @Override @@ -125,15 +126,8 @@ protected String getConnectionString(@Nullable String database) { if (database == null) { return config.getConnectionString(); } - if (OracleConstants.TNS_CONNECTION_TYPE.equals(config.getConnectionType())) { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_TNS_FORMAT, database); - } else if (OracleConstants.SERVICE_CONNECTION_TYPE.equals(config.getConnectionType())) { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT, config.getHost(), - config.getPort(), database); - } else { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SID_FORMAT, - config.getHost(), config.getPort(), database); - } + return OracleConstants.getConnectionString(config.getConnectionType(), + 
config.getHost(), config.getPort(), database, config.getSSlMode()); } @Override diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnectorConfig.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnectorConfig.java index 73b005243..cbc1e5ed2 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnectorConfig.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConnectorConfig.java @@ -22,8 +22,6 @@ import io.cdap.plugin.db.TransactionIsolationLevel; import io.cdap.plugin.db.connector.AbstractDBSpecificConnectorConfig; -import java.util.HashMap; -import java.util.Map; import java.util.Properties; import javax.annotation.Nullable; @@ -43,12 +41,14 @@ public OracleConnectorConfig(String host, int port, String user, String password public OracleConnectorConfig(String host, int port, String user, String password, String jdbcPluginName, String connectionArguments, String connectionType, String database) { - this(host, port, user, password, jdbcPluginName, connectionArguments, connectionType, database, null); + this(host, port, user, password, jdbcPluginName, connectionArguments, connectionType, database, null, null, null, + null); } public OracleConnectorConfig(String host, int port, String user, String password, String jdbcPluginName, String connectionArguments, String connectionType, String database, - String role) { + String role, Boolean useSSL, @Nullable Boolean treatAsOldTimestamp, + @Nullable Boolean treatPrecisionlessNumAsDeci) { this.host = host; this.port = port; @@ -59,17 +59,14 @@ public OracleConnectorConfig(String host, int port, String user, String password this.connectionType = connectionType; this.database = database; this.role = role; + this.useSSL = useSSL; + this.treatAsOldTimestamp = treatAsOldTimestamp; + this.treatPrecisionlessNumAsDeci = treatPrecisionlessNumAsDeci; } @Override public String getConnectionString() { - if 
(OracleConstants.TNS_CONNECTION_TYPE.equals(getConnectionType())) { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_TNS_FORMAT, database); - } else if (OracleConstants.SERVICE_CONNECTION_TYPE.equals(getConnectionType())) { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT, host, getPort(), database); - } else { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SID_FORMAT, host, getPort(), database); - } + return OracleConstants.getConnectionString(connectionType, host, getPort(), database, useSSL); } @Name(OracleConstants.CONNECTION_TYPE) @@ -86,11 +83,20 @@ public String getConnectionString() { @Macro private String database; - @Name(OracleConstants.TRANSACTION_ISOLATION_LEVEL) - @Description("The transaction isolation level for the database session.") - @Macro + @Name(OracleConstants.USE_SSL) + @Description("Turns on SSL encryption. Connection will fail if SSL is not available") @Nullable - private String transactionIsolationLevel; + public Boolean useSSL; + + @Name(OracleConstants.TREAT_AS_OLD_TIMESTAMP) + @Description("A hidden field to handle timestamp as CDAP's timestamp micros or string as per old behavior.") + @Nullable + public Boolean treatAsOldTimestamp; + + @Name(OracleConstants.TREAT_PRECISIONLESSNUM_AS_DECI) + @Description("A hidden field to handle precision less number as CDAP's decimal per old behavior.") + @Nullable + public Boolean treatPrecisionlessNumAsDeci; @Override protected int getDefaultPort() { @@ -109,6 +115,19 @@ public String getDatabase() { return database; } + public Boolean getSSlMode() { + // return false if useSSL is null, otherwise return its value + return useSSL != null && useSSL; + } + + public Boolean getTreatAsOldTimestamp() { + return Boolean.TRUE.equals(treatAsOldTimestamp); + } + + public Boolean getTreatPrecisionlessNumAsDeci() { + return Boolean.TRUE.equals(treatPrecisionlessNumAsDeci); + } + @Override public Properties getConnectionArgumentsProperties() { 
Properties prop = super.getConnectionArgumentsProperties(); @@ -119,6 +138,7 @@ public Properties getConnectionArgumentsProperties() { return prop; } + @Override public String getTransactionIsolationLevel() { //if null default to the highest isolation level possible if (transactionIsolationLevel == null) { @@ -128,16 +148,7 @@ public String getTransactionIsolationLevel() { //This ensures that the role is mapped to the right serialization level, even w/ incorrect user input //if role is SYSDBA or SYSOP it will map to read_committed. else serialized return (!getRole().equals(ROLE_NORMAL)) ? TransactionIsolationLevel.Level.TRANSACTION_READ_COMMITTED.name() : - TransactionIsolationLevel.Level.valueOf(transactionIsolationLevel).name(); - } - - @Override - public Map getAdditionalArguments() { - Map additonalArguments = new HashMap<>(); - if (getTransactionIsolationLevel() != null) { - additonalArguments.put(TransactionIsolationLevel.CONF_KEY, getTransactionIsolationLevel()); - } - return additonalArguments; + TransactionIsolationLevel.Level.valueOf(transactionIsolationLevel).name(); } @Override diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConstants.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConstants.java index 040780a89..cbd411175 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConstants.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleConstants.java @@ -16,6 +16,8 @@ package io.cdap.plugin.oracle; +import javax.annotation.Nullable; + /** * Oracle Constants. 
*/ @@ -27,6 +29,10 @@ private OracleConstants() { public static final String PLUGIN_NAME = "Oracle"; public static final String ORACLE_CONNECTION_STRING_SID_FORMAT = "jdbc:oracle:thin:@%s:%s:%s"; public static final String ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT = "jdbc:oracle:thin:@//%s:%s/%s"; + // Connection formats to accept protocol (e.g., jdbc:oracle:thin:@://:/) + public static final String ORACLE_CONNECTION_STRING_SID_FORMAT_WITH_PROTOCOL = "jdbc:oracle:thin:@%s:%s:%s/%s"; + public static final String ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT_WITH_PROTOCOL = + "jdbc:oracle:thin:@%s://%s:%s/%s"; public static final String ORACLE_CONNECTION_STRING_TNS_FORMAT = "jdbc:oracle:thin:@%s"; public static final String DEFAULT_BATCH_VALUE = "defaultBatchValue"; public static final String DEFAULT_ROW_PREFETCH = "defaultRowPrefetch"; @@ -34,6 +40,96 @@ private OracleConstants() { public static final String CONNECTION_TYPE = "connectionType"; public static final String ROLE = "role"; public static final String NAME_DATABASE = "database"; - public static final String TNS_CONNECTION_TYPE = "TNS"; + public static final String TNS_CONNECTION_TYPE = "tns"; public static final String TRANSACTION_ISOLATION_LEVEL = "transactionIsolationLevel"; + public static final String USE_SSL = "useSSL"; + public static final String TREAT_AS_OLD_TIMESTAMP = "treatAsOldTimestamp"; + public static final String TREAT_PRECISIONLESSNUM_AS_DECI = "treatPrecisionlessNumAsDeci"; + + /** + * Constructs the Oracle connection string based on the provided connection type, host, port, and database. + * If SSL is enabled, the connection protocol will be "tcps" instead of "tcp". + * + * @param connectionType TNS/Service/SID + * @param host Host name of the oracle server + * @param port Port of the oracle server + * @param database Database to connect to + * @param useSSL Whether SSL/TLS is required(YES/NO) + * @return Connection String based on the given parameters and connection type. 
+ */ + public static String getConnectionString(String connectionType, + @Nullable String host, + @Nullable int port, + String database, + @Nullable Boolean useSSL) { + // Use protocol as "tcps" when SSL is requested or else use "tcp". + String connectionProtocol; + boolean isSSLEnabled = false; + if (useSSL != null && useSSL) { + connectionProtocol = "tcps"; + isSSLEnabled = true; + } else { + connectionProtocol = "tcp"; + } + + switch (connectionType.toLowerCase()) { + case OracleConstants.TNS_CONNECTION_TYPE: + // TNS connection doesn't require protocol + return String.format(OracleConstants.ORACLE_CONNECTION_STRING_TNS_FORMAT, database); + case OracleConstants.SERVICE_CONNECTION_TYPE: + // Create connection string for SERVICE type. + return getConnectionStringWithService(host, port, database, connectionProtocol, isSSLEnabled); + default: + // Default to SID format if no matching case is found. + return getConnectionStringWithSID(host, port, database, connectionProtocol, isSSLEnabled); + } + } + + /** + * Constructs the connection string for a SERVICE connection type. + * + * @param host Host name of the Oracle server. + * @param port Port of the Oracle server. + * @param database Database name to connect to. + * @param connectionProtocol Protocol to use for the connection ("tcp" or "tcps"). + * @param isSSLEnabled Indicates if SSL is enabled. + * @return Formatted connection string for a SERVICE connection. + */ + private static String getConnectionStringWithService(@Nullable String host, + @Nullable int port, + String database, + String connectionProtocol, + boolean isSSLEnabled) { + // Choose the appropriate format based on whether SSL is enabled. 
+ if (isSSLEnabled) { + return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT_WITH_PROTOCOL, + connectionProtocol, host, port, database); + } + return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT, + host, port, database); + } + + /** + * Constructs the connection string for a SID connection type. + * + * @param host Host name of the Oracle server. + * @param port Port of the Oracle server. + * @param database Database name to connect to. + * @param connectionProtocol Protocol to use for the connection ("tcp" or "tcps"). + * @param isSSLEnabled Indicates if SSL is enabled. + * @return Formatted connection string for a SID connection. + */ + private static String getConnectionStringWithSID(@Nullable String host, + @Nullable int port, + String database, + String connectionProtocol, + boolean isSSLEnabled) { + // Choose the appropriate format based on whether SSL is enabled. + if (isSSLEnabled) { + return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SID_FORMAT_WITH_PROTOCOL, + connectionProtocol, host, port, database); + } + return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SID_FORMAT, + host, port, database); + } } diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OraclePostAction.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OraclePostAction.java index f8ebd9ac2..e11e455c1 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OraclePostAction.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OraclePostAction.java @@ -57,13 +57,7 @@ public static class OracleQueryActionConfig extends DBSpecificQueryActionConfig @Override public String getConnectionString() { - if (OracleConstants.TNS_CONNECTION_TYPE.equals(this.connectionType)) { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_TNS_FORMAT, database); - } else if (OracleConstants.SERVICE_CONNECTION_TYPE.equals(this.connectionType)) { - return 
String.format(OracleConstants.ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT, host, port, database); - } else { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SID_FORMAT, host, port, database); - } + return OracleConstants.getConnectionString(this.connectionType, host, port, database, null); } @Override diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSinkDBRecord.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSinkDBRecord.java index 7bbd25f22..01b9a8247 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSinkDBRecord.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSinkDBRecord.java @@ -17,9 +17,12 @@ package io.cdap.plugin.oracle; import io.cdap.cdap.api.data.format.StructuredRecord; +import io.cdap.cdap.api.data.schema.Schema; import io.cdap.plugin.db.ColumnType; import io.cdap.plugin.db.SchemaReader; +import java.sql.PreparedStatement; +import java.sql.SQLException; import java.util.List; /** @@ -37,4 +40,14 @@ public OracleSinkDBRecord(StructuredRecord record, List columnTypes) protected SchemaReader getSchemaReader() { return new OracleSinkSchemaReader(); } + + @Override + protected void insertOperation(PreparedStatement stmt) throws SQLException { + for (int fieldIndex = 0; fieldIndex < columnTypes.size(); fieldIndex++) { + ColumnType columnType = columnTypes.get(fieldIndex); + // Get the field from the schema using the column name with ignoring case. 
+ Schema.Field field = record.getSchema().getField(columnType.getName(), true); + writeToDB(stmt, field, fieldIndex); + } + } } diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java index 9a554a4a4..583fd686f 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSource.java @@ -63,7 +63,12 @@ protected String createConnectionString() { @Override protected SchemaReader getSchemaReader() { - return new OracleSourceSchemaReader(); + // PLUGIN-1893 : Based on field/properties from Oracle source and Oracle connection we will pass the flag to control + // handle schema to make it backward compatible. + boolean treatAsOldTimestamp = oracleSourceConfig.getConnection().getTreatAsOldTimestamp(); + boolean treatPrecisionlessNumAsDeci = oracleSourceConfig.getConnection().getTreatPrecisionlessNumAsDeci(); + + return new OracleSourceSchemaReader(null, treatAsOldTimestamp, treatPrecisionlessNumAsDeci); } @Override @@ -117,9 +122,11 @@ public OracleSourceConfig(String host, int port, String user, String password, S String connectionArguments, String connectionType, String database, String role, int defaultBatchValue, int defaultRowPrefetch, String importQuery, Integer numSplits, int fetchSize, - String boundingQuery, String splitBy) { + String boundingQuery, String splitBy, Boolean useSSL, Boolean treatAsOldTimestamp, + Boolean treatPrecisionlessNumAsDeci) { this.connection = new OracleConnectorConfig(host, port, user, password, jdbcPluginName, connectionArguments, - connectionType, database, role); + connectionType, database, role, useSSL, treatAsOldTimestamp, + treatPrecisionlessNumAsDeci); this.defaultBatchValue = defaultBatchValue; this.defaultRowPrefetch = defaultRowPrefetch; this.fetchSize = fetchSize; @@ -131,15 +138,8 @@ public OracleSourceConfig(String host, int port, String user, String 
password, S @Override public String getConnectionString() { - if (OracleConstants.TNS_CONNECTION_TYPE.equals(connection.getConnectionType())) { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_TNS_FORMAT, connection.getDatabase()); - } else if (OracleConstants.SERVICE_CONNECTION_TYPE.equals(connection.getConnectionType())) { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SERVICE_NAME_FORMAT, connection.getHost(), - connection.getPort(), connection.getDatabase()); - } else { - return String.format(OracleConstants.ORACLE_CONNECTION_STRING_SID_FORMAT, - connection.getHost(), connection.getPort(), connection.getDatabase()); - } + return OracleConstants.getConnectionString(connection.getConnectionType(), connection.getHost(), + connection.getPort(), connection.getDatabase(), connection.getSSlMode()); } @Override diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java index 3f7c2a20a..7d7c69d2b 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceDBRecord.java @@ -160,8 +160,8 @@ protected void writeNonNullToDB(PreparedStatement stmt, Schema fieldSchema, String timestampString = Timestamp.valueOf(localDateTime).toString(); Object timestampWithTimeZone = createOracleTimestamp(stmt.getConnection(), timestampString); stmt.setObject(sqlIndex, timestampWithTimeZone); - } else if (Schema.LogicalType.TIMESTAMP_MICROS.equals(fieldSchema.getLogicalType())) { - // Deprecated: Handle the case when the Timestamp is mapped to CDAP Timestamp type + } else { + // Handle the case when the Timestamp is mapped to CDAP Timestamp type or CDAP Date type. 
super.writeNonNullToDB(stmt, fieldSchema, fieldName, fieldIndex); } } else { diff --git a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceSchemaReader.java b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceSchemaReader.java index 7d35f9bc7..dd17d2e84 100644 --- a/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceSchemaReader.java +++ b/oracle-plugin/src/main/java/io/cdap/plugin/oracle/OracleSourceSchemaReader.java @@ -26,6 +26,7 @@ import java.sql.SQLException; import java.sql.Types; import java.util.Set; +import javax.annotation.Nullable; /** * Oracle Source schema reader. @@ -65,14 +66,17 @@ public class OracleSourceSchemaReader extends CommonSchemaReader { ); private final String sessionID; + private final Boolean isTimestampOldBehavior; + private final Boolean isPrecisionlessNumAsDecimal; public OracleSourceSchemaReader() { - this(null); + this(null, false, false); } - - public OracleSourceSchemaReader(String sessionID) { - super(); + public OracleSourceSchemaReader(@Nullable String sessionID, boolean isTimestampOldBehavior, + boolean isPrecisionlessNumAsDecimal) { this.sessionID = sessionID; + this.isTimestampOldBehavior = isTimestampOldBehavior; + this.isPrecisionlessNumAsDecimal = isPrecisionlessNumAsDecimal; } @Override @@ -81,10 +85,12 @@ public Schema getSchema(ResultSetMetaData metadata, int index) throws SQLExcepti switch (sqlType) { case TIMESTAMP_TZ: - return Schema.of(Schema.LogicalType.TIMESTAMP_MICROS); - case Types.TIMESTAMP: + return isTimestampOldBehavior ? Schema.of(Schema.Type.STRING) : Schema.of(Schema.LogicalType.TIMESTAMP_MICROS); case TIMESTAMP_LTZ: - return Schema.of(Schema.LogicalType.DATETIME); + return isTimestampOldBehavior ? Schema.of(Schema.LogicalType.TIMESTAMP_MICROS) + : Schema.of(Schema.LogicalType.DATETIME); + case Types.TIMESTAMP: + return isTimestampOldBehavior ? 
super.getSchema(metadata, index) : Schema.of(Schema.LogicalType.DATETIME); case BINARY_FLOAT: return Schema.of(Schema.Type.FLOAT); case BINARY_DOUBLE: @@ -107,12 +113,24 @@ public Schema getSchema(ResultSetMetaData metadata, int index) throws SQLExcepti // For a Number type without specified precision and scale, precision will be 0 and scale will be -127 if (precision == 0) { // reference : https://docs.oracle.com/cd/B28359_01/server.111/b28318/datatype.htm#CNCPT1832 - LOG.warn(String.format("Field '%s' is a %s type without precision and scale, " - + "converting into STRING type to avoid any precision loss.", - metadata.getColumnName(index), - metadata.getColumnTypeName(index), - metadata.getColumnName(index))); - return Schema.of(Schema.Type.STRING); + if (isPrecisionlessNumAsDecimal) { + precision = 38; + scale = 0; + LOG.warn(String.format("%s type with undefined precision and scale is detected, " + + "there may be a precision loss while running the pipeline. " + + "Please define an output precision and scale for field '%s' to avoid " + + "precision loss.", + metadata.getColumnTypeName(index), + metadata.getColumnName(index))); + return Schema.decimalOf(precision, scale); + } else { + LOG.warn(String.format("Field '%s' is a %s type without precision and scale, " + + "converting into STRING type to avoid any precision loss.", + metadata.getColumnName(index), + metadata.getColumnTypeName(index), + metadata.getColumnName(index))); + return Schema.of(Schema.Type.STRING); + } } return Schema.decimalOf(precision, scale); } diff --git a/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleFailedConnectionTest.java b/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleFailedConnectionTest.java index a2c9bcd5e..7ec6f3844 100644 --- a/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleFailedConnectionTest.java +++ b/oracle-plugin/src/test/java/io/cdap/plugin/oracle/OracleFailedConnectionTest.java @@ -28,7 +28,8 @@ public class OracleFailedConnectionTest 
extends DBSpecificFailedConnectionTest { public void test() throws ClassNotFoundException, IOException { OracleConnector connector = new OracleConnector( - new OracleConnectorConfig("localhost", 1521, "username", "password", "jdbc", "", "database")); + new OracleConnectorConfig("localhost", 1521, "username", "password", "jdbc", "", + "SID", "database")); super.test(JDBC_DRIVER_CLASS_NAME, connector, "Failed to create connection to database via connection string:" + " jdbc:oracle:thin:@localhost:1521:database and arguments: " + diff --git a/oracle-plugin/widgets/Oracle-batchsink.json b/oracle-plugin/widgets/Oracle-batchsink.json index 30d5b345f..8d6168780 100644 --- a/oracle-plugin/widgets/Oracle-batchsink.json +++ b/oracle-plugin/widgets/Oracle-batchsink.json @@ -100,6 +100,26 @@ "default": "TRANSACTION_SERIALIZABLE" } }, + { + "widget-type": "hidden", + "label": "TLS Encryption", + "name": "useSSL", + "description": "Enable TLS encryption (true/false)", + "widget-attributes": { + "layout": "inline", + "default": "false", + "options": [ + { + "id": "true", + "label": "true" + }, + { + "id": "false", + "label": "false" + } + ] + } + }, { "name": "connectionType", "label": "Connection Type", diff --git a/oracle-plugin/widgets/Oracle-batchsource.json b/oracle-plugin/widgets/Oracle-batchsource.json index 0fc0a5285..404262fb2 100644 --- a/oracle-plugin/widgets/Oracle-batchsource.json +++ b/oracle-plugin/widgets/Oracle-batchsource.json @@ -100,6 +100,64 @@ "default": "TRANSACTION_SERIALIZABLE" } }, + { + "widget-type": "hidden", + "label": "TLS Encryption", + "name": "useSSL", + "description": "Enable TLS encryption (true/false)", + "widget-attributes": { + "layout": "inline", + "default": "false", + "options": [ + { + "id": "true", + "label": "true" + }, + { + "id": "false", + "label": "false" + } + ] + } + }, + { + "widget-type": "hidden", + "label": "Treat as old timestamp", + "name": "treatAsOldTimestamp", + "widget-attributes": { + "layout": "inline", + "default": 
"false", + "options": [ + { + "id": "true", + "label": "true" + }, + { + "id": "false", + "label": "false" + } + ] + } + }, + { + "widget-type": "hidden", + "label": "Treat precision less number as Decimal(old behavior)", + "name": "treatPrecisionlessNumAsDeci", + "widget-attributes": { + "layout": "inline", + "default": "false", + "options": [ + { + "id": "true", + "label": "true" + }, + { + "id": "false", + "label": "false" + } + ] + } + }, { "name": "connectionType", "label": "Connection Type", @@ -306,6 +364,14 @@ { "type": "property", "name": "transactionIsolationLevel" + }, + { + "type": "property", + "name": "getTreatAsOldTimestampConn" + }, + { + "type": "property", + "name": "treatPrecisionlessNumAsDeci" } ] }, diff --git a/oracle-plugin/widgets/Oracle-connector.json b/oracle-plugin/widgets/Oracle-connector.json index 46f006c9c..013f3b240 100644 --- a/oracle-plugin/widgets/Oracle-connector.json +++ b/oracle-plugin/widgets/Oracle-connector.json @@ -109,6 +109,64 @@ ], "default": "TRANSACTION_SERIALIZABLE" } + }, + { + "widget-type": "hidden", + "label": "TLS Encryption", + "name": "useSSL", + "description": "Enable TLS encryption (true/false)", + "widget-attributes": { + "layout": "inline", + "default": "false", + "options": [ + { + "id": "true", + "label": "true" + }, + { + "id": "false", + "label": "false" + } + ] + } + }, + { + "widget-type": "hidden", + "label": "Treat as old timestamp", + "name": "treatAsOldTimestamp", + "widget-attributes": { + "layout": "inline", + "default": "false", + "options": [ + { + "id": "true", + "label": "true" + }, + { + "id": "false", + "label": "false" + } + ] + } + }, + { + "widget-type": "hidden", + "label": "Treat precision less number as Decimal(old behavior)", + "name": "treatPrecisionlessNumAsDeci", + "widget-attributes": { + "layout": "inline", + "default": "false", + "options": [ + { + "id": "true", + "label": "true" + }, + { + "id": "false", + "label": "false" + } + ] + } } ] }, diff --git a/pom.xml b/pom.xml 
index a6b40960c..4c24c7477 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ io.cdap.plugin database-plugins-parent - 1.11.0-SNAPSHOT + 1.11.8 pom Database Plugins Collection of database plugins @@ -44,6 +44,7 @@ cloudsql-postgresql-plugin teradata-plugin generic-db-argument-setter + amazon-redshift-plugin @@ -77,23 +78,12 @@ - - sonatype - https://oss.sonatype.org/content/groups/public - sonatype-snapshots - https://oss.sonatype.org/content/repositories/snapshots + https://central.sonatype.com/repository/maven-snapshots - - - sonatype - https://oss.sonatype.org/content/groups/public/ - - - @@ -348,16 +338,6 @@ - - - sonatype.release - https://oss.sonatype.org/service/local/staging/deploy/maven2 - - - sonatype.snapshots - https://oss.sonatype.org/content/repositories/snapshots - - ${testSourceLocation} @@ -399,7 +379,9 @@ maven-surefire-plugin 2.22.0 - -Xmx3g -Djava.awt.headless=true -XX:MaxPermSize=256m -XX:+UseConcMarkSweepGC -Djava.net.preferIPv4Stack=true + -Xmx3g -Djava.awt.headless=true -XX:MaxPermSize=256m -XX:+UseConcMarkSweepGC + -Djava.net.preferIPv4Stack=true + ${surefire.redirectTestOutputToFile} false plain @@ -531,14 +513,14 @@ - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.2 + org.sonatype.central + central-publishing-maven-plugin + 0.8.0 true - https://oss.sonatype.org - sonatype.release - 655dc88dc770c3 + sonatype.release + false + true @@ -723,7 +705,7 @@ io.cdap.tests.e2e cdap-e2e-framework - 0.3.0-SNAPSHOT + 0.3.0 test diff --git a/postgresql-plugin/docs/PostgreSQL-connector.md b/postgresql-plugin/docs/PostgreSQL-connector.md index 739c678e3..fe442cbf1 100644 --- a/postgresql-plugin/docs/PostgreSQL-connector.md +++ b/postgresql-plugin/docs/PostgreSQL-connector.md @@ -22,6 +22,14 @@ authentication. Optional for databases that do not require authentication. **Password:** Password to use to connect to the specified database. 
+**Transaction Isolation Level:** The transaction isolation level of the database connection. +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- Note: PostgreSQL does not implement `TRANSACTION_READ_UNCOMMITTED` as a distinct isolation level. Instead, this mode behaves identically to `TRANSACTION_READ_COMMITTED`, which is why it is not exposed as a separate option. + +For more details on the Transaction Isolation Levels supported in PostgreSQL, refer to the [PostgreSQL documentation](https://www.postgresql.org/docs/current/transaction-iso.html#TRANSACTION-ISO) + **Database:** The name of the database to connect to. **Connection Arguments:** A list of arbitrary string tag/value pairs as connection arguments. These arguments diff --git a/postgresql-plugin/docs/Postgres-batchsink.md b/postgresql-plugin/docs/Postgres-batchsink.md index b8a996463..82065e0fd 100644 --- a/postgresql-plugin/docs/Postgres-batchsink.md +++ b/postgresql-plugin/docs/Postgres-batchsink.md @@ -39,6 +39,14 @@ You also can use the macro function ${conn(connection-name)}. **Password:** Password to use to connect to the specified database. +**Transaction Isolation Level:** The transaction isolation level of the database connection. +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- Note: PostgreSQL does not implement `TRANSACTION_READ_UNCOMMITTED` as a distinct isolation level.
Instead, this mode behaves identically to `TRANSACTION_READ_COMMITTED`, which is why it is not exposed as a separate option. + +For more details on the Transaction Isolation Levels supported in PostgreSQL, refer to the [PostgreSQL documentation](https://www.postgresql.org/docs/current/transaction-iso.html#TRANSACTION-ISO) + **Connection Arguments:** A list of arbitrary string key/value pairs as connection arguments. These arguments will be passed to the JDBC driver as connection arguments for JDBC drivers that may need additional configurations. diff --git a/postgresql-plugin/docs/Postgres-batchsource.md b/postgresql-plugin/docs/Postgres-batchsource.md index af359022d..559723526 100644 --- a/postgresql-plugin/docs/Postgres-batchsource.md +++ b/postgresql-plugin/docs/Postgres-batchsource.md @@ -49,6 +49,14 @@ For example, 'SELECT MIN(id),MAX(id) FROM table'. Not required if numSplits is s **Password:** Password to use to connect to the specified database. +**Transaction Isolation Level:** The transaction isolation level of the database connection. +- TRANSACTION_READ_COMMITTED: No dirty reads. Non-repeatable reads and phantom reads are possible. +- TRANSACTION_SERIALIZABLE: No dirty reads. Non-repeatable and phantom reads are prevented. +- TRANSACTION_REPEATABLE_READ: No dirty reads. Prevents non-repeatable reads, but phantom reads are still possible. +- Note: PostgreSQL does not implement `TRANSACTION_READ_UNCOMMITTED` as a distinct isolation level. Instead, this mode behaves identically to `TRANSACTION_READ_COMMITTED`, which is why it is not exposed as a separate option. + +For more details on the Transaction Isolation Levels supported in PostgreSQL, refer to the [PostgreSQL documentation](https://www.postgresql.org/docs/current/transaction-iso.html#TRANSACTION-ISO) + **Connection Arguments:** A list of arbitrary string key/value pairs as connection arguments.
These arguments will be passed to the JDBC driver as connection arguments for JDBC drivers that may need additional configurations. diff --git a/postgresql-plugin/pom.xml b/postgresql-plugin/pom.xml index 7f3e6f14c..9a5d33bfa 100644 --- a/postgresql-plugin/pom.xml +++ b/postgresql-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 PostgreSQL plugin @@ -100,9 +100,9 @@ <_exportcontents> - io.cdap.plugin.postgres.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; + io.cdap.plugin.postgres.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; *;inline=false;scope=compile true diff --git a/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSink.java b/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSink.java index 8fd91cc63..6525a3dfa 100644 --- a/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSink.java +++ b/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSink.java @@ -165,6 +165,11 @@ public Map getDBSpecificArguments() { return ImmutableMap.of(PostgresConstants.CONNECTION_TIMEOUT, String.valueOf(connectionTimeout)); } + @Override + public String getTransactionIsolationLevel() { + return connection.getTransactionIsolationLevel(); + } + @Override protected PostgresConnectorConfig getConnection() { return connection; diff --git a/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSource.java b/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSource.java index d6677884f..ccef4078e 100644 --- a/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSource.java +++ b/postgresql-plugin/src/main/java/io/cdap/plugin/postgres/PostgresSource.java @@ -133,6 +133,11 @@ protected PostgresConnectorConfig getConnection() { return connection; } + @Override + public String getTransactionIsolationLevel() { + return connection.getTransactionIsolationLevel(); + } + @Override public void validate(FailureCollector collector) { 
ConfigUtil.validateConnection(this, useConnection, connection, collector); diff --git a/postgresql-plugin/widgets/PostgreSQL-connector.json b/postgresql-plugin/widgets/PostgreSQL-connector.json index 091afc972..9a7a02e14 100644 --- a/postgresql-plugin/widgets/PostgreSQL-connector.json +++ b/postgresql-plugin/widgets/PostgreSQL-connector.json @@ -31,6 +31,19 @@ "default": "5432" } }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": "TRANSACTION_SERIALIZABLE" + } + }, { "widget-type": "textbox", "label": "Database", diff --git a/postgresql-plugin/widgets/Postgres-batchsink.json b/postgresql-plugin/widgets/Postgres-batchsink.json index 6aa2dad8a..14e6f8154 100644 --- a/postgresql-plugin/widgets/Postgres-batchsink.json +++ b/postgresql-plugin/widgets/Postgres-batchsink.json @@ -65,6 +65,19 @@ "label": "Password", "name": "password" }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + "name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": "TRANSACTION_SERIALIZABLE" + } + }, { "widget-type": "keyvalue", "label": "Connection Arguments", @@ -186,6 +199,10 @@ "type": "property", "name": "port" }, + { + "type": "property", + "name": "transactionIsolationLevel" + }, { "type": "property", "name": "database" diff --git a/postgresql-plugin/widgets/Postgres-batchsource.json b/postgresql-plugin/widgets/Postgres-batchsource.json index 0e4ba28c1..60de4725f 100644 --- a/postgresql-plugin/widgets/Postgres-batchsource.json +++ b/postgresql-plugin/widgets/Postgres-batchsource.json @@ -65,6 +65,19 @@ "label": "Password", "name": "password" }, + { + "widget-type": "select", + "label": "Transaction Isolation Level", + 
"name": "transactionIsolationLevel", + "widget-attributes": { + "values": [ + "TRANSACTION_READ_COMMITTED", + "TRANSACTION_REPEATABLE_READ", + "TRANSACTION_SERIALIZABLE" + ], + "default": "TRANSACTION_SERIALIZABLE" + } + }, { "widget-type": "keyvalue", "label": "Connection Arguments", @@ -206,6 +219,10 @@ "type": "property", "name": "port" }, + { + "type": "property", + "name": "transactionIsolationLevel" + }, { "type": "property", "name": "database" diff --git a/saphana-plugin/pom.xml b/saphana-plugin/pom.xml index 86b40a38e..4b15f4e89 100644 --- a/saphana-plugin/pom.xml +++ b/saphana-plugin/pom.xml @@ -20,7 +20,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 SAP HANA plugin @@ -85,13 +85,13 @@ <_exportcontents> - io.cdap.plugin.saphana.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - io.cdap.plugin.saphana.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.saphana.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + io.cdap.plugin.saphana.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true diff --git a/teradata-plugin/pom.xml b/teradata-plugin/pom.xml index fa770a19a..94c47c674 100644 --- a/teradata-plugin/pom.xml +++ b/teradata-plugin/pom.xml @@ -21,7 +21,7 @@ database-plugins-parent io.cdap.plugin - 1.11.0-SNAPSHOT + 1.11.8 teradata-plugin @@ -90,14 +90,14 @@ <_exportcontents> - io.cdap.plugin.teradata.*; - io.cdap.plugin.util.*; - io.cdap.plugin.db.source.*; - io.cdap.plugin.db.sink.*; - io.cdap.plugin.saphana.*; - org.apache.commons.lang; - org.apache.commons.logging.*; - org.codehaus.jackson.* + io.cdap.plugin.teradata.*; + io.cdap.plugin.util.*; + io.cdap.plugin.db.source.*; + io.cdap.plugin.db.sink.*; + io.cdap.plugin.saphana.*; + org.apache.commons.lang; + org.apache.commons.logging.*; + org.codehaus.jackson.* *;inline=false;scope=compile true