From d24ad7c062eb1b5d9dce31a07c1e5329fde0b152 Mon Sep 17 00:00:00 2001 From: Akshay Karle <1443108+akshaykarle@users.noreply.github.com> Date: Wed, 2 Nov 2022 12:16:38 +0000 Subject: [PATCH 1/5] fix save_test_history macro to work when a model depends_on a source with the same name * Without checking the node_type, a model could match a source with the same name and the macro would error out trying to get the database on that node which is not present for a source * Fixes issue: https://github.com/re-data/re-data/issues/371 --- macros/run_end/save_results_history.sql | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/macros/run_end/save_results_history.sql b/macros/run_end/save_results_history.sql index 0b44bbc..0aa6a55 100644 --- a/macros/run_end/save_results_history.sql +++ b/macros/run_end/save_results_history.sql @@ -40,15 +40,15 @@ {% if any_refs %} {% set name = any_refs[0] %} - {% set node_name = re_data.priv_full_name_from_depends(el.node, name) %} + {% set node_name = re_data.priv_full_name_from_depends(el.node, name, "model") %} {% set schema = graph.nodes.get(node_name)['schema'] %} {% set database = graph.nodes.get(node_name)['database'] %} - {% set table_name = (database + '.' + schema + '.' + name) | lower %} - + {% set table_name = (database + '.' + schema + '.' + name) | lower %} + {% elif any_source %} {% set package_name = any_source[0][0] %} {% set name = any_source[0][1] %} - {% set node_name = re_data.priv_full_name_from_depends(el.node, name) %} + {% set node_name = re_data.priv_full_name_from_depends(el.node, name, "source") %} {% set schema = graph.sources.get(node_name)['schema'] %} {% set database = graph.sources.get(node_name)['database'] %} {% set table_name = (database + '.' + schema + '.' + name) | lower %} @@ -98,11 +98,12 @@ {% endmacro %} -{% macro priv_full_name_from_depends(node, name) %} +{% macro priv_full_name_from_depends(node, name, type) %} {% for full_name in node.depends_on.nodes %} {% set node_name = full_name.split('.')[-1] %} - {% if node_name == name %} + {% set node_type = full_name.split('.')[0] %} + {% if node_name == name and node_type == type %} {{ return(full_name) }} {% endif %} {% endfor %} From cb7085ee8e5986ab0488ad3c8aa6ad1a89d2e3c9 Mon Sep 17 00:00:00 2001 From: Carmen Mardiros Date: Tue, 20 Jun 2023 17:21:58 +0100 Subject: [PATCH 2/5] Make ref_type passed to priv_full_name_from_depends dynamic to account for model/snapshot types --- macros/run_end/save_results_history.sql | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/macros/run_end/save_results_history.sql b/macros/run_end/save_results_history.sql index 0aa6a55..1e4f53f 100644 --- a/macros/run_end/save_results_history.sql +++ b/macros/run_end/save_results_history.sql @@ -40,7 +40,12 @@ {% if any_refs %} {% set name = any_refs[0] %} - {% set node_name = re_data.priv_full_name_from_depends(el.node, name, "model") %} + {% if "snapshot" in name %} + {% set ref_type = "snapshot" %} + {% else %} + {% set ref_type = "model" %} + {% endif %} + {% set node_name = re_data.priv_full_name_from_depends(el.node, name, ref_type) %} {% set schema = graph.nodes.get(node_name)['schema'] %} {% set database = graph.nodes.get(node_name)['database'] %} {% set table_name = (database + '.' + schema + '.' + name) | lower %} From 96cb380ded7554e5d5df21d8d64741aa62ceb1fc Mon Sep 17 00:00:00 2001 From: Carmen Mardiros Date: Tue, 20 Jun 2023 17:26:51 +0100 Subject: [PATCH 3/5] Add comment --- macros/run_end/save_results_history.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/macros/run_end/save_results_history.sql b/macros/run_end/save_results_history.sql index 1e4f53f..9da86f6 100644 --- a/macros/run_end/save_results_history.sql +++ b/macros/run_end/save_results_history.sql @@ -40,6 +40,9 @@ {% if any_refs %} {% set name = any_refs[0] %} + -- NOTE: This relies on snapshot models to include the word "snapshot" in the model name. + -- There doesn't seem to be an easily accessible field to tell us that this node is a snapshot + -- without relying on the name or other proxies. {% if "snapshot" in name %} {% set ref_type = "snapshot" %} {% else %} From 930ee9aa0a44bf7b157b9afe90318b0b1b826e1b Mon Sep 17 00:00:00 2001 From: Carmen Mardiros Date: Thu, 22 Jun 2023 12:18:19 +0100 Subject: [PATCH 4/5] Extend to snapshots without 'snapshot' in name --- macros/run_end/save_results_history.sql | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/macros/run_end/save_results_history.sql b/macros/run_end/save_results_history.sql index 9da86f6..ac2b378 100644 --- a/macros/run_end/save_results_history.sql +++ b/macros/run_end/save_results_history.sql @@ -40,10 +40,12 @@ {% if any_refs %} {% set name = any_refs[0] %} - -- NOTE: This relies on snapshot models to include the word "snapshot" in the model name. - -- There doesn't seem to be an easily accessible field to tell us that this node is a snapshot - -- without relying on the name or other proxies. - {% if "snapshot" in name %} + -- NOTE: This relies on snapshot models to include the word "snapshot" in the model name or + -- the column `dbt_scd_id` to be included in the metadata kwargs (since this is a good proxy + -- for this model being a snapshot given `dbt_scd_id` is an internal dbt field). + -- There doesn't seem to be another, easily accessible field to tell us unambigously that this node is a snapshot + {% set metadata_column_name = el.node.to_dict().get('test_metadata', dict()).get('kwargs', dict()).get('column_name') %} + {% if "snapshot" in name or metadata_column_name == "dbt_scd_id" %} {% set ref_type = "snapshot" %} {% else %} {% set ref_type = "model" %} From 883a90deb5609553b338612f31e53936cd395c92 Mon Sep 17 00:00:00 2001 From: Carmen Mardiros Date: Thu, 22 Jun 2023 15:00:29 +0100 Subject: [PATCH 5/5] fix save_test_history macro to work when a model (snapshot or model) depends_on a source with the same name --- macros/run_end/save_results_history.sql | 33 +++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/macros/run_end/save_results_history.sql b/macros/run_end/save_results_history.sql index ac2b378..8bfda70 100644 --- a/macros/run_end/save_results_history.sql +++ b/macros/run_end/save_results_history.sql @@ -38,19 +38,21 @@ {% set any_refs = modules.re.findall("ref\(\'(?P.*)\'\)", el.node.test_metadata.kwargs['model']) %} {% set any_source = modules.re.findall("source\(\'(?P.*)\'\,\s+\'(?P.*)\'\)", el.node.test_metadata.kwargs['model']) %} + -- Separate `depends_on` nodes into sources and models to avoid incorrect matching of a test to a source + -- when it fact it refers to a model (snapshot or regular model) + {% set depends_on_sources = [] %} + {% set depends_on_models = [] %} + {% for full_name in el.node.depends_on.nodes %} + {%- if full_name.split('.')[0] == "source" -%} + {% do depends_on_sources.append(full_name)%} + {%- else -%} + {% do depends_on_models.append(full_name)%} + {%- endif -%} + {% endfor %} + {% if any_refs %} {% set name = any_refs[0] %} - -- NOTE: This relies on snapshot models to include the word "snapshot" in the model name or - -- the column `dbt_scd_id` to be included in the metadata kwargs (since this is a good proxy - -- for this model being a snapshot given `dbt_scd_id` is an internal dbt field). - -- There doesn't seem to be another, easily accessible field to tell us unambigously that this node is a snapshot - {% set metadata_column_name = el.node.to_dict().get('test_metadata', dict()).get('kwargs', dict()).get('column_name') %} - {% if "snapshot" in name or metadata_column_name == "dbt_scd_id" %} - {% set ref_type = "snapshot" %} - {% else %} - {% set ref_type = "model" %} - {% endif %} - {% set node_name = re_data.priv_full_name_from_depends(el.node, name, ref_type) %} + {% set node_name = re_data.priv_full_name_from_depends(el.node, name, depends_on_models) %} {% set schema = graph.nodes.get(node_name)['schema'] %} {% set database = graph.nodes.get(node_name)['database'] %} {% set table_name = (database + '.' + schema + '.' + name) | lower %} @@ -58,7 +60,7 @@ {% elif any_source %} {% set package_name = any_source[0][0] %} {% set name = any_source[0][1] %} - {% set node_name = re_data.priv_full_name_from_depends(el.node, name, "source") %} + {% set node_name = re_data.priv_full_name_from_depends(el.node, name, depends_on_sources) %} {% set schema = graph.sources.get(node_name)['schema'] %} {% set database = graph.sources.get(node_name)['database'] %} {% set table_name = (database + '.' + schema + '.' + name) | lower %} @@ -108,12 +110,11 @@ {% endmacro %} -{% macro priv_full_name_from_depends(node, name, type) %} +{% macro priv_full_name_from_depends(node, name, depends_on_nodes) %} - {% for full_name in node.depends_on.nodes %} + {% for full_name in depends_on_nodes %} {% set node_name = full_name.split('.')[-1] %} - {% set node_type = full_name.split('.')[0] %} - {% if node_name == name and node_type == type %} + {% if node_name == name %} {{ return(full_name) }} {% endif %} {% endfor %}