From 6c11ae83399ec4131c6da41f41c3a68cf4ab279b Mon Sep 17 00:00:00 2001 From: suelai Date: Mon, 18 Dec 2023 11:44:23 +0100 Subject: [PATCH 1/9] column specific anomaly tuning --- macros/meta/get_column_specific_config.sql | 20 +++++++++++++ models/meta/re_data_selected.sql | 18 ++++++++---- models/meta/re_data_selected_columns.sql | 33 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 macros/meta/get_column_specific_config.sql create mode 100644 models/meta/re_data_selected_columns.sql diff --git a/macros/meta/get_column_specific_config.sql b/macros/meta/get_column_specific_config.sql new file mode 100644 index 0000000..0611d70 --- /dev/null +++ b/macros/meta/get_column_specific_config.sql @@ -0,0 +1,20 @@ +{% macro get_column_specific_anomaly( + database, schema, column_name, table_name, metric_name +) %} + {% set column_path = "column." ~ column_name %} + select + name as table_name, + schema as schema, + database as database, + {{ "'" ~ column_name ~ "'" }} as column_name, + {{ "'" ~ metric_name ~ "'" }} as metric_name, + json_query(t, {{ "'$." ~ metric_name ~ "'" }}) as metric_spec + from + {{ ref("re_data_selected") }}, + unnest({{ re_data.json_extract_array("additional_metrics", column_path) }}) t + where + name = {{ "'" ~ table_name ~ "'" }} + and schema = {{ "'" ~ schema ~ "'" }} + and database = {{ "'" ~ database ~ "'" }} + +{% endmacro %} diff --git a/models/meta/re_data_selected.sql b/models/meta/re_data_selected.sql index 34348af..c7c07df 100644 --- a/models/meta/re_data_selected.sql +++ b/models/meta/re_data_selected.sql @@ -1,6 +1,12 @@ - -select - name, schema, database, time_filter, metrics, columns, anomaly_detector, owners -from {{ ref('re_data_monitored')}} -where - selected = true \ No newline at end of file +select + name, + schema, + database, + time_filter, + metrics, + additional_metrics, + columns, + anomaly_detector, + owners +from {{ ref("re_data_monitored") }} +where selected = true diff --git a/models/meta/re_data_selected_columns.sql b/models/meta/re_data_selected_columns.sql new file mode 100644 index 0000000..f0585b4 --- /dev/null +++ b/models/meta/re_data_selected_columns.sql @@ -0,0 +1,33 @@ +-- depends_on: {{ ref('re_data_z_score') }} +-- depends_on: {{ ref('re_data_selected') }} +{% set column_metric %} + select distinct + {{ split_and_return_nth_value("table_name", ".", 1) }} as database, + {{ split_and_return_nth_value("table_name", ".", 2) }} as schema, + {{ split_and_return_nth_value("table_name", ".", 3) }} as name, + column_name, + metric + from {{ ref('re_data_z_score')}} + where column_name != '' +{% endset %} +{% set column_metric_details = run_query(column_metric) %} +{% for col in column_metric_details %} + {% set column_name = re_data.row_value(col, "column_name") %} + {% set metric = re_data.row_value(col, "metric") %} + {% set database = re_data.row_value(col, "database") %} + {% set schema = re_data.row_value(col, "schema") %} + {% set table_name = re_data.row_value(col, "name") %} + {{ + get_column_specific_anomaly( + database=database, + schema=schema, + column_name=column_name, + table_name=table_name, + metric_name=metric, + ) + }} + {%- if not loop.last %} + union all + {%- endif %} + +{% endfor %} From 4601403544de1e9b171c5c31386475ab0fd459bc Mon Sep 17 00:00:00 2001 From: suelai Date: Wed, 27 Dec 2023 11:29:55 +0100 Subject: [PATCH 2/9] set up new models for column specifics --- macros/meta/get_column_specific_config.sql | 4 +- macros/utils/anomaly_labeling.sql | 79 ++++++++++++++++ macros/utils/fivetran_utils/json_extract.sql | 40 +++++++-- models/alerts/re_data_anomalies.sql | 95 +++++++++++++------- 4 files changed, 175 insertions(+), 43 deletions(-) create mode 100644 macros/utils/anomaly_labeling.sql diff --git a/macros/meta/get_column_specific_config.sql b/macros/meta/get_column_specific_config.sql index 0611d70..222fc46 100644 --- a/macros/meta/get_column_specific_config.sql +++ b/macros/meta/get_column_specific_config.sql @@ -3,10 +3,10 @@ ) %} {% set column_path = "column." ~ column_name %} select - name as table_name, + name as name, schema as schema, database as database, - {{ "'" ~ column_name ~ "'" }} as column_name, + {{ "'" ~ column_name ~ "'" }} as column, {{ "'" ~ metric_name ~ "'" }} as metric_name, json_query(t, {{ "'$." ~ metric_name ~ "'" }}) as metric_spec from diff --git a/macros/utils/anomaly_labeling.sql b/macros/utils/anomaly_labeling.sql new file mode 100644 index 0000000..22e04ea --- /dev/null +++ b/macros/utils/anomaly_labeling.sql @@ -0,0 +1,79 @@ +{% macro is_anomaly_from_model( + anomaly_config, + last_value, + last_avg, + z_score_value, + modified_z_score_value, + last_first_quartile, + last_iqr, + last_third_quartile +) %} + case + when + ( + lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) + = 'up' + and {{ last_value }} > {{ last_avg }} + ) + or ( + lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) + = 'down' + and {{ last_value }} < {{ last_avg }} + ) + or ( + lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) + != 'up' + and lower( + coalesce({{ json_extract(anomaly_config, "direction") }}, 'both') + ) + != 'down' + ) + then + case + when {{ json_extract(anomaly_config, "name") }} = 'z_score' + then + abs({{ z_score_value }}) > cast( + {{ json_extract(anomaly_config, "threshold") }} + as {{ numeric_type() }} + ) + when {{ json_extract(anomaly_config, "name") }} = 'modified_z_score' + then + abs({{ modified_z_score_value }}) > cast( + {{ json_extract(anomaly_config, "threshold") }} + as {{ numeric_type() }} + ) + when {{ json_extract(anomaly_config, "name") }} = 'boxplot' + then + ( + {{ last_value }} + < {{ last_first_quartile }} + - ( + cast( + {{ + json_extract( + anomaly_config, + "whisker_boundary_multiplier", + ) + }} as {{ numeric_type() }} + ) + * {{ last_iqr }} + ) + or {{ last_value }} + > {{ last_third_quartile }} + + ( + cast( + {{ + json_extract( + anomaly_config, + "whisker_boundary_multiplier", + ) + }} as {{ numeric_type() }} + ) + * {{ last_iqr }} + ) + ) + else false + end + else false + end +{% endmacro %} diff --git a/macros/utils/fivetran_utils/json_extract.sql b/macros/utils/fivetran_utils/json_extract.sql index 835f84c..39b80f3 100644 --- a/macros/utils/fivetran_utils/json_extract.sql +++ b/macros/utils/fivetran_utils/json_extract.sql @@ -2,39 +2,61 @@ # This file contains significant part of code derived from # https://github.com/fivetran/dbt_fivetran_utils/tree/v0.4.0 which is licensed under Apache License 2.0. #} - {% macro json_extract(string, string_path) -%} -{{ adapter.dispatch('json_extract','re_data') (string, string_path) }} + {{ adapter.dispatch("json_extract", "re_data")(string, string_path) }} {%- endmacro %} {% macro default__json_extract(string, string_path) %} - json_extract_path_text({{string}}, {{ "'" ~ string_path ~ "'" }} ) - + json_extract_path_text({{ string }}, {{ "'" ~ string_path ~ "'" }}) + {% endmacro %} {% macro snowflake__json_extract(string, string_path) %} - json_extract_path_text(try_parse_json( {{string}} ), {{ "'" ~ string_path ~ "'" }} ) + json_extract_path_text(try_parse_json({{ string }}), {{ "'" ~ string_path ~ "'" }}) {% endmacro %} {% macro redshift__json_extract(string, string_path) %} - case when is_valid_json( {{string}} ) then json_extract_path_text({{string}}, {{ "'" ~ string_path ~ "'" }} ) else null end - + case + when is_valid_json({{ string }}) + then json_extract_path_text({{ string }}, {{ "'" ~ string_path ~ "'" }}) + else null + end + {% endmacro %} {% macro bigquery__json_extract(string, string_path) %} - json_extract_scalar({{string}}, {{ "'$." ~ string_path ~ "'" }} ) + json_extract_scalar({{ string }}, {{ "'$." ~ string_path ~ "'" }}) {% endmacro %} {% macro postgres__json_extract(string, string_path) %} - {{string}}::json->>{{"'" ~ string_path ~ "'" }} + {{ string }}::json ->>{{ "'" ~ string_path ~ "'" }} + +{% endmacro %} + + +{% macro json_extract_array(string, string_path) -%} + + {{ adapter.dispatch("json_extract_array", "re_data")(string, string_path) }} + +{%- endmacro %} + +{% macro default__json_extract_array(string, string_path) %} + + json_extract_array({{ string }}, {{ "'" ~ string_path ~ "'" }}) +{%- endmacro %} + + +{% macro bigquery__json_extract_array(string, string_path) %} + + json_extract_array({{ string }}, {{ "'$." ~ string_path ~ "'" }}) {% endmacro %} diff --git a/models/alerts/re_data_anomalies.sql b/models/alerts/re_data_anomalies.sql index 58926af..dfc8dc5 100644 --- a/models/alerts/re_data_anomalies.sql +++ b/models/alerts/re_data_anomalies.sql @@ -1,8 +1,4 @@ -{{ - config( - materialized='view' - ) -}} +{{ config(materialized="view") }} select z.id, z.table_name, @@ -11,6 +7,7 @@ select z.z_score_value, z.modified_z_score_value, m.anomaly_detector, + c.metric_spec, z.last_value, z.last_avg, z.last_median, @@ -18,38 +15,72 @@ select z.last_median_absolute_deviation, z.last_mean_absolute_deviation, z.last_iqr, - z.last_first_quartile - (cast( {{ json_extract('m.anomaly_detector', 'whisker_boundary_multiplier') }} as {{numeric_type()}} ) * z.last_iqr) lower_bound, - z.last_third_quartile + (cast( {{ json_extract('m.anomaly_detector', 'whisker_boundary_multiplier') }} as {{numeric_type()}} ) * z.last_iqr) upper_bound, + z.last_first_quartile - ( + cast( + {{ json_extract("m.anomaly_detector", "whisker_boundary_multiplier") }} + as {{ numeric_type() }} + ) + * z.last_iqr + ) lower_bound, + z.last_third_quartile + ( + cast( + {{ json_extract("m.anomaly_detector", "whisker_boundary_multiplier") }} + as {{ numeric_type() }} + ) + * z.last_iqr + ) upper_bound, z.last_first_quartile, z.last_third_quartile, z.time_window_end, z.interval_length_sec, z.computed_on, - {{ re_data.generate_anomaly_message('z.column_name', 'z.metric', 'z.last_value', 'z.last_avg') }} as message, - {{ re_data.generate_metric_value_text('z.metric', 'z.last_value') }} as last_value_text -from - {{ ref('re_data_z_score')}} z -left join {{ ref('re_data_selected') }} m -on {{ split_and_return_nth_value('table_name', '.', 1) }} = m.database -and {{ split_and_return_nth_value('table_name', '.', 2) }} = m.schema -and {{ split_and_return_nth_value('table_name', '.', 3) }} = m.name + {{ + re_data.generate_anomaly_message( + "z.column_name", "z.metric", "z.last_value", "z.last_avg" + ) + }} as message, + {{ re_data.generate_metric_value_text("z.metric", "z.last_value") }} + as last_value_text +from {{ ref("re_data_z_score") }} z +left join + {{ ref("re_data_selected") }} m + on {{ split_and_return_nth_value("table_name", ".", 1) }} = m.database + and {{ split_and_return_nth_value("table_name", ".", 2) }} = m.schema + and {{ split_and_return_nth_value("table_name", ".", 3) }} = m.name +left join + {{ ref("re_data_selected_columns") }} c + on {{ split_and_return_nth_value("table_name", ".", 1) }} = c.database + and {{ split_and_return_nth_value("table_name", ".", 2) }} = c.schema + and {{ split_and_return_nth_value("table_name", ".", 3) }} = c.name + and z.column_name = c.column where - case when (lower(coalesce({{ json_extract('m.anomaly_detector', 'direction') }}, 'both')) = 'up' and z.last_value > z.last_avg) - or (lower(coalesce({{ json_extract('m.anomaly_detector', 'direction') }}, 'both')) = 'down' and z.last_value < z.last_avg) - or (lower(coalesce({{ json_extract('m.anomaly_detector', 'direction') }}, 'both')) != 'up' and lower(coalesce({{ json_extract('m.anomaly_detector', 'direction') }}, 'both')) != 'down') + case + when c.metric_spec is not null then - case - when {{ json_extract('m.anomaly_detector', 'name') }} = 'z_score' - then abs(z_score_value) > cast({{ json_extract('m.anomaly_detector', 'threshold') }} as {{ numeric_type() }}) - when {{ json_extract('m.anomaly_detector', 'name') }} = 'modified_z_score' - then abs(modified_z_score_value) > cast( {{ json_extract('m.anomaly_detector', 'threshold') }} as {{numeric_type()}} ) - when {{ json_extract('m.anomaly_detector', 'name') }} = 'boxplot' - then ( - z.last_value < z.last_first_quartile - (cast( {{ json_extract('m.anomaly_detector', 'whisker_boundary_multiplier') }} as {{numeric_type()}} ) * z.last_iqr) - or - z.last_value > z.last_third_quartile + (cast( {{ json_extract('m.anomaly_detector', 'whisker_boundary_multiplier') }} as {{numeric_type()}} ) * z.last_iqr) - ) - else false - end - else false + {{ + is_anomaly_from_model( + anomaly_config="'c.metric_spec'", + last_value="z.last_value", + last_avg="z.last_avg", + z_score_value="z.z_score_value", + modified_z_score_value="z.modified_z_score_value", + last_first_quartile="z.last_first_quartile", + last_iqr="z.last_iqr", + last_third_quartile="z.last_third_quartile", + ) + }} + else + {{ + is_anomaly_from_model( + anomaly_config="'m.anomaly_detector'", + last_value="z.last_value", + last_avg="z.last_avg", + z_score_value="z.z_score_value", + modified_z_score_value="z.modified_z_score_value", + last_first_quartile="z.last_first_quartile", + last_iqr="z.last_iqr", + last_third_quartile="z.last_third_quartile", + ) + }} + end From e0252c173d0839822b1e8a73ad22f7baaa9775e8 Mon Sep 17 00:00:00 2001 From: suelai Date: Wed, 27 Dec 2023 13:16:32 +0100 Subject: [PATCH 3/9] typo --- macros/utils/anomaly_labeling.sql | 159 ++++++++++++++++-------------- 1 file changed, 83 insertions(+), 76 deletions(-) diff --git a/macros/utils/anomaly_labeling.sql b/macros/utils/anomaly_labeling.sql index 22e04ea..5899e20 100644 --- a/macros/utils/anomaly_labeling.sql +++ b/macros/utils/anomaly_labeling.sql @@ -1,79 +1,86 @@ -{% macro is_anomaly_from_model( - anomaly_config, - last_value, - last_avg, - z_score_value, - modified_z_score_value, - last_first_quartile, - last_iqr, - last_third_quartile -) %} +{{ config(materialized="view") }} +select + z.id, + z.table_name, + z.column_name, + z.metric, + z.z_score_value, + z.modified_z_score_value, + m.anomaly_detector, + c.metric_spec, + z.last_value, + z.last_avg, + z.last_median, + z.last_stddev, + z.last_median_absolute_deviation, + z.last_mean_absolute_deviation, + z.last_iqr, + z.last_first_quartile - ( + cast( + {{ json_extract("m.anomaly_detector", "whisker_boundary_multiplier") }} + as {{ numeric_type() }} + ) + * z.last_iqr + ) lower_bound, + z.last_third_quartile + ( + cast( + {{ json_extract("m.anomaly_detector", "whisker_boundary_multiplier") }} + as {{ numeric_type() }} + ) + * z.last_iqr + ) upper_bound, + z.last_first_quartile, + z.last_third_quartile, + z.time_window_end, + z.interval_length_sec, + z.computed_on, + {{ + re_data.generate_anomaly_message( + "z.column_name", "z.metric", "z.last_value", "z.last_avg" + ) + }} as message, + {{ re_data.generate_metric_value_text("z.metric", "z.last_value") }} + as last_value_text +from {{ ref("re_data_z_score") }} z +left join + {{ ref("re_data_selected") }} m + on {{ split_and_return_nth_value("table_name", ".", 1) }} = m.database + and {{ split_and_return_nth_value("table_name", ".", 2) }} = m.schema + and {{ split_and_return_nth_value("table_name", ".", 3) }} = m.name +left join + {{ ref("re_data_selected_columns") }} c + on {{ split_and_return_nth_value("table_name", ".", 1) }} = c.database + and {{ split_and_return_nth_value("table_name", ".", 2) }} = c.schema + and {{ split_and_return_nth_value("table_name", ".", 3) }} = c.name + and z.column_name = c.column +where case - when - ( - lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) - = 'up' - and {{ last_value }} > {{ last_avg }} - ) - or ( - lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) - = 'down' - and {{ last_value }} < {{ last_avg }} - ) - or ( - lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) - != 'up' - and lower( - coalesce({{ json_extract(anomaly_config, "direction") }}, 'both') - ) - != 'down' - ) + when c.metric_spec is not null then - case - when {{ json_extract(anomaly_config, "name") }} = 'z_score' - then - abs({{ z_score_value }}) > cast( - {{ json_extract(anomaly_config, "threshold") }} - as {{ numeric_type() }} - ) - when {{ json_extract(anomaly_config, "name") }} = 'modified_z_score' - then - abs({{ modified_z_score_value }}) > cast( - {{ json_extract(anomaly_config, "threshold") }} - as {{ numeric_type() }} - ) - when {{ json_extract(anomaly_config, "name") }} = 'boxplot' - then - ( - {{ last_value }} - < {{ last_first_quartile }} - - ( - cast( - {{ - json_extract( - anomaly_config, - "whisker_boundary_multiplier", - ) - }} as {{ numeric_type() }} - ) - * {{ last_iqr }} - ) - or {{ last_value }} - > {{ last_third_quartile }} - + ( - cast( - {{ - json_extract( - anomaly_config, - "whisker_boundary_multiplier", - ) - }} as {{ numeric_type() }} - ) - * {{ last_iqr }} - ) - ) - else false - end - else false + {{ + is_anomaly_from_model( + anomaly_config="c.metric_spec", + last_value="z.last_value", + last_avg="z.last_avg", + z_score_value="z.z_score_value", + modified_z_score_value="z.modified_z_score_value", + last_first_quartile="z.last_first_quartile", + last_iqr="z.last_iqr", + last_third_quartile="z.last_third_quartile", + ) + }} + else + {{ + is_anomaly_from_model( + anomaly_config="m.anomaly_detector", + last_value="z.last_value", + last_avg="z.last_avg", + z_score_value="z.z_score_value", + modified_z_score_value="z.modified_z_score_value", + last_first_quartile="z.last_first_quartile", + last_iqr="z.last_iqr", + last_third_quartile="z.last_third_quartile", + ) + }} + end -{% endmacro %} From 0063f256bf587a8370936a925b796a72743c10b0 Mon Sep 17 00:00:00 2001 From: suelai Date: Wed, 27 Dec 2023 13:30:13 +0100 Subject: [PATCH 4/9] fix --- macros/utils/anomaly_labeling.sql | 159 ++++++++++++++---------------- 1 file changed, 76 insertions(+), 83 deletions(-) diff --git a/macros/utils/anomaly_labeling.sql b/macros/utils/anomaly_labeling.sql index 5899e20..22e04ea 100644 --- a/macros/utils/anomaly_labeling.sql +++ b/macros/utils/anomaly_labeling.sql @@ -1,86 +1,79 @@ -{{ config(materialized="view") }} -select - z.id, - z.table_name, - z.column_name, - z.metric, - z.z_score_value, - z.modified_z_score_value, - m.anomaly_detector, - c.metric_spec, - z.last_value, - z.last_avg, - z.last_median, - z.last_stddev, - z.last_median_absolute_deviation, - z.last_mean_absolute_deviation, - z.last_iqr, - z.last_first_quartile - ( - cast( - {{ json_extract("m.anomaly_detector", "whisker_boundary_multiplier") }} - as {{ numeric_type() }} - ) - * z.last_iqr - ) lower_bound, - z.last_third_quartile + ( - cast( - {{ json_extract("m.anomaly_detector", "whisker_boundary_multiplier") }} - as {{ numeric_type() }} - ) - * z.last_iqr - ) upper_bound, - z.last_first_quartile, - z.last_third_quartile, - z.time_window_end, - z.interval_length_sec, - z.computed_on, - {{ - re_data.generate_anomaly_message( - "z.column_name", "z.metric", "z.last_value", "z.last_avg" - ) - }} as message, - {{ re_data.generate_metric_value_text("z.metric", "z.last_value") }} - as last_value_text -from {{ ref("re_data_z_score") }} z -left join - {{ ref("re_data_selected") }} m - on {{ split_and_return_nth_value("table_name", ".", 1) }} = m.database - and {{ split_and_return_nth_value("table_name", ".", 2) }} = m.schema - and {{ split_and_return_nth_value("table_name", ".", 3) }} = m.name -left join - {{ ref("re_data_selected_columns") }} c - on {{ split_and_return_nth_value("table_name", ".", 1) }} = c.database - and {{ split_and_return_nth_value("table_name", ".", 2) }} = c.schema - and {{ split_and_return_nth_value("table_name", ".", 3) }} = c.name - and z.column_name = c.column -where +{% macro is_anomaly_from_model( + anomaly_config, + last_value, + last_avg, + z_score_value, + modified_z_score_value, + last_first_quartile, + last_iqr, + last_third_quartile +) %} case - when c.metric_spec is not null - then - {{ - is_anomaly_from_model( - anomaly_config="c.metric_spec", - last_value="z.last_value", - last_avg="z.last_avg", - z_score_value="z.z_score_value", - modified_z_score_value="z.modified_z_score_value", - last_first_quartile="z.last_first_quartile", - last_iqr="z.last_iqr", - last_third_quartile="z.last_third_quartile", - ) - }} - else - {{ - is_anomaly_from_model( - anomaly_config="m.anomaly_detector", - last_value="z.last_value", - last_avg="z.last_avg", - z_score_value="z.z_score_value", - modified_z_score_value="z.modified_z_score_value", - last_first_quartile="z.last_first_quartile", - last_iqr="z.last_iqr", - last_third_quartile="z.last_third_quartile", + when + ( + lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) + = 'up' + and {{ last_value }} > {{ last_avg }} + ) + or ( + lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) + = 'down' + and {{ last_value }} < {{ last_avg }} + ) + or ( + lower(coalesce({{ json_extract(anomaly_config, "direction") }}, 'both')) + != 'up' + and lower( + coalesce({{ json_extract(anomaly_config, "direction") }}, 'both') ) - }} - + != 'down' + ) + then + case + when {{ json_extract(anomaly_config, "name") }} = 'z_score' + then + abs({{ z_score_value }}) > cast( + {{ json_extract(anomaly_config, "threshold") }} + as {{ numeric_type() }} + ) + when {{ json_extract(anomaly_config, "name") }} = 'modified_z_score' + then + abs({{ modified_z_score_value }}) > cast( + {{ json_extract(anomaly_config, "threshold") }} + as {{ numeric_type() }} + ) + when {{ json_extract(anomaly_config, "name") }} = 'boxplot' + then + ( + {{ last_value }} + < {{ last_first_quartile }} + - ( + cast( + {{ + json_extract( + anomaly_config, + "whisker_boundary_multiplier", + ) + }} as {{ numeric_type() }} + ) + * {{ last_iqr }} + ) + or {{ last_value }} + > {{ last_third_quartile }} + + ( + cast( + {{ + json_extract( + anomaly_config, + "whisker_boundary_multiplier", + ) + }} as {{ numeric_type() }} + ) + * {{ last_iqr }} + ) + ) + else false + end + else false end +{% endmacro %} From f93d985229ba2eb6bd26bda8d9389f95c348753f Mon Sep 17 00:00:00 2001 From: suelai Date: Wed, 27 Dec 2023 13:30:55 +0100 Subject: [PATCH 5/9] non string --- models/alerts/re_data_anomalies.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/alerts/re_data_anomalies.sql b/models/alerts/re_data_anomalies.sql index dfc8dc5..5899e20 100644 --- a/models/alerts/re_data_anomalies.sql +++ b/models/alerts/re_data_anomalies.sql @@ -59,7 +59,7 @@ where then {{ is_anomaly_from_model( - anomaly_config="'c.metric_spec'", + anomaly_config="c.metric_spec", last_value="z.last_value", last_avg="z.last_avg", z_score_value="z.z_score_value", @@ -72,7 +72,7 @@ where else {{ is_anomaly_from_model( - anomaly_config="'m.anomaly_detector'", + anomaly_config="m.anomaly_detector", last_value="z.last_value", last_avg="z.last_avg", z_score_value="z.z_score_value", From 59c6f2a00146f5d0a69665dfc36a2ecb9be3d3b0 Mon Sep 17 00:00:00 2001 From: suelai Date: Wed, 27 Dec 2023 13:48:19 +0100 Subject: [PATCH 6/9] add path and new macro --- macros/utils/anomaly_labeling.sql | 135 ++++++++++++++++++++++++++++ models/alerts/re_data_anomalies.sql | 2 +- 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/macros/utils/anomaly_labeling.sql b/macros/utils/anomaly_labeling.sql index 22e04ea..0545823 100644 --- a/macros/utils/anomaly_labeling.sql +++ b/macros/utils/anomaly_labeling.sql @@ -77,3 +77,138 @@ else false end {% endmacro %} + + +{% macro is_anomaly_from_column( + anomaly_config, + last_value, + last_avg, + z_score_value, + modified_z_score_value, + last_first_quartile, + last_iqr, + last_third_quartile +) %} + case + when + ( + lower( + coalesce( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.direction", + ) + }}, + 'both' + ) + ) + = 'up' + and {{ last_value }} > {{ last_avg }} + ) + or ( + lower( + coalesce( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.direction", + ) + }}, + 'both' + ) + ) + = 'down' + and {{ last_value }} < {{ last_avg }} + ) + or ( + lower( + coalesce( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.direction", + ) + }}, + 'both' + ) + ) + != 'up' + and lower( + coalesce( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.direction", + ) + }}, + 'both' + ) + ) + != 'down' + ) + then + case + when + {{ json_extract(anomaly_config, "re_data_anomaly_detector.name") }} + = 'z_score' + then + abs({{ z_score_value }}) > cast( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.threshold", + ) + }} as {{ numeric_type() }} + ) + when + {{ json_extract(anomaly_config, "re_data_anomaly_detector.name") }} + = 'modified_z_score' + then + abs({{ modified_z_score_value }}) > cast( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.threshold", + ) + }} as {{ numeric_type() }} + ) + when + {{ json_extract(anomaly_config, "re_data_anomaly_detector.name") }} + = 'boxplot' + then + ( + {{ last_value }} + < {{ last_first_quartile }} + - ( + cast( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.whisker_boundary_multiplier", + ) + }} + as {{ numeric_type() }} + ) + * {{ last_iqr }} + ) + or {{ last_value }} + > {{ last_third_quartile }} + + ( + cast( + {{ + json_extract( + anomaly_config, + "re_data_anomaly_detector.whisker_boundary_multiplier", + ) + }} + as {{ numeric_type() }} + ) + * {{ last_iqr }} + ) + ) + else false + end + else false + end +{% endmacro %} diff --git a/models/alerts/re_data_anomalies.sql b/models/alerts/re_data_anomalies.sql index 5899e20..5c84366 100644 --- a/models/alerts/re_data_anomalies.sql +++ b/models/alerts/re_data_anomalies.sql @@ -58,7 +58,7 @@ where when c.metric_spec is not null then {{ - is_anomaly_from_model( + is_anomaly_from_column( anomaly_config="c.metric_spec", last_value="z.last_value", last_avg="z.last_avg", From 04507c84e2eb294ecf49bed720ad2fab228f8a88 Mon Sep 17 00:00:00 2001 From: suelai Date: Wed, 27 Dec 2023 16:34:27 +0100 Subject: [PATCH 7/9] add change percentage --- macros/utils/anomaly_labeling.sql | 180 ++++++++++++++++++++++++++++ models/alerts/re_data_anomalies.sql | 12 ++ 2 files changed, 192 insertions(+) diff --git a/macros/utils/anomaly_labeling.sql b/macros/utils/anomaly_labeling.sql index 0545823..619001d 100644 --- a/macros/utils/anomaly_labeling.sql +++ b/macros/utils/anomaly_labeling.sql @@ -79,6 +79,186 @@ {% endmacro %} +{% macro is_anomaly_absolute_threshold(anomaly_config, last_value) %} + case + when + {{ + json_extract( + anomaly_config, + "absolute_threshold.threshold", + ) + }} is not null + then + case + when + + lower( + {{ + json_extract( + anomaly_config, + "absolute_threshold.direction", + ) + }} + ) + = 'up' + then + {{ last_value }} > cast( + {{ + json_extract( + anomaly_config, + "absolute_threshold.threshold", + ) + }} as {{ numeric_type() }} + ) + + when + lower( + {{ + json_extract( + anomaly_config, + "absolute_threshold.direction", + ) + }} + ) + = 'down' + then + {{ last_value }} < cast( + {{ + json_extract( + anomaly_config, + "absolute_threshold.threshold", + ) + }} as {{ numeric_type() }} + ) + when + lower( + coalesce( + {{ + json_extract( + anomaly_config, + "absolute_threshold.direction", + ) + }}, + 'both' + ) + ) + = 'both' + then + abs({{ last_value }}) > cast( + {{ + json_extract( + anomaly_config, + "absolute_threshold.threshold", + ) + }} as {{ numeric_type() }} + ) + end + else true + end + +{% endmacro %} + +{% macro is_anomaly_change_percentage(anomaly_config, last_value, last_avg) %} + case + when + {{ + json_extract( + anomaly_config, + "change_percentage.threshold", + ) + }} is not null + then + case + when + + lower( + {{ + json_extract( + anomaly_config, + "change_percentage.direction", + ) + }} + ) + = 'up' + then + ( + {{ + change_percentage( + last_value=last_value, last_avg=last_avg + ) + }} + ) > cast( + {{ + json_extract( + anomaly_config, + "change_percentage.threshold", + ) + }} as {{ numeric_type() }} + ) + + when + lower( + {{ + json_extract( + anomaly_config, + "change_percentage.direction", + ) + }} + ) + = 'down' + then + ( + {{ + change_percentage( + last_value=last_value, last_avg=last_avg + ) + }} + ) < ( + 0.0 - ( + cast( + {{ + json_extract( + anomaly_config, + "change_percentage.threshold", + ) + }} as {{ numeric_type() }} + ) + ) + ) + when + lower( + coalesce( + {{ + json_extract( + anomaly_config, + "change_percentage.direction", + ) + }}, + 'both' + ) + ) + = 'both' + then + abs( + {{ + change_percentage( + last_value=last_value, last_avg=last_avg + ) + }} + ) > cast( + {{ + json_extract( + anomaly_config, + "change_percentage.threshold", + ) + }} as {{ numeric_type() }} + ) + end + else true + end + +{% endmacro %} + {% macro is_anomaly_from_column( anomaly_config, last_value, diff --git a/models/alerts/re_data_anomalies.sql b/models/alerts/re_data_anomalies.sql index 5c84366..8d27d65 100644 --- a/models/alerts/re_data_anomalies.sql +++ b/models/alerts/re_data_anomalies.sql @@ -69,6 +69,18 @@ where last_third_quartile="z.last_third_quartile", ) }} + and {{ + is_anomaly_absolute_threshold( + anomaly_config="c.metric_spec", last_value="z.last_value" + ) + }} + and {{ + is_anomaly_change_percentage( + anomaly_config="c.metric_spec", + last_value="z.last_value", + last_avg="z.last_avg", + ) + }} else {{ is_anomaly_from_model( From 45b8781c52e1bd8d418a26b73edeca19757e1f6d Mon Sep 17 00:00:00 2001 From: suelai Date: Tue, 16 Jan 2024 10:10:55 +0100 Subject: [PATCH 8/9] add change_percentage --- macros/utils/anomaly_labeling.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/macros/utils/anomaly_labeling.sql b/macros/utils/anomaly_labeling.sql index 619001d..dd17b1e 100644 --- a/macros/utils/anomaly_labeling.sql +++ b/macros/utils/anomaly_labeling.sql @@ -1,3 +1,9 @@ +{% macro change_percentage(last_value, last_avg) %} + cast({{ last_value }} - {{ last_avg }} as float64) + / nullif(cast({{ last_avg }} as float64), 0) + * 100.0 +{% endmacro %} + {% macro is_anomaly_from_model( anomaly_config, last_value, From 47a092695687bda230b8179db2b7491780dfe66e Mon Sep 17 00:00:00 2001 From: David Zajac Date: Mon, 22 Jan 2024 19:49:19 -0300 Subject: [PATCH 9/9] Fix for numeric_type for all dbs --- macros/utils/anomaly_labeling.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/utils/anomaly_labeling.sql b/macros/utils/anomaly_labeling.sql index dd17b1e..97cecb2 100644 --- a/macros/utils/anomaly_labeling.sql +++ b/macros/utils/anomaly_labeling.sql @@ -1,6 +1,6 @@ {% macro change_percentage(last_value, last_avg) %} - cast({{ last_value }} - {{ last_avg }} as float64) - / nullif(cast({{ last_avg }} as float64), 0) + cast({{ last_value }} - {{ last_avg }} as {{ numeric_type() }}) + / nullif(cast({{ last_avg }} as {{ numeric_type() }}), 0) * 100.0 {% endmacro %}