From 9e621fbcc581dfe3cab222800c0536438c2f0246 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Wed, 17 Apr 2024 16:49:59 +0100 Subject: [PATCH] Add option to use refer in channel classification if mkt is null --- CHANGELOG | 11 ++ dbt_project.yml | 1 + .../field_definitions/channel_group_query.sql | 126 ++++++++++++------ .../snowplow_unified_sessions_this_run.sql | 7 +- .../snowplow_unified_views_this_run.sql | 7 +- 5 files changed, 111 insertions(+), 41 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b2ef8bfd..bdcd95f8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,14 @@ +snowplow-unified 0.4.1 (2024-0X-XX) +--------------------------------------- +## Summary +XXX + +## Features +- New `snowplow__use_refr_if_mkt_null` variable to use `refr_` fields if `mkt_` ones are null in default channel group classification + +## Upgrading +Bump the snowplow-unified version in your `packages.yml` file. + snowplow-unified 0.4.0 (2024-03-25) --------------------------------------- ## Summary diff --git a/dbt_project.yml b/dbt_project.yml index 9a638bd7..2669d47e 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -65,6 +65,7 @@ vars: snowplow__start_date: '2020-01-01' # snowplow__total_all_conversions: false snowplow__upsert_lookback_days: 30 + snowplow__use_refr_if_mkt_null: false # please refer to the macros within identifiers.sql for default values snowplow__session_identifiers: [] diff --git a/macros/field_definitions/channel_group_query.sql b/macros/field_definitions/channel_group_query.sql index 41a776f9..a8a1a9bb 100644 --- a/macros/field_definitions/channel_group_query.sql +++ b/macros/field_definitions/channel_group_query.sql @@ -11,10 +11,26 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% macro bigquery__channel_group_query() %} +{% set src_field %} + {% if var('snowplow__use_refr_if_mkt_null', false) %} + coalesce(mkt_source, refr_source) + {% else %} + mkt_source + {% endif %} +{% endset %} +{% set medium_field %} + {% 
if var('snowplow__use_refr_if_mkt_null', false) %} + coalesce(mkt_medium, refr_medium) + {% else %} + mkt_medium + {% endif %} +{% endset %} +{# Note that campaign has no equivalent in refer #} + case - when lower(trim(mkt_source)) = 'direct' and lower(trim(mkt_medium)) in ('not set', 'none') then 'Direct' - when lower(trim(mkt_medium)) like '%cross-network%' then 'Cross-network' - when regexp_contains(trim(mkt_medium), r'(?i)^(.*cp.*|ppc|retargeting|paid.*)$') then + when lower(trim({{ src_field }})) = 'direct' and lower(trim({{ medium_field }})) in ('not set', 'none') then 'Direct' + when lower(trim({{ medium_field }})) like '%cross-network%' then 'Cross-network' + when regexp_contains(trim({{ medium_field }}), r'(?i)^(.*cp.*|ppc|retargeting|paid.*)$') then case when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING' or regexp_contains(trim(mkt_campaign), r'(?i)^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Paid Shopping' @@ -23,28 +39,44 @@ case when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' then 'Paid Video' else 'Paid Other' end - when lower(trim(mkt_medium)) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display' + when lower(trim({{ medium_field }})) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display' when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING' or regexp_contains(trim(mkt_campaign), r'(?i)^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Organic Shopping' - when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim(mkt_medium)) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social' + when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim({{ medium_field }})) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social' when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' - or regexp_contains(trim(mkt_medium), r'(?i)^(.*video.*)$') then 'Organic Video' - when upper(source_category) = 
'SOURCE_CATEGORY_SEARCH' or lower(trim(mkt_medium)) = 'organic' then 'Organic Search' - when lower(trim(mkt_medium)) in ('referral', 'app', 'link') then 'Referral' - when lower(trim(mkt_source)) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim(mkt_medium)) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email' - when lower(trim(mkt_medium)) = 'affiliate' then 'Affiliates' - when lower(trim(mkt_medium)) = 'audio' then 'Audio' - when lower(trim(mkt_source)) = 'sms' or lower(trim(mkt_medium)) = 'sms' then 'SMS' - when lower(trim(mkt_medium)) like '%push' or regexp_contains(trim(mkt_medium), r'(?i).*(mobile|notification).*') or lower(trim(mkt_source)) = 'firebase' then 'Mobile Push Notifications' + or regexp_contains(trim({{ medium_field }}), r'(?i)^(.*video.*)$') then 'Organic Video' + when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim({{ medium_field }})) = 'organic' then 'Organic Search' + when lower(trim({{ medium_field }})) in ('referral', 'app', 'link') then 'Referral' + when lower(trim({{ src_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim({{ medium_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email' + when lower(trim({{ medium_field }})) = 'affiliate' then 'Affiliates' + when lower(trim({{ medium_field }})) = 'audio' then 'Audio' + when lower(trim({{ src_field }})) = 'sms' or lower(trim({{ medium_field }})) = 'sms' then 'SMS' + when lower(trim({{ medium_field }})) like '%push' or regexp_contains(trim({{ medium_field }}), r'(?i).*(mobile|notification).*') or lower(trim({{ src_field }})) = 'firebase' then 'Mobile Push Notifications' else 'Unassigned' end {% endmacro %} {% macro default__channel_group_query() %} +{% set src_field %} + {% if var('snowplow__use_refr_if_mkt_null', false) %} + coalesce(mkt_source, refr_source) + {% else %} + mkt_source + {% endif %} +{% endset %} +{% set medium_field %} + {% if var('snowplow__use_refr_if_mkt_null', false) %} + coalesce(mkt_medium, refr_medium) + {% else %} 
+ mkt_medium + {% endif %} +{% endset %} +{# Note that campaign has no equivalent in refer #} + case - when lower(trim(mkt_source)) = 'direct' and lower(trim(mkt_medium)) in ('not set', 'none') then 'Direct' - when lower(trim(mkt_medium)) like '%cross-network%' then 'Cross-network' - when regexp_like(lower(trim(mkt_medium)), '^(.*cp.*|ppc|retargeting|paid.*)$') then + when lower(trim({{ src_field }})) = 'direct' and lower(trim({{ medium_field }})) in ('not set', 'none') then 'Direct' + when lower(trim({{ medium_field }})) like '%cross-network%' then 'Cross-network' + when regexp_like(lower(trim({{ medium_field }})), '^(.*cp.*|ppc|retargeting|paid.*)$') then case when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING' or regexp_like(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Paid Shopping' @@ -53,28 +85,44 @@ case when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' then 'Paid Video' else 'Paid Other' end - when lower(trim(mkt_medium)) in ('display', 'banner', 'expandable', 'intersitial', 'cpm') then 'Display' + when lower(trim({{ medium_field }})) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display' when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING' or regexp_like(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Organic Shopping' - when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim(mkt_medium)) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social' + when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim({{ medium_field }})) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social' when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' - or regexp_like(lower(trim(mkt_medium)), '^(.*video.*)$') then 'Organic Video' - when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim(mkt_medium)) = 'organic' then 'Organic Search' - when lower(trim(mkt_medium)) in ('referral', 
'app', 'link') then 'Referral' - when lower(trim(mkt_source)) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim(mkt_medium)) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email' - when lower(trim(mkt_medium)) = 'affiliate' then 'Affiliates' - when lower(trim(mkt_medium)) = 'audio' then 'Audio' - when lower(trim(mkt_source)) = 'sms' or lower(trim(mkt_medium)) = 'sms' then 'SMS' - when lower(trim(mkt_medium)) like '%push' or regexp_like(lower(trim(mkt_medium)), '.*(mobile|notification).*') or lower(trim(mkt_source)) = 'firebase' then 'Mobile Push Notifications' + or regexp_like(lower(trim({{ medium_field }})), '^(.*video.*)$') then 'Organic Video' + when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim({{ medium_field }})) = 'organic' then 'Organic Search' + when lower(trim({{ medium_field }})) in ('referral', 'app', 'link') then 'Referral' + when lower(trim({{ src_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim({{ medium_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email' + when lower(trim({{ medium_field }})) = 'affiliate' then 'Affiliates' + when lower(trim({{ medium_field }})) = 'audio' then 'Audio' + when lower(trim({{ src_field }})) = 'sms' or lower(trim({{ medium_field }})) = 'sms' then 'SMS' + when lower(trim({{ medium_field }})) like '%push' or regexp_like(lower(trim({{ medium_field }})), '.*(mobile|notification).*') or lower(trim({{ src_field }})) = 'firebase' then 'Mobile Push Notifications' else 'Unassigned' end {% endmacro %} {% macro redshift__channel_group_query() %} +{% set src_field %} + {% if var('snowplow__use_refr_if_mkt_null', false) %} + coalesce(mkt_source, refr_source) + {% else %} + mkt_source + {% endif %} +{% endset %} +{% set medium_field %} + {% if var('snowplow__use_refr_if_mkt_null', false) %} + coalesce(mkt_medium, refr_medium) + {% else %} + mkt_medium + {% endif %} +{% endset %} +{# Note that campaign has no equivalent in refer #} + case - when lower(trim(mkt_source)) = 
'direct' and lower(trim(mkt_medium)) in ('not set', 'none') then 'Direct' - when lower(trim(mkt_medium)) like '%cross-network%' then 'Cross-network' - when regexp_instr(lower(trim(mkt_medium)), '^(.*cp.*|ppc|retargeting|paid.*)$') then + when lower(trim({{ src_field }})) = 'direct' and lower(trim({{ medium_field }})) in ('not set', 'none') then 'Direct' + when lower(trim({{ medium_field }})) like '%cross-network%' then 'Cross-network' + when regexp_instr(lower(trim({{ medium_field }})), '^(.*cp.*|ppc|retargeting|paid.*)$') then case when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING' or regexp_instr(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Paid Shopping' @@ -83,19 +131,19 @@ case when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' then 'Paid Video' else 'Paid Other' end - when lower(trim(mkt_medium)) in ('display', 'banner', 'expandable', 'intersitial', 'cpm') then 'Display' + when lower(trim({{ medium_field }})) in ('display', 'banner', 'expandable', 'interstitial', 'cpm') then 'Display' when upper(source_category) = 'SOURCE_CATEGORY_SHOPPING' or regexp_instr(lower(trim(mkt_campaign)), '^(.*(([^a-df-z]|^)shop|shopping).*)$') then 'Organic Shopping' - when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim(mkt_medium)) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social' + when upper(source_category) = 'SOURCE_CATEGORY_SOCIAL' or lower(trim({{ medium_field }})) in ('social', 'social-network', 'sm', 'social network', 'social media') then 'Organic Social' when upper(source_category) = 'SOURCE_CATEGORY_VIDEO' - or regexp_instr(lower(trim(mkt_medium)), '^(.*video.*)$') then 'Organic Video' - when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim(mkt_medium)) = 'organic' then 'Organic Search' - when lower(trim(mkt_medium)) in ('referral', 'app', 'link') then 'Referral' - when lower(trim(mkt_source)) in ('email', 'e-mail', 'e_mail', 'e mail') or 
lower(trim(mkt_medium)) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email' - when lower(trim(mkt_medium)) = 'affiliate' then 'Affiliates' - when lower(trim(mkt_medium)) = 'audio' then 'Audio' - when lower(trim(mkt_source)) = 'sms' or lower(trim(mkt_medium)) = 'sms' then 'SMS' - when lower(trim(mkt_medium)) like '%push' or regexp_instr(lower(trim(mkt_medium)), '.*(mobile|notification).*') or lower(trim(mkt_source)) = 'firebase' then 'Mobile Push Notifications' + or regexp_instr(lower(trim({{ medium_field }})), '^(.*video.*)$') then 'Organic Video' + when upper(source_category) = 'SOURCE_CATEGORY_SEARCH' or lower(trim({{ medium_field }})) = 'organic' then 'Organic Search' + when lower(trim({{ medium_field }})) in ('referral', 'app', 'link') then 'Referral' + when lower(trim({{ src_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') or lower(trim({{ medium_field }})) in ('email', 'e-mail', 'e_mail', 'e mail') then 'Email' + when lower(trim({{ medium_field }})) = 'affiliate' then 'Affiliates' + when lower(trim({{ medium_field }})) = 'audio' then 'Audio' + when lower(trim({{ src_field }})) = 'sms' or lower(trim({{ medium_field }})) = 'sms' then 'SMS' + when lower(trim({{ medium_field }})) like '%push' or regexp_instr(lower(trim({{ medium_field }})), '.*(mobile|notification).*') or lower(trim({{ src_field }})) = 'firebase' then 'Mobile Push Notifications' else 'Unassigned' end {% endmacro %} diff --git a/models/sessions/scratch/snowplow_unified_sessions_this_run.sql b/models/sessions/scratch/snowplow_unified_sessions_this_run.sql index 09f8c217..f4c34f89 100644 --- a/models/sessions/scratch/snowplow_unified_sessions_this_run.sql +++ b/models/sessions/scratch/snowplow_unified_sessions_this_run.sql @@ -86,7 +86,12 @@ with session_firsts as ( from {{ ref('snowplow_unified_events_this_run') }} ev left join - {{ ref(var('snowplow__ga4_categories_seed')) }} c on lower(trim(ev.mkt_source)) = lower(c.source) + {{ ref(var('snowplow__ga4_categories_seed')) }} c on + {% if 
var('snowplow__use_refr_if_mkt_null', false) %} + lower(trim(coalesce(ev.mkt_source, ev.refr_source))) = lower(c.source) + {% else %} + lower(trim(ev.mkt_source)) = lower(c.source) + {% endif %} left join {{ ref(var('snowplow__rfc_5646_seed')) }} l on lower(ev.br_lang) = lower(l.lang_tag) left join diff --git a/models/views/scratch/snowplow_unified_views_this_run.sql b/models/views/scratch/snowplow_unified_views_this_run.sql index 7a6e78dc..ee0cdeb7 100644 --- a/models/views/scratch/snowplow_unified_views_this_run.sql +++ b/models/views/scratch/snowplow_unified_views_this_run.sql @@ -92,7 +92,12 @@ with prep as ( from {{ ref('snowplow_unified_events_this_run') }} as ev - left join {{ ref(var('snowplow__ga4_categories_seed')) }} c on lower(trim(ev.mkt_source)) = lower(c.source) + left join {{ ref(var('snowplow__ga4_categories_seed')) }} c on + {% if var('snowplow__use_refr_if_mkt_null', false) %} + lower(trim(coalesce(ev.mkt_source, ev.refr_source))) = lower(c.source) + {% else %} + lower(trim(ev.mkt_source)) = lower(c.source) + {% endif %} where ev.event_name in ('page_view', 'screen_view') and ev.view_id is not null