diff --git a/Advanced features/Custom_moving_mau_window.sql b/Advanced features/Custom_moving_mau_window.sql index 29f29cd..7bfc864 100644 --- a/Advanced features/Custom_moving_mau_window.sql +++ b/Advanced features/Custom_moving_mau_window.sql @@ -1,11 +1,9 @@ --In this query we create a time slice by extrapolating the days between two fixed dates --this is then joined onto the user activity of x days in the past. And next we count the unique number of users . -WITH dates AS - (SELECT DAY::date - FROM - (SELECT (CURRENT_DATE-30)::TIMESTAMP AS date_range - UNION SELECT CURRENT_DATE::TIMESTAMP)AS ts timeseries DAY AS '1 days' over ( - ORDER BY date_range)), +WITH dates AS ( +SELECT DISTINCT EVENTDATE AS DAY +FROM events +WHERE EVENTDATE BETWEEN current_date - 30 AND current_date), activity AS (SELECT DISTINCT event_date, user_id diff --git a/Advanced features/Hourly Player Session Distribution in local time.sql b/Advanced features/Hourly Player Session Distribution in local time.sql index 8ce738c..3c81f2d 100644 --- a/Advanced features/Hourly Player Session Distribution in local time.sql +++ b/Advanced features/Hourly Player Session Distribution in local time.sql @@ -8,7 +8,7 @@ with firstValues as ( group by userId, eventTimestamp) , results as ( select userID - , TIMESTAMPADD(minute, case when CHAR_LENGTH(TZO)=5 then CAST(substring(tzo, 1,1)||'1' as INTEGER) *-- get positive vs negative tzoffset + , TIMESTAMPADD(minute, case when LEN(TZO)=5 then CAST(substring(tzo, 1,1)||'1' as INTEGER) *-- get positive vs negative tzoffset (CAST(substring(tzo, 2,2) as INTEGER)*60 + cast(substring(tzo, 4,2) as INTEGER)) else null --get tzoffset in minutes end ,startTs )as localEventTimestamp from firstValues) diff --git a/Advanced features/SessionCounter example.sql b/Advanced features/SessionCounter example.sql index 3d7cce3..3dd51e7 100644 --- a/Advanced features/SessionCounter example.sql +++ b/Advanced features/SessionCounter example.sql @@ -14,7 +14,7 @@ WITH DATA AS SELECT 
eventName, count(*) AS events, count(DISTINCT userId) AS users, - round(count(*) / count(DISTINCT userId),2.0) AS 'ev per user in first session' + round(count(*) / count(DISTINCT userId),2.0) AS "ev per user in first session" FROM DATA WHERE sessionCounter = 0 GROUP BY eventName diff --git a/Advanced features/lastUiName.sql b/Advanced features/lastMissionId.sql similarity index 75% rename from Advanced features/lastUiName.sql rename to Advanced features/lastMissionId.sql index ced9367..264eea9 100644 --- a/Advanced features/lastUiName.sql +++ b/Advanced features/lastMissionId.sql @@ -1,8 +1,8 @@ ---Find the last UiName per user and compare these for current players and players that haven't been playing for a while +--Find the last missionID per user and compare these for current players and players that haven't been playing for a while WITH DATA AS (SELECT userId, eventTimestamp, - first_value(UIName + first_value(missionID IGNORE nulls) over (partition BY userId ORDER BY eventTimestamp) AS lastValue --get the first ever value backwards FROM EVENTS @@ -10,10 +10,10 @@ FROM EVENTS ) ,aggregates AS (SELECT userId, max(eventTimestamp)::date AS last_seen_date, - MAX (lastValue) AS lastUiName + MAX (lastValue) AS lastMissionID FROM DATA GROUP BY userId) -SELECT lastUiName, +SELECT lastMissionID, count(CASE WHEN last_seen_date> CURRENT_DATE-7 THEN 1 ELSE NULL @@ -23,4 +23,4 @@ SELECT lastUiName, ELSE NULL END) lapsedPlayers FROM aggregates -GROUP BY lastUiName +GROUP BY lastMissionID diff --git a/Advanced features/time_slice_concurrency.sql b/Advanced features/time_slice_concurrency.sql index 91600d0..773316c 100644 --- a/Advanced features/time_slice_concurrency.sql +++ b/Advanced features/time_slice_concurrency.sql @@ -2,10 +2,13 @@ --create a dataset with timestamps between two points at regular intervals --join sessions on these intervals if they start before and end after this point in time. --Count the number of sessions on these intervals. 
-with timeslots as (select slice_time - from (select (CURRENT_DATE-10)::timestamp as date_range union select now())as ts - timeseries slice_time as '10 minutes' over (order by date_range)), -sessions as (select userid, sessionid, min(eventTimestamp) as sessionStart, max(eventTimestamp) as sessionEnd +with timeslots as ( +SELECT DISTINCT TIME_SLICE(EVENTTIMESTAMP, 10, 'MINUTE') AS slice_time +FROM events +WHERE EVENTTIMESTAMP BETWEEN CURRENT_TIMESTAMP - INTERVAL '10 day' AND CURRENT_TIMESTAMP + ), +sessions as ( +select userid, sessionid, min(eventTimestamp) as sessionStart, max(eventTimestamp) as sessionEnd FROM events WHERE eventTimestamp between (CURRENT_DATE-10) and now() and sessionId is not null diff --git a/Advanced features/transactions_per_clientVersion.sql b/Advanced features/transactions_per_clientVersion.sql index 3db0ce3..c7f9299 100644 --- a/Advanced features/transactions_per_clientVersion.sql +++ b/Advanced features/transactions_per_clientVersion.sql @@ -1,5 +1,6 @@ --Since the clientVersion is sent in on the gameStarted event at the start of every session but not on each event we can't just filter events by clientVersion in one go. --However we can extrapolate the clientVersion using an analytic function (with the OVER statement) +--As Snowflake provides you with 12 months worth of data, it is recommeneded that you restrict this query as it will otherwise take a very long time to process. 
with data as ( select last_value(clientVersion ignore nulls) over (partition by userId order by eventId) as currentClientversion ,userId diff --git a/KPIs/ARPDAU_and_revenue_with_smartads.sql b/KPIs/ARPDAU_and_revenue_with_smartads.sql index fa0ca1d..0e8399d 100644 --- a/KPIs/ARPDAU_and_revenue_with_smartads.sql +++ b/KPIs/ARPDAU_and_revenue_with_smartads.sql @@ -5,7 +5,7 @@ select eventdate, round( (sum(convertedproductAmount)+sum(adEcpm/1000))/100/count(distinct userid) ,4)::float as ARPDAU, -(sum(convertedproductAmount)+sum(adEcpm)/1000)/100::float as 'revenue for both IAP and SmartAds' +(sum(convertedproductAmount)+sum(adEcpm)/1000)/100::float as "revenue for both IAP and SmartAds" from events where eventdate between current_date -30 and current_date group by eventdate diff --git a/KPIs/First_IAP_by_transactionName.sql b/KPIs/First_IAP_by_transactionName.sql index 0cd2d7d..9139fb3 100644 --- a/KPIs/First_IAP_by_transactionName.sql +++ b/KPIs/First_IAP_by_transactionName.sql @@ -8,7 +8,7 @@ where convertedProductAmount>0 and revenueValidated in (0,1) ) select transactionName, - count(*) as 'IAP count', + count(*) as "IAP count", sum(convertedproductAmount)/100::float as Revenue from transactions where transactionNumber = 1 diff --git a/KPIs/IAP_per_platform_per_day.sql b/KPIs/IAP_per_platform_per_day.sql index c392885..55d707a 100644 --- a/KPIs/IAP_per_platform_per_day.sql +++ b/KPIs/IAP_per_platform_per_day.sql @@ -13,8 +13,7 @@ WITH data AS ( FROM events e WHERE e.eventname IN ( 'newPlayer', 'gameStarted', 'transaction' ) - AND gauserstartdate > current_date - 31) - + AND gauserstartdate > current_date - 31) SELECT daynumber, COUNT(DISTINCT(CASE WHEN platform LIKE 'IOS%' THEN userid END)) AS "iOS Users", COUNT(DISTINCT(CASE WHEN platform LIKE 'IOS%' diff --git a/KPIs/Mission Starters vs Completers.sql b/KPIs/Mission Starters vs Completers.sql index 1489d58..ab218ca 100644 --- a/KPIs/Mission Starters vs Completers.sql +++ b/KPIs/Mission Starters vs 
Completers.sql @@ -9,7 +9,7 @@ WITH DATA AS FROM events WHERE missionName IS NOT NULL), nonCompletionData AS (SELECT missionName, - count(DISTINCT CASE WHEN missionStartedFlag THEN userId ELSE NULL END) AS players, + count(DISTINCT CASE WHEN missionStartedFlag = 1 THEN userId ELSE NULL END) AS players, count(DISTINCT CASE WHEN missionStartedFlag = 1 AND missionCompletedFlag = 1 THEN userId ELSE NULL END) AS completedPlayers, count(DISTINCT CASE WHEN missionStartedFlag = 1 diff --git a/KPIs/Time_spent_per_level.sql b/KPIs/Time_spent_per_level.sql index 4bf506a..f9272e0 100644 --- a/KPIs/Time_spent_per_level.sql +++ b/KPIs/Time_spent_per_level.sql @@ -9,11 +9,11 @@ WITH DATA AS msSinceLastEvent FROM EVENTS) SELECT coalesce(current_level, 0) AS LEVEL,--replace not known yet with level 0 - sum(msSinceLastEvent)/1000 AS 'Seconds Spent On Level', - count(DISTINCT userId) AS 'Number of users', - count(DISTINCT eventDate) AS 'Total days spent', - count(DISTINCT eventDate)/count(DISTINCT userId) AS 'average days spent', - sum(msSinceLastEvent)/1000/count(DISTINCT userId) AS 'average seconds in game spent on level' + sum(msSinceLastEvent)/1000 AS "Seconds Spent On Level", + count(DISTINCT userId) AS "Number of users", + count(DISTINCT eventDate) AS "Total days spent", + count(DISTINCT eventDate)/count(DISTINCT userId) AS "average days spent", + sum(msSinceLastEvent)/1000/count(DISTINCT userId) AS "average seconds in game spent on level" FROM DATA GROUP BY current_level ORDER BY current_level diff --git a/KPIs/average_mission length.sql b/KPIs/average_mission length.sql index ccaf6db..fe918fa 100644 --- a/KPIs/average_mission length.sql +++ b/KPIs/average_mission length.sql @@ -16,6 +16,7 @@ WITH missions AS (SELECT userId, missionGroup, missionID, + eventName, sum(msSinceLastEvent) over (partition BY missionGroup, userId ORDER BY eventTimestamp)AS MissionCompletedTime FROM missions diff --git a/KPIs/campaign_conversion_with_window.sql 
b/KPIs/campaign_conversion_with_window.sql index fa6ed26..e57c63d 100644 --- a/KPIs/campaign_conversion_with_window.sql +++ b/KPIs/campaign_conversion_with_window.sql @@ -7,16 +7,16 @@ COALESCE(stepID,1) as stepID, min(eventTimestamp) as stepTime, count(userID) over (partition by COALESCE(stepID,1), eventDate order by min(eventTimestamp)) as total_participants from events -where eventName = 'outOfGameSend' -and communicationState = 'SENT' +where eventName = 'outOfGameSend' +and communicationState = 'SENT' and campaignID = 100--the ID of the campaign -and stepType = 'STANDARD' +and stepType = 'STANDARD' group by userID, COALESCE(stepID,1), eventDate ) -select c.stepTime::date as 'Date', c.stepID AS Step, max(total_participants) as Participants, count(distinct e.userID) as 'Occurred', round(count(distinct e.userID)/max(total_participants)*100,2)::float as '%Occurrence' +select c.stepTime::date as "Date", c.stepID AS Step, max(total_participants) as Participants, count(distinct e.userID) as "Occurred", round(count(distinct e.userID)/max(total_participants)*100,2)::float as "%Occurrence" from in_campaign c left join events e on e.userID = c.userID and e.eventTimestamp > c.stepTime -and e.eventTimestamp < c.stepTime + interval '7 day' -and e.eventName = 'gameStarted' +and e.eventTimestamp < c.stepTime + interval '7 day' +and e.eventName = 'gameStarted' group by c.stepID, c.stepTime::date order by c.stepTime::date, c.stepID, Participants diff --git a/KPIs/count_days_until_first-purchase.sql b/KPIs/count_days_until_first-purchase.sql index a79c533..4080983 100644 --- a/KPIs/count_days_until_first-purchase.sql +++ b/KPIs/count_days_until_first-purchase.sql @@ -7,7 +7,6 @@ WITH userPurchases AS ( FROM fact_user_sessions_day WHERE revenue > 0 ) - SELECT daysSinceInstall AS daysUntilFirstPurchase, COUNT(DISTINCT user_id) AS userCount diff --git a/KPIs/hours_until_first_purchase.sql b/KPIs/hours_until_first_purchase.sql index 1e1345f..a1ebb86 100644 --- 
a/KPIs/hours_until_first_purchase.sql +++ b/KPIs/hours_until_first_purchase.sql @@ -17,7 +17,6 @@ WITH transactions AS ( eventname IN ( 'transaction', 'newPlayer', 'gameStarted' ) AND gauserstartdate > (SELECT Min(eventtimestamp) FROM EVENTS) ) - SELECT userid AS userId, firsteventtimestamp AS userFirstSeen, diff --git a/KPIs/in_game_test_event.sql b/KPIs/in_game_test_event.sql index 959a4a9..8b80361 100644 --- a/KPIs/in_game_test_event.sql +++ b/KPIs/in_game_test_event.sql @@ -7,32 +7,26 @@ WITH relevant_data AS ( responseVariantName, responseMessageSequence, eventName, - --Check if the next event for this user is the test event - lead(eventName = 'gameStarted') OVER ( userWindow ) AS nextEventIsTestEvent, - --Get the time of the following event - lead(eventTimestamp) OVER ( userWindow ) AS nextTimestamp + lead(eventName = 'gameStarted') OVER (PARTITION BY userId ORDER BY eventTimestamp) AS nextEventIsTestEvent, + lead(eventTimestamp) OVER (PARTITION BY userId ORDER BY eventTimestamp) AS nextTimestamp FROM events WHERE ((eventName = 'engageResponse' and responseEngagementName = 'A/B test campaign' ) OR (eventName = 'gameStarted' ) ) - WINDOW userWindow AS (PARTITION BY userId ORDER BY eventTimestamp ) ), results AS ( SELECT eventDate AS Date, responseVariantName AS Variant, responseMessageSequence AS Step, count(DISTINCT userID) AS Participants, - -- Get the number of people who reach the test event within the interval - count(DISTINCT CASE WHEN nextEventIsTestEvent AND (nextTimestamp - eventTimestamp) <= '1 days' :: INTERVAL - THEN userId END) AS Occurred + count(DISTINCT CASE WHEN nextEventIsTestEvent AND datediff(d, eventTimestamp, nextTimestamp) <= 1 + THEN userId END) AS Occurred FROM relevant_data WHERE (eventName = 'engageResponse') GROUP BY eventDate, responseVariantName, responseMessageSequence ) SELECT *, - round(Occurred / Participants * 100, 2) :: FLOAT AS "Occurred %" -FROM results -ORDER BY Date, Variant, Step + round(Occurred / Participants * 100, 
2) :: FLOAT AS "Occurred %" \ No newline at end of file diff --git a/KPIs/median_and_average_session_times.sql b/KPIs/median_and_average_session_times.sql index d02c215..98f9145 100644 --- a/KPIs/median_and_average_session_times.sql +++ b/KPIs/median_and_average_session_times.sql @@ -1,26 +1,55 @@ --Get the median and average session times --The median times can only be retrieved in an analytic function -WITH DATA AS - (SELECT min(eventDate) AS eventDate, - sessionId, - userId, - sum(msSinceLastEvent) AS sessionDurationMs, - count(*)AS eventCount - FROM EVENTS - GROUP BY sessionId, - userId) ,medianValues as - (SELECT *, MEDIAN(sessionDurationMs) OVER (PARTITION BY eventDate) AS medianSessionTime - FROM DATA - WHERE eventCount>1-- exclude sessions with just one event +with data as ( +select + min(eventDate) as eventDate, + sessionId, + userId, + sum(msSinceLastEvent) as sessionDurationMs, + count(*)as eventCount +from + EVENTS +group by + sessionId, + userId) , +medianValues as ( +select + *, + MEDIAN(sessionDurationMs) over (partition by eventDate) as medianSessionTime +from + data +where + eventCount>1 + -- exclude sessions with just one event ) -SELECT eventDate, - round(avg(sessionDurationMs)/1000,2.0) AS 'Mean session time in seconds', - (avg(sessionDurationMs)::varchar||'ms')::interval AS 'Mean session time as interval', - round(medianSessionTime/1000, 2.0) AS 'Medain session time in seconds', - (medianSessionTime::varchar||'ms')::interval AS 'Median session time as interval', - count(DISTINCT userId) AS 'Sample size users', - count(DISTINCT sessionId) AS 'Sample size sessions' -FROM medianValues -GROUP BY eventDate, - medianSessionTime -ORDER BY eventDate DESC +select + eventDate, + round(avg(sessionDurationMs)/ 1000, 2.0) as "Mean session time in seconds", + case -- format time into an interval + when avg(sessionDurationMs) < 0 then '-' || TO_CHAR(TRUNC(ABS(avg(sessionDurationMs))/ 3600000), 'FM9900') || ':' || + 
TO_CHAR(TRUNC(mod(ABS(avg(sessionDurationMs)), 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(ABS(avg(sessionDurationMs)), 60000)/ 1000), 'FM00') || '.' || + TO_CHAR(mod(ABS(avg(sessionDurationMs)), 1000), 'FM000') + else TO_CHAR(TRUNC(avg(sessionDurationMs)/ 3600000), 'FM9900') || ':' || + TO_CHAR(TRUNC(mod(avg(sessionDurationMs), 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(avg(sessionDurationMs), 60000)/ 1000), 'FM00') || '.' || + TO_CHAR(mod(avg(sessionDurationMs), 1000), 'FM000') end as "Mean session time as interval", + round(medianSessionTime / 1000, 2.0) as "Medain session time in seconds", + case + when medianSessionTime < 0 then '-' || TO_CHAR(TRUNC(ABS(medianSessionTime)/ 3600000), 'FM9900') || ':' || + TO_CHAR(TRUNC(mod(ABS(medianSessionTime), 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(ABS(medianSessionTime), 60000)/ 1000), + 'FM00') || '.' || TO_CHAR(mod(ABS(medianSessionTime), 1000), 'FM000') + else TO_CHAR(TRUNC(medianSessionTime/ 3600000), 'FM9900') || ':' || + TO_CHAR(TRUNC(mod(medianSessionTime, 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(medianSessionTime, 60000)/ 1000), 'FM00') || '.' 
|| + TO_CHAR(mod(medianSessionTime, 1000), 'FM000') end as "Median session time as interval", + count(distinct userId) as "Sample size users", + count(distinct sessionId) as "Sample size sessions" +from + medianValues +group by + eventDate, + medianSessionTime +order by + eventDate desc diff --git a/KPIs/retention_for_min_event_date_only.sql b/KPIs/retention_for_min_event_date_only.sql index ec03654..bd1fff5 100644 --- a/KPIs/retention_for_min_event_date_only.sql +++ b/KPIs/retention_for_min_event_date_only.sql @@ -10,17 +10,17 @@ group by player_start_date, ), retention AS ( SELECT player_start_date, COUNT (DISTINCT CASE WHEN n = 0 THEN user_id ELSE NULL END)AS installs, - COUNT (DISTINCT CASE WHEN n = 1 THEN user_id ELSE NULL END)AS 'day 1 retention', - COUNT (DISTINCT CASE WHEN n = 7 THEN user_id ELSE NULL END)AS 'day 7 retention', - COUNT (DISTINCT CASE WHEN n = 14 THEN user_id ELSE NULL END)AS 'day 14 retention', - COUNT (DISTINCT CASE WHEN n = 30 THEN user_id ELSE NULL END)AS 'day 30 retention' + COUNT (DISTINCT CASE WHEN n = 1 THEN user_id ELSE NULL END)AS "day 1 retention", + COUNT (DISTINCT CASE WHEN n = 7 THEN user_id ELSE NULL END)AS "day 7 retention", + COUNT (DISTINCT CASE WHEN n = 14 THEN user_id ELSE NULL END)AS "day 14 retention", + COUNT (DISTINCT CASE WHEN n = 30 THEN user_id ELSE NULL END)AS "day 30 retention" FROM DATA GROUP BY player_start_date) SELECT *, -round("day 1 retention"/"installs"*100,2.0) as 'D1%', -round("day 7 retention"/"installs"*100,2.0) as 'D7%', -round("day 14 retention"/"installs"*100,2.0) as 'D14%', -round("day 30 retention"/"installs"*100,2.0) as 'D30%' +round("day 1 retention"/"installs"*100,2.0) as "D1%", +round("day 7 retention"/"installs"*100,2.0) as "D7%", +round("day 14 retention"/"installs"*100,2.0) as "D14%", +round("day 30 retention"/"installs"*100,2.0) as "D30%" FROM retention ORDER BY player_start_date DESC diff --git a/KPIs/revenue_per_hour.sql b/KPIs/revenue_per_hour.sql index 525ce4c..cc6680f 100644 --- 
a/KPIs/revenue_per_hour.sql +++ b/KPIs/revenue_per_hour.sql @@ -2,11 +2,11 @@ then get the revenue for this hour, the total number of users as well as the total number of spenders within this window. */ select date_trunc('hour', eventTimestamp) as time, -to_char(sum(convertedproductamount)/100, '$999,999,999,999.00') as revenue, +to_char(sum(convertedproductamount)/100, '"$"999,999,999,999.00') as revenue, count(distinct userid) active_users, count(distinct case when convertedproductamount>0 then userId end) as spenders from events where revenuevalidated not in (2,3) and eventName in ('gameStarted', 'transaction') -and eventTimestamp between now() - interval '100 hours' and now() +and eventTimestamp between current_timestamp() - interval '100 hours' and current_timestamp() group by 1 order by 1 desc diff --git a/KPIs/revenue_per_item.sql b/KPIs/revenue_per_item.sql index 6163adf..4ccc619 100644 --- a/KPIs/revenue_per_item.sql +++ b/KPIs/revenue_per_item.sql @@ -15,9 +15,9 @@ WITH items as WHERE e.productCategory = 'REAL_CURRENCY' ORDER BY itemName) SELECT itemName, - sum(productAmount) as 'items sold', + sum(productAmount) as "items sold", round(sum(convertedProductAmount)/100,2.0) as revenue, count(*) as sales FROM spendings GROUP BY itemName -order by "revenue" desc +order by revenue desc diff --git a/KPIs/smartads_and_iap_ltv.sql b/KPIs/smartads_and_iap_ltv.sql index d6b2a15..a7abc2b 100644 --- a/KPIs/smartads_and_iap_ltv.sql +++ b/KPIs/smartads_and_iap_ltv.sql @@ -6,15 +6,15 @@ with data as ( gaUserStartdate, count(distinct userId) as userCount, sum(case when revenueValidated in (0,1) then convertedProductAmount/100 end) as IapRevenue, - sum(case when eventname = 'adClosed' and adstatus = 'Success' then adEcpm/100000 end) as PredictedAdRevenue + sum(case when eventname = 'adClosed' and adstatus = 'Success' then adEcpm/100000 end) as PredictedAdRevenue from events where gaUserStartDate>= (select min(eventDate) from events) group by gaUserStartdate) select - 
gaUserStartDate as 'install date', - round(IapRevenue/userCount,4)::float as 'IAP LTV', - round(PredictedAdRevenue/userCount,4)::float as 'Predicted Ad LTV', - round((PredictedAdRevenue+IapRevenue)/userCount,4)::float as 'total LTV', - userCount as 'Number of players' + gaUserStartDate as "install date", + round(IapRevenue/userCount,4)::float as "IAP LTV", + round(PredictedAdRevenue/userCount,4)::float as "Predicted Ad LTV", + round((PredictedAdRevenue+IapRevenue)/userCount,4)::float as "total LTV", + userCount as "Number of players" from data order by gaUserStartDate diff --git a/Paradigms/New Players by Time of day in local time.sql b/Paradigms/New Players by Time of day in local time.sql index 0d82f1d..0dbf344 100644 --- a/Paradigms/New Players by Time of day in local time.sql +++ b/Paradigms/New Players by Time of day in local time.sql @@ -12,17 +12,15 @@ WITH firstValues AS (SELECT tzo, startTs, TIMESTAMPADD(MINUTE, CASE - WHEN CHAR_LENGTH(TZO)=5 THEN CAST(substring(tzo, 1,1)||'1' AS INTEGER) *-- get positive vs negative tzoffset + WHEN LEN(TZO)=5 THEN CAST(substring(tzo, 1,1)||'1' AS INTEGER) *-- get positive vs negative tzoffset (CAST(substring(tzo, 2,2) AS INTEGER)*60 + cast(substring(tzo, 4,2) AS INTEGER)) ELSE NULL --get tzoffset in minutes - END, startTs) AS localEventTimestamp, CASE - WHEN CHAR_LENGTH(TZO)=5 THEN CAST(substring(tzo, 1,1)||'1' AS INTEGER) *-- get positive vs negative tzoffset + WHEN LEN(TZO)=5 THEN CAST(substring(tzo, 1,1)||'1' AS INTEGER) *-- get positive vs negative tzoffset (CAST(substring(tzo, 2,2) AS INTEGER)*60 + cast(substring(tzo, 4,2) AS INTEGER)) ELSE NULL --get tzoffset in minutes - END AS minutes FROM firstValues) SELECT * -FROM results +FROM results \ No newline at end of file diff --git a/Paradigms/Percentage spenders over time.sql b/Paradigms/Percentage spenders over time.sql index ec6d8db..46e9a28 100644 --- a/Paradigms/Percentage spenders over time.sql +++ b/Paradigms/Percentage spenders over time.sql @@ -2,11 +2,10 @@ 
-- The first CTE pulls the date range -- The second CTE pulls player start dates and first payment dates -- The final part of the query pulls it all togteher -WITH dates AS - ( SELECT DAY::date - FROM - (SELECT (CURRENT_DATE-30)::TIMESTAMP AS date_range - UNION SELECT CURRENT_DATE::TIMESTAMP)AS ts timeseries DAY AS '1 days' over (ORDER BY date_range)) +WITH dates AS ( +SELECT DISTINCT EVENTDATE AS DAY +FROM events +WHERE EVENTDATE BETWEEN current_date - 30 AND current_date) , players AS ( SELECT user_id , MIN(event_date) as startDate diff --git a/Paradigms/SessionLength differences.sql b/Paradigms/SessionLength differences.sql index 7c72c53..ca8ede0 100644 --- a/Paradigms/SessionLength differences.sql +++ b/Paradigms/SessionLength differences.sql @@ -11,7 +11,23 @@ WITH DATA AS FROM EVENTS GROUP BY sessionId) SELECT sessionId, - (sessionLength|| ' ms')::interval AS sessionLength, - (compensatedSessionLength|| ' ms')::interval AS compensatedSessionLength, + case -- format time as an interval + when sessionLength < 0 then '-' || TO_CHAR(TRUNC(ABS(sessionLength)/ 3600000), 'FM9900') || ':' || + TO_CHAR(TRUNC(mod(ABS(sessionLength), 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(ABS(sessionLength), 60000)/ 1000), 'FM00') || '.' || + TO_CHAR(mod(ABS(sessionLength), 1000), 'FM000') + else TO_CHAR(TRUNC(sessionLength/ 3600000), 'FM9900') || ':' || + TO_CHAR(TRUNC(mod(sessionLength, 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(sessionLength, 60000)/ 1000), 'FM00') || '.' || + TO_CHAR(mod(sessionLength, 1000), 'FM000') end AS sessionLength, + case + when compensatedSessionLength < 0 then '-' || TO_CHAR(TRUNC(ABS(compensatedSessionLength)/ 3600000), 'FM9900') || ':' || + TO_CHAR(TRUNC(mod(ABS(compensatedSessionLength), 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(ABS(compensatedSessionLength), 60000)/ 1000), 'FM00') || '.' 
|| + TO_CHAR(mod(ABS(compensatedSessionLength), 1000), 'FM000') + else TO_CHAR(TRUNC(compensatedSessionLength/ 3600000), 'FM9900') || ':' || + TO_CHAR(TRUNC(mod(compensatedSessionLength, 3600000)/ 60000), 'FM00') || ':' || + TO_CHAR(TRUNC(mod(compensatedSessionLength, 60000)/ 1000), 'FM00') || '.' || + TO_CHAR(mod(compensatedSessionLength, 1000), 'FM000') end AS compensatedSessionLength, eventCount FROM DATA diff --git a/Paradigms/concurrency.sql b/Paradigms/concurrency.sql index 8d5e913..9e9e710 100644 --- a/Paradigms/concurrency.sql +++ b/Paradigms/concurrency.sql @@ -1,20 +1,14 @@ ---get the number of users who were in a session at a given timeslot: with timeslots as( - select - slice_time - from - ( - select now()-interval '6 hours' as date_range - union - select now() - ) as ts timeseries slice_time as '1 minutes' over(order by date_range) + SELECT DISTINCT TIME_SLICE(EVENTTIMESTAMP, 1, 'MINUTE') AS slice_time +FROM events +WHERE EVENTTIMESTAMP BETWEEN CURRENT_TIMESTAMP - INTERVAL '6 hour' AND CURRENT_TIMESTAMP ), data as( select userId, sessionId, min(eventTimestamp)startTime, max(eventTimestamp)endTime -from events where eventTimestamp between now()-interval '6 hours' and now() +from events where eventTimestamp between current_timestamp()-interval '6 hours' and current_timestamp() group by 1,2 ) select slice_time, count(distinct userId) from timeslots ts left join data d on ts.slice_time between d.startTime and d.endTime group by slice_time -order by slice_time desc +order by slice_time desc \ No newline at end of file diff --git a/Paradigms/weekly_retention.sql b/Paradigms/weekly_retention.sql index c085362..5171719 100644 --- a/Paradigms/weekly_retention.sql +++ b/Paradigms/weekly_retention.sql @@ -11,7 +11,7 @@ group by user_id, trunc(event_Date, 'D') order by week_commencing ) select (case when first_week=week_commencing or last_week=-14 then week_commencing else week_commencing+interval '7 days' end)::Date as w -, sum(case when next_week=7 and 
first_week!=week_commencing then 1 else 0 end) current +, sum(case when next_week=7 and first_week!=week_commencing then 1 else 0 end) currentWeek , sum(case when coalesce(next_week,0)!=7 and first_week!=week_commencing then -1 else 0 end) churned , sum(case when first_week=week_commencing then 1 else 0 end) new_players , sum(case when last_week = -14 then 1 else 0 end) returning diff --git a/QA/Ghostevents_vs_gameplay_events.sql b/QA/Ghostevents_vs_gameplay_events.sql index da1ab3f..beb2899 100644 --- a/QA/Ghostevents_vs_gameplay_events.sql +++ b/QA/Ghostevents_vs_gameplay_events.sql @@ -1,9 +1,9 @@ --Get the number of events without a session and the ones with a session by eventDate. --At least some events should come from the client and have a sessionID set. SELECT eventDate, - count(*)-count(sessionId) AS 'sessionless events', - count(sessionID) AS 'session events', - round((count(*)-count(sessionId))/count(*) * 100,2.0) AS 'percentage' + count(*)-count(sessionId) AS "sessionless events", + count(sessionID) AS "session events", + round((count(*)-count(sessionId))/count(*) * 100,2.0) AS "percentage" FROM EVENTS GROUP BY eventDate ORDER BY eventDate DESC diff --git a/QA/event_order.sql b/QA/event_order.sql index a8dfbbc..72b841d 100644 --- a/QA/event_order.sql +++ b/QA/event_order.sql @@ -1,16 +1,10 @@ --In order for session times to be measured accurately and for first and last value metrics to work okay events need to come in in the order they occured in. --Since the device time might not be 100% accurate and can automatically be adjusted it could occasionally happen that an event has a timestamp slightly before the next one, this should only happen in up to 1%of the events. 
WITH eventData AS - (SELECT userId, - eventDate, - eventTimestamp, - lag(eventTimestamp) over(partition BY userId - ORDER BY eventId), - EXTRACT(EPOCH - FROM eventTimestamp - lag(eventTimestamp) over(partition BY userId - ORDER BY eventId)) AS secondsdiff - FROM EVENTS - WHERE sessionId IS NOT NULL--exclude ghost events +(SELECT userId, eventDate, eventTimestamp, lag(eventTimestamp) over(partition BY userId ORDER BY eventId) as lagTime, +timediff(s, lagTime, eventTimestamp) AS secondsdiff +FROM EVENTS +WHERE sessionId IS NOT NULL--exclude ghost events ), aggregates AS (SELECT eventDate, diff --git a/QA/events_per_dau.sql b/QA/events_per_dau.sql index 1ac6914..c94b429 100644 --- a/QA/events_per_dau.sql +++ b/QA/events_per_dau.sql @@ -1,8 +1,8 @@ --Get the number of events per user and the sample size. Should be below 100 on average. SELECT eventDate, - count(*)/count(DISTINCT userId) AS 'Events per DAU', - count(DISTINCT userId) AS 'Sample Size' + count(*)/count(DISTINCT userId) AS "Events per DAU", + count(DISTINCT userId) AS "Sample Size" FROM EVENTS GROUP BY eventDate ORDER BY eventDate DESC diff --git a/QA/seconds_between_events.sql b/QA/seconds_between_events.sql index 06c4263..0d66e0c 100644 --- a/QA/seconds_between_events.sql +++ b/QA/seconds_between_events.sql @@ -15,12 +15,12 @@ WITH first_day_events AS gaUserStartDate, msSinceLastEvent FROM first_day_events - WHERE eventTimestamp - startTimeStamp<=interval '5 minute'-- check first 5 minutes + WHERE timestampdiff(minute, startTimeStamp, eventTimeStamp) < 5 -- check first 5 minutes ) -SELECT gaUserStartDate AS 'Date', - avg(msSinceLastEvent/1000)AS 'Average number of seconds between events', - count(*) as 'Number of events', - count(distinct userId) as 'Number of users' +SELECT gaUserStartDate AS "Date", + avg(msSinceLastEvent/1000)AS "Average number of seconds between events", + count(*) as "Number of events", + count(distinct userId) as "Number of users" FROM firstMinutes GROUP BY 1 ORDER BY 1 DESC diff 
--git a/README.md b/README.md index 1f6cf0e..eb119e8 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Common SQL recipes and best practises ## Purpose -This repository aims to provide a number of best practises and ideas on how to construct queries that provide interesting data in a way that works well with our platform. +This repository aims to provide a number of best practises and ideas on how to construct queries that provide interesting data in a way that works well with our platform. The queries have all been verified on games implementing the Snowflake data warehouse. Comments within the query will indicate any specific cases where a query cannot be run on the original Vertica warehouse. Use these queries as you please for educational purposes and to adjust them to suit your specific needs.