diff --git a/datasets/checkout.csv b/datasets/checkout.csv new file mode 100644 index 0000000..5e5f949 --- /dev/null +++ b/datasets/checkout.csv @@ -0,0 +1,41 @@ +user_id,page +90,checkout_page +88,checkout_page +67,checkout_page +61,checkout_page +76,checkout_page +55,checkout_page +59,checkout_page +26,checkout_page +8,checkout_page +19,checkout_page +71,checkout_page +22,checkout_page +89,checkout_page +76,checkout_page +45,checkout_page +81,checkout_page +30,checkout_page +32,checkout_page +33,checkout_page +90,checkout_page +40,checkout_page +94,checkout_page +5,checkout_page +3,checkout_page +30,checkout_page +49,checkout_page +37,checkout_page +50,checkout_page +19,checkout_page +36,checkout_page +62,checkout_page +15,checkout_page +49,checkout_page +39,checkout_page +82,checkout_page +64,checkout_page +58,checkout_page +81,checkout_page +89,checkout_page +84,checkout_page diff --git a/datasets/home.csv b/datasets/home.csv new file mode 100644 index 0000000..0d69c86 --- /dev/null +++ b/datasets/home.csv @@ -0,0 +1,61 @@ +user_id,page +88,home_page +66,home_page +62,home_page +88,home_page +59,home_page +51,home_page +85,home_page +79,home_page +93,home_page +64,home_page +94,home_page +79,home_page +52,home_page +95,home_page +100,home_page +67,home_page +77,home_page +82,home_page +59,home_page +85,home_page +57,home_page +83,home_page +93,home_page +77,home_page +51,home_page +55,home_page +59,home_page +87,home_page +59,home_page +55,home_page +69,home_page +88,home_page +82,home_page +78,home_page +56,home_page +92,home_page +96,home_page +52,home_page +77,home_page +84,home_page +81,home_page +95,home_page +50,home_page +53,home_page +97,home_page +93,home_page +72,home_page +85,home_page +55,home_page +89,home_page +94,home_page +73,home_page +98,home_page +76,home_page +74,home_page +70,home_page +68,home_page +50,home_page +61,home_page +61,home_page diff --git a/datasets/location.csv b/datasets/location.csv new file mode 100644 index 
0000000..e1a09fe --- /dev/null +++ b/datasets/location.csv @@ -0,0 +1,101 @@ +user_id,state,country +1,Texas,United States +2,Indiana,United States +3,Georgia,United States +4,Florida,United States +5,Virginia,United States +6,Oklahoma,United States +7,Florida,United States +8,Pennsylvania,United States +9,Connecticut,United States +10,Michigan,United States +11,Texas,United States +12,Texas,United States +13,New York,United States +14,South Carolina,United States +15,Texas,United States +16,Michigan,United States +17,Florida,United States +18,Texas,United States +19,South Dakota,United States +20,Michigan,United States +21,South Carolina,United States +22,New York,United States +23,California,United States +24,Georgia,United States +25,California,United States +26,Alabama,United States +27,Washington,United States +28,Texas,United States +29,District of Columbia,United States +30,Texas,United States +31,Illinois,United States +32,Texas,United States +33,District of Columbia,United States +34,Pennsylvania,United States +35,Alaska,United States +36,New York,United States +37,Pennsylvania,United States +38,California,United States +39,Illinois,United States +40,Indiana,United States +41,Washington,United States +42,California,United States +43,South Carolina,United States +44,New Mexico,United States +45,Florida,United States +46,Alaska,United States +47,New York,United States +48,Texas,United States +49,North Carolina,United States +50,Pennsylvania,United States +51,Illinois,United States +52,Texas,United States +53,Alaska,United States +54,Colorado,United States +55,North Carolina,United States +56,New York,United States +57,Florida,United States +58,New York,United States +59,Nebraska,United States +60,Ohio,United States +61,Ohio,United States +62,Ohio,United States +63,Oklahoma,United States +64,Oklahoma,United States +65,Texas,United States +66,Ohio,United States +67,Texas,United States +68,Texas,United States +69,Texas,United States +70,Florida,United States 
+71,Washington,United States +72,Nevada,United States +73,Illinois,United States +74,Florida,United States +75,Arkansas,United States +76,Oklahoma,United States +77,Missouri,United States +78,Utah,United States +79,California,United States +80,California,United States +81,California,United States +82,California,United States +83,New York,United States +84,Virginia,United States +85,Indiana,United States +86,Pennsylvania,United States +87,Nevada,United States +88,California,United States +89,Texas,United States +90,Kansas,United States +91,Georgia,United States +92,North Carolina,United States +93,California,United States +94,Pennsylvania,United States +95,California,United States +96,New York,United States +97,Kentucky,United States +98,Georgia,United States +99,California,United States +100,Texas,United States diff --git a/datasets/payment_confirm.csv b/datasets/payment_confirm.csv new file mode 100644 index 0000000..990cf7c --- /dev/null +++ b/datasets/payment_confirm.csv @@ -0,0 +1,21 @@ +user_id,page +85,payment_confirmation_page +41,payment_confirmation_page +10,payment_confirmation_page +21,payment_confirmation_page +60,payment_confirmation_page +100,payment_confirmation_page +70,payment_confirmation_page +100,payment_confirmation_page +6,payment_confirmation_page +92,payment_confirmation_page +64,payment_confirmation_page +3,payment_confirmation_page +59,payment_confirmation_page +89,payment_confirmation_page +36,payment_confirmation_page +52,payment_confirmation_page +31,payment_confirmation_page +68,payment_confirmation_page +62,payment_confirmation_page +24,payment_confirmation_page diff --git a/datasets/search.csv b/datasets/search.csv new file mode 100644 index 0000000..2f87bfd --- /dev/null +++ b/datasets/search.csv @@ -0,0 +1,61 @@ +user_id,page +17,search_page +5,search_page +40,search_page +6,search_page +13,search_page +12,search_page +13,search_page +27,search_page +24,search_page +3,search_page +24,search_page +5,search_page +23,search_page 
+24,search_page +37,search_page +18,search_page +22,search_page +38,search_page +35,search_page +7,search_page +32,search_page +14,search_page +31,search_page +34,search_page +35,search_page +32,search_page +4,search_page +4,search_page +11,search_page +9,search_page +33,search_page +9,search_page +19,search_page +11,search_page +23,search_page +5,search_page +27,search_page +1,search_page +1,search_page +34,search_page +10,search_page +34,search_page +32,search_page +24,search_page +32,search_page +30,search_page +31,search_page +18,search_page +33,search_page +18,search_page +6,search_page +15,search_page +11,search_page +22,search_page +19,search_page +11,search_page +7,search_page +40,search_page +34,search_page +23,search_page diff --git a/datasets/user.csv b/datasets/user.csv new file mode 100644 index 0000000..4587534 --- /dev/null +++ b/datasets/user.csv @@ -0,0 +1,101 @@ +user_id,device,gender,date +1,Mobile,M,2018-12-13 +2,Mobile,F,2018-03-27 +3,Mobile,F,2019-02-18 +4,Mobile,M,2018-04-27 +5,Mobile,F,2018-10-12 +6,Mobile,M,2018-06-06 +7,Desktop,M,2019-01-31 +8,Mobile,M,2018-04-14 +9,Mobile,M,2018-07-16 +10,Desktop,F,2018-12-26 +11,Mobile,M,2019-02-05 +12,Desktop,M,2018-03-24 +13,Mobile,M,2018-07-03 +14,Desktop,M,2018-03-25 +15,Desktop,F,2018-08-12 +16,Mobile,M,2018-07-03 +17,Desktop,F,2018-05-21 +18,Mobile,F,2018-04-25 +19,Mobile,F,2019-01-04 +20,Desktop,F,2018-04-30 +21,Mobile,M,2018-04-09 +22,Desktop,F,2018-05-10 +23,Desktop,F,2018-06-27 +24,Mobile,F,2018-07-14 +25,Desktop,F,2018-07-17 +26,Desktop,F,2018-05-23 +27,Mobile,M,2018-10-28 +28,Desktop,F,2018-12-12 +29,Desktop,F,2018-06-30 +30,Mobile,M,2019-01-19 +31,Mobile,F,2018-04-03 +32,Mobile,F,2018-10-28 +33,Mobile,F,2018-12-18 +34,Desktop,F,2018-08-30 +35,Desktop,F,2018-07-31 +36,Desktop,F,2019-01-09 +37,Desktop,F,2018-07-20 +38,Desktop,M,2018-12-29 +39,Mobile,F,2018-11-19 +40,Desktop,M,2018-07-04 +41,Desktop,F,2019-02-10 +42,Mobile,F,2018-07-19 +43,Desktop,F,2018-12-06 +44,Desktop,M,2019-01-13 
+45,Mobile,M,2018-08-04
+46,Mobile,M,2018-05-15
+47,Mobile,F,2018-06-15
+48,Mobile,M,2019-02-11
+49,Mobile,F,2019-02-16
+50,Desktop,F,2018-11-09
+51,Mobile,F,2019-02-21
+52,Mobile,F,2018-08-22
+53,Desktop,F,2018-05-19
+54,Mobile,M,2018-08-21
+55,Desktop,F,2018-11-08
+56,Mobile,M,2018-12-10
+57,Desktop,M,2018-06-22
+58,Desktop,F,2018-09-15
+59,Desktop,M,2018-10-06
+60,Mobile,M,2018-09-23
+61,Mobile,F,2018-10-27
+62,Mobile,F,2018-08-30
+63,Mobile,M,2018-11-16
+64,Desktop,M,2018-03-13
+65,Desktop,F,2018-03-29
+66,Mobile,M,2018-07-11
+67,Mobile,M,2018-12-04
+68,Mobile,M,2018-07-09
+69,Mobile,M,2018-11-29
+70,Mobile,F,2018-09-06
+71,Desktop,F,2019-02-18
+72,Desktop,M,2019-01-28
+73,Desktop,F,2018-12-25
+74,Mobile,F,2018-10-30
+75,Desktop,F,2018-04-02
+76,Desktop,M,2018-04-18
+77,Mobile,F,2019-02-08
+78,Desktop,M,2018-05-26
+79,Desktop,M,2018-07-31
+80,Desktop,M,2018-03-15
+81,Mobile,F,2018-11-18
+82,Mobile,M,2018-04-02
+83,Mobile,M,2018-10-12
+84,Desktop,M,2018-07-20
+85,Mobile,M,2018-02-28
+86,Desktop,M,2018-09-11
+87,Mobile,F,2018-03-05
+88,Mobile,F,2019-01-14
+89,Mobile,M,2018-04-18
+90,Desktop,M,2018-11-04
+91,Mobile,M,2019-02-21
+92,Desktop,M,2019-01-22
+93,Desktop,M,2019-01-22
+94,Desktop,F,2018-10-17
+95,Mobile,F,2018-12-03
+96,Mobile,M,2018-08-21
+97,Desktop,M,2018-12-04
+98,Mobile,F,2018-03-16
+99,Desktop,F,2019-02-07
+100,Mobile,M,2018-09-02
diff --git a/load.cql b/load.cql
index 67f4b1c..43ac4b8 100644
--- a/load.cql
+++ b/load.cql
@@ -1,6 +1,6 @@
 // Assert that the USER nodes are unique by it's ID
-CREATE CONSTRAINT ON (u:User) ASSERT c.user_id IS UNIQUE;
+CREATE CONSTRAINT ON (u:User) ASSERT u.id IS UNIQUE;
 
 // Assert that the ENTRY nodes are unique by it's page name
 CREATE CONSTRAINT ON (e:Entry) ASSERT e.page IS UNIQUE;
 
@@ -8,13 +8,13 @@ CREATE CONSTRAINT ON (e:Entry) ASSERT e.page IS UNIQUE;
 // LOAD user data
 USING PERIODIC COMMIT
 LOAD CSV WITH HEADERS
-FROM 'file:///ecommerce-funnel-analysis/user_table.csv' AS line
+FROM 'file:///Users/dhritiman/vm_shared_data/ecommerce-funnel-analysis/funnel/user.csv' AS line
 WITH DISTINCT line , datetime(line.`date`).epochMillis as date
 
 CREATE (u:User { id: toInteger(line.`user_id`) })
 SET u.date = toInteger(date),
     u.device = line.`device`,
-    u.gender = line.`sex`
+    u.gender = line.`gender`
 
 // create search node
 CREATE (e:Entry { page : 'search_page'})
@@ -22,8 +22,8 @@ CREATE (e:Entry { page : 'search_page'})
 // Load search page entry and create relationships
 USING PERIODIC COMMIT
 LOAD CSV WITH HEADERS
-FROM 'file:///ecommerce-funnel-analysis/search_page_table.csv' AS line
-WITH DISTINCT line
+FROM 'file:///Users/dhritiman/vm_shared_data/ecommerce-funnel-analysis/funnel/search.csv' AS line
+WITH line
 
 MATCH (u:User { id : toInteger(line.`user_id`)}), (e:Entry { page : 'search_page'})
 MERGE (u)-[r:VISITED]->(e)
@@ -32,15 +32,15 @@ MERGE (u)-[r:VISITED]->(e)
 
 
 // create payment node
-CREATE (e:Entry { page : 'payment_page'})
+CREATE (e:Entry { page : 'checkout_page'})
 
 // Create user and payment page relationship
 USING PERIODIC COMMIT
 LOAD CSV WITH HEADERS
-FROM 'file:///ecommerce-funnel-analysis/payment_page_table.csv' AS line
-WITH DISTINCT line
+FROM 'file:///Users/dhritiman/vm_shared_data/ecommerce-funnel-analysis/funnel/checkout.csv' AS line
+WITH line
 
-MATCH (u:User { id : toInteger(line.`user_id`)}), (e:Entry { page : 'payment_page'})
+MATCH (u:User { id : toInteger(line.`user_id`)}), (e:Entry { page : 'checkout_page'})
 MERGE (u)-[r:VISITED]->(e)
     ON MATCH SET r.visited_count = r.visited_count + 1
     ON CREATE SET r.visited_count = 1
@@ -52,7 +52,7 @@ CREATE (e:Entry { page : 'home_page'})
 // Create user and payment page relationship
 USING PERIODIC COMMIT
 LOAD CSV WITH HEADERS
-FROM 'file:///ecommerce-funnel-analysis/home_page_table.csv' AS line
-WITH DISTINCT line
+FROM 'file:///Users/dhritiman/vm_shared_data/ecommerce-funnel-analysis/funnel/home.csv' AS line
+WITH line
 
 MATCH (u:User { id : toInteger(line.`user_id`)}), (e:Entry { page : 'home_page'})
@@ -67,21 +67,23 @@ CREATE (e:Entry { page : 'payment_confirmation_page'})
-// Create user and payment ocnfirmation page relationship
+// Create user and payment confirmation page relationship
 USING PERIODIC COMMIT
 LOAD CSV WITH HEADERS
-FROM 'file:///ecommerce-funnel-analysis/payment_confirmation_table.csv' AS line
-WITH DISTINCT line
+FROM 'file:///Users/dhritiman/vm_shared_data/ecommerce-funnel-analysis/funnel/payment_confirm.csv' AS line
+WITH line
 
-MATCH (u:User { id : toInteger(line.`user_id`)}), (e:Entry { page : 'payement_confirmation_page'})
+MATCH (u:User { id : toInteger(line.`user_id`)}), (e:Entry { page : 'payment_confirmation_page'})
 MERGE (u)-[r:VISITED]->(e)
     ON MATCH SET r.visited_count = r.visited_count + 1
     ON CREATE SET r.visited_count = 1
 
 
-// Add order's and total purchase data to user's nodes
+// Copy each user's location (state, country) onto the VISITED relationships
+// that already exist for that user. A plain MATCH is used so that users with
+// no page visits are skipped instead of being linked to a new, empty node.
 USING PERIODIC COMMIT
 LOAD CSV WITH HEADERS
-FROM 'file:///user_order_purchase.csv' AS line
-WITH DISTINCT line
+FROM 'file:///Users/dhritiman/vm_shared_data/ecommerce-funnel-analysis/funnel/location.csv' AS line
+WITH line
 
-MATCH (u:User { id: toInteger(line.`user_id`) })
-SET u.total_orders = line.`total_orders`,
-    u.total_amount = line.`total_amount`
+MATCH (u:User { id : toInteger(line.`user_id`)})-[r:VISITED]->(:Entry)
+SET r.state = line.`state`,
+    r.country = line.`country`
\ No newline at end of file
diff --git a/node_rel_analysis.cql b/node_rel_analysis.cql
deleted file mode 100644
index f4259dc..0000000
--- a/node_rel_analysis.cql
+++ /dev/null
@@ -1,36 +0,0 @@
-
-// Count the number of users visited search page
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
- RETURN count(distinct(r))
-
-// Count the number of users visited home page
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'home_page'})
- RETURN count(distinct(r))
-
-// Count the number of users visited home page
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'payment_page'})
- RETURN count(distinct(r))
-
-// Returns distinct user's list who visited payment confirmation page
-MATCH (u:User)-[r:VISITED]->(e:Entry { page :
'payment_confirmation_page'})
- RETURN distinct(u) limit 1
-
-
-// List user's who visited search page as well as home page
-MATCH (e1:Entry {page : 'search_page'})<-[r1:VISITED]-(u:User)-[r2:VISITED]->(e2:Entry { page : 'home_page'}) RETURN u
-
-// Group User's based on different properties for different page visits ::
-
-// 1. Search Page :
-
-// 1.1. Gender Grouping
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
- RETURN u.gender , count(u.gender) AS total
-
-// 1.2. Device Grouping
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
- RETURN u.device , count(u.device) AS total
-
-// 1.3. Device and Gender Grouping
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
- RETURN u.device, u.gender , count(u) AS total ORDER BY count(u) DESC
diff --git a/payment.cql b/payment.cql
new file mode 100644
index 0000000..8523f38
--- /dev/null
+++ b/payment.cql
@@ -0,0 +1,16 @@
+// Number of distinct VISITED relationships that point at the payment confirmation page
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'payment_confirmation_page'})
+RETURN count(distinct(r))
+
+// Payment confirmation visitors grouped by gender and device, largest group first
+MATCH (buyer:User)-[:VISITED]->(:Entry { page : 'payment_confirmation_page'})
+RETURN buyer.gender AS gender, buyer.device AS device, count(buyer) AS total ORDER BY total DESC
+
+// Payment confirmation visitors grouped by gender only, largest group first
+MATCH (buyer:User)-[:VISITED]->(:Entry { page : 'payment_confirmation_page'})
+RETURN buyer.gender AS gender, count(buyer) AS total ORDER BY total DESC
+
+
+// Payment confirmation visitors grouped by the state/country stored on the relationship
+MATCH (buyer:User)-[visit:VISITED]->(:Entry { page : 'payment_confirmation_page'})
+RETURN visit.state AS state, visit.country AS country, count(buyer) AS total ORDER BY total DESC
diff --git a/payment_node_rel_analysis.cql b/payment_node_rel_analysis.cql
deleted file mode 100644
index 4803344..0000000
--- a/payment_node_rel_analysis.cql
+++ /dev/null
@@ -1,7 +0,0 @@
-// Total number of successful payments
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'payment_confirmation_page'})
- RETURN count(distinct(r))
-
-// Total number of successful payments based on gender and device
-MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'payment_confirmation_page'})
- RETURN u.gender as gender , u.device as device , count(u) as total ORDER BY count(u) DESC
diff --git a/user_node_analysis.cql b/user_node.cql
similarity index 51%
rename from user_node_analysis.cql
rename to user_node.cql
index b3bc878..32fd696 100644
--- a/user_node_analysis.cql
+++ b/user_node.cql
@@ -14,16 +14,4 @@ MATCH (u:User { gender : 'Male', platform : 'Mobile'}) RETURN count(u)
 MATCH (u:User) RETURN u.gender AS gender, count(u) AS total
 
 // Group user's by device and take it's count
-MATCH (u:User) RETURN u.device AS device, u.gender as gender, count(u) AS total ORDER BY count(u) DESC
-
-// List the users based on the total amount spent
-MATCH (u:User) RETURN u.id , u.total_amount as total_amount_spent ORDER BY u.total_amount DESC
-
-// Total amount spent based on gender
-MATCH (u:User) RETURN u.gender as gender , SUM(u.total_amount) as total_amount_spent
-
-// Total amount spent based on gender
-MATCH (u:User) RETURN u.gender as gender , SUM(u.total_amount) as total_amount_spent
-
-// Average amount spent by any individual based on gender
-MATCH (u:User) RETURN u.gender as gender , AVG(u.total_amount) as total_amount_spent
\ No newline at end of file
+MATCH (u:User) RETURN u.device AS device, u.gender AS gender, count(u) AS total ORDER BY total DESC
\ No newline at end of file
diff --git a/user_page_visists.cql b/user_page_visists.cql
new file mode 100644
index 0000000..af53435
--- /dev/null
+++ b/user_page_visists.cql
@@ -0,0 +1,75 @@
+// Number of distinct VISITED relationships that point at the search page
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
+RETURN count(distinct(r))
+
+// Number of distinct VISITED relationships that point at the home page
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'home_page'})
+RETURN count(distinct(r))
+
+// Count the number of users who visited the checkout page
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'checkout_page'})
+    RETURN count(distinct(r))
+
+// Count the number of users who visited the payment confirmation page
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'payment_confirmation_page'})
+    RETURN count(distinct(r))
+
+// Search page visitors grouped by gender
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
+    RETURN u.gender , count(u.gender) AS total
+
+// Search page visitors grouped by device
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
+    RETURN u.device , count(u.device) AS total
+
+// Search page visitors grouped by device and gender, largest group first
+MATCH (u:User)-[r:VISITED]->(e:Entry { page : 'search_page'})
+    RETURN u.device, u.gender , count(u) AS total ORDER BY count(u) DESC
+
+
+// Number of users that visited both the search page and the home page
+MATCH (e1:Entry { page : 'home_page'})<-[r2:VISITED]-(u:User)-[r1:VISITED]->(e2:Entry { page : 'search_page'}) RETURN count(distinct(u))
+
+// Number of users that visited both the home page and the checkout page
+MATCH (e1:Entry { page : 'home_page'})<-[r2:VISITED]-(u:User)-[r1:VISITED]->(e2:Entry { page : 'checkout_page'}) RETURN count(distinct(u))
+
+// Number of users that visited the checkout page and also the home or search page (distinct, so a user matching both is counted once)
+MATCH (e1:Entry)<-[r2:VISITED]-(u:User)-[r1:VISITED]->(e2:Entry)
+WHERE (e1.page = 'home_page' OR e1.page = 'search_page') AND e2.page = 'checkout_page'
+RETURN count(distinct(u))
+
+// Number of users who visited the home page but not the checkout page
+MATCH (u:User)-[:VISITED]->(:Entry { page : 'home_page'})
+WHERE NOT (u)-[:VISITED]->(:Entry { page : 'checkout_page'})
+RETURN count(u)
+
+// Number of users who visited checkout page but not payment confirmation page
+MATCH (u:User)-[:VISITED]->(:Entry { page : 'checkout_page'})
+WHERE NOT (u)-[:VISITED]->(:Entry { page : 'payment_confirmation_page'})
+RETURN count(u)
+
+
+// Grouping page visits based on different attributes (each query yields one row per User-VISITED-Entry match before grouping)
+// 1. Page
+MATCH (u:User)-[r:VISITED]->(e:Entry)
+    RETURN e.page as page, count(u) as total_users
+
+// 2. Gender (a user is counted once for every page they visited)
+MATCH (u:User)-[r:VISITED]->(e:Entry)
+    RETURN u.gender as gender, count(u) as total_users
+
+// 3. Gender and page
+MATCH (u:User)-[r:VISITED]->(e:Entry)
+    RETURN u.gender as gender, e.page as page, count(u) as total_users
+
+// 4. Page and device
+MATCH (u:User)-[r:VISITED]->(e:Entry)
+    RETURN e.page as page, u.device as platform , count(u) as total_users
+
+// 5. Page and location (state/country are read from the VISITED relationship), largest group first
+MATCH (u:User)-[r:VISITED]->(e:Entry)
+    RETURN e.page as page, r.state as state, r.country as country, count(u) as total_users ORDER BY count(u) DESC
+
+// 6. Location wise visits (a user is counted once for every page they visited)
+MATCH (u:User)-[r:VISITED]->(e:Entry)
+    RETURN r.state as state, r.country as country, count(u) as total_users