6
6
rmdformats::readthedown :
7
7
toc_depth : 3
8
8
use_bookdown : yes
9
+ css : styles.css
9
10
10
11
html_document :
11
12
fig_caption : yes
@@ -100,6 +101,16 @@ We now write this data out as a CSV so `arules` can read it in and process it.
100
101
tnx_purchase_tbl %>% write_csv("data/tnx_purchase_tbl.csv")
101
102
```
102
103
104
+ We also want to load the free-text description of the various stock items as
105
+ this will help will interpretation of the various rules we have found.
106
+
107
+ ``` {r load_stock_code_descriptions, echo=TRUE}
108
+ stock_code_lookups_tbl <- read_rds("data/stock_code_lookup_tbl.rds")
109
+
110
+ stock_code_lookups_tbl %>% glimpse()
111
+ ```
112
+
113
+
103
114
104
115
105
116
# Basket Analysis with Association Rules
202
213
So, to calculate the confidence for a rule:
203
214
204
215
$$
205
- \text{Supp }(\text{\{milk, bread\}} \Rightarrow \text{\{butter\}}) = \frac{0.2}{0.4} = 0.5.
216
+ \text{Conf }(\text{\{milk, bread\}} \Rightarrow \text{\{butter\}}) = \frac{0.2}{0.4} = 0.5.
206
217
$$
207
218
208
219
To illustrate how rules are not symmetric:
209
220
210
221
$$
211
- \text{Supp }(\text{\{butter\}} \Rightarrow \text{\{milk, bread\}}) = \frac{0.2}{0.6} = 0.33.
222
+ \text{Conf }(\text{\{butter\}} \Rightarrow \text{\{milk, bread\}}) = \frac{0.2}{0.6} = 0.33.
212
223
$$
213
224
214
225
@@ -385,7 +396,7 @@ apriori_rules_igraph <- basket_apriori %>%
385
396
) %>%
386
397
as("igraph")
387
398
388
- apriori_rules_igraph %>% glimpse ()
399
+ apriori_rules_igraph %>% print ()
389
400
```
390
401
391
402
We should first visualise this graph, using the top 30 rules in the dataset,
@@ -434,7 +445,7 @@ product_groups_all_tbl <- apriori_rules_tblgraph %>%
434
445
product_count = n()
435
446
) %>%
436
447
ungroup() %>%
437
- select(product_group_id = component_id, stock_code = label) %>%
448
+ select(product_group_id = component_id, product_count, stock_code = label) %>%
438
449
arrange(product_group_id, stock_code)
439
450
440
451
product_groups_all_tbl %>% glimpse()
@@ -488,7 +499,7 @@ product_groups_largest_tbl <- apriori_rules_large_tblgraph %>%
488
499
product_count = n()
489
500
) %>%
490
501
ungroup() %>%
491
- select(product_group_id = sub_id, stock_code = label) %>%
502
+ select(product_group_id = sub_id, product_count, stock_code = label) %>%
492
503
arrange(product_group_id, stock_code)
493
504
494
505
product_groups_largest_tbl %>% glimpse()
@@ -573,7 +584,7 @@ group_props_tbl <- arules_groups_tbl %>%
573
584
filter(group_size > 1, group_size < 15) %>%
574
585
expand_grid(tnx_groups_tbl) %>%
575
586
mutate(
576
- comb_data = future_map2(
587
+ comb_data = future_map2(
577
588
invoice_data, stock_data,
578
589
inner_join,
579
590
by = "stock_code",
@@ -606,6 +617,21 @@ ggplot(plot_tbl) +
606
617
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
607
618
```
608
619
620
+
621
+ ### Investigate Groups
622
+
623
+ Now that we have our groups we add on the description fields so that
624
+ interpretation of the different groupings is easier.
625
+
626
+ ``` {r display_stock_groups, echo=TRUE}
627
+ stock_groups_tbl %>%
628
+ filter(group_size > 1, group_size < 15) %>%
629
+ mutate(stock_code_upr = stock_code %>% str_trim() %>% str_to_upper()) %>%
630
+ left_join(stock_code_lookups_tbl, by = "stock_code_upr") %>%
631
+ datatable()
632
+ ```
633
+
634
+
609
635
### Write Product Groups
610
636
611
637
As this may be useful for later analysis and for later modelling, we output
@@ -616,6 +642,138 @@ stock_groups_tbl %>% write_rds("data/stock_groups_tbl.rds")
616
642
```
617
643
618
644
645
+
646
+ # Investigate Lower Support Rules
647
+
648
+ Our previous analysis was all based on rules with a minimum confidence of 0.80
649
+ so we now want to repeat our analysis but on this new set of rules.
650
+
651
+
652
+ ``` {r construct_lower_rules_graph, echo=TRUE}
653
+ apriori_lower_rules_igraph <- basket_lower_rules %>%
654
+ plot(
655
+ measure = "support",
656
+ method = "graph",
657
+ control = list(max = 5000)
658
+ ) %>%
659
+ as("igraph")
660
+
661
+ apriori_lower_rules_igraph %>% glimpse()
662
+ ```
663
+
664
+ Once again we have a quick look at the top 50 rules.
665
+
666
+ ``` {r visual_inspection_rules_igraph, echo=TRUE}
667
+ basket_lower_rules %>%
668
+ head(n = 50, by = "support") %>%
669
+ plot(
670
+ measure = "lift",
671
+ method = "graph",
672
+ engine = "htmlwidget"
673
+ )
674
+ ```
675
+
676
+
677
+
678
+ ## Determine Distinct Rules Subgraphs
679
+
680
+ Having converted the association rules to the graph, we then look at the
681
+ distinct components of this graph and use these as our first pass at these
682
+ clusters.
683
+
684
+ ``` {r create_lower_component_labels, echo=TRUE}
685
+ apriori_lower_rules_tblgraph <- apriori_lower_rules_igraph %>%
686
+ as_tbl_graph() %>%
687
+ mutate(
688
+ component_id = group_components()
689
+ ) %>%
690
+ group_by(component_id) %>%
691
+ mutate(
692
+ component_size = n()
693
+ ) %>%
694
+ ungroup()
695
+
696
+ apriori_lower_rules_tblgraph %>% print()
697
+ ```
698
+
699
+
700
+ ``` {r construct_product_groups_all, echo=TRUE}
701
+ product_groups_lower_all_tbl <- apriori_lower_rules_tblgraph %>%
702
+ activate(nodes) %>%
703
+ as_tibble() %>%
704
+ filter(are_na(support)) %>%
705
+ group_by(component_id) %>%
706
+ mutate(
707
+ product_count = n()
708
+ ) %>%
709
+ ungroup() %>%
710
+ select(product_group_id = component_id, product_count, stock_code = label) %>%
711
+ arrange(product_group_id, stock_code)
712
+
713
+ product_groups_lower_all_tbl %>% glimpse()
714
+ ```
715
+
716
+ ``` {r construct_largest_subgraph_groups, echo=TRUE, cache=TRUE}
717
+ apriori_lower_rules_bigcomp_tblgraph <- apriori_lower_rules_tblgraph %>%
718
+ to_subgraph(component_size == max(component_size)) %>%
719
+ use_series(subgraph) %>%
720
+ morph(to_undirected) %>%
721
+ mutate(
722
+ sub_id = group_edge_betweenness()
723
+ ) %>%
724
+ unmorph()
725
+ ```
726
+
727
+
728
+ ``` {r convert_subgraph_groups_tbl}
729
+ product_groups_lower_bigcomp_tbl <- apriori_lower_rules_bigcomp_tblgraph %>%
730
+ activate(nodes) %>%
731
+ as_tibble() %>%
732
+ filter(are_na(support)) %>%
733
+ group_by(sub_id) %>%
734
+ mutate(
735
+ product_count = n()
736
+ ) %>%
737
+ ungroup() %>%
738
+ select(product_group_id = sub_id, product_count, stock_code = label) %>%
739
+ arrange(product_group_id, stock_code)
740
+
741
+ product_groups_lower_bigcomp_tbl %>% glimpse()
742
+ ```
743
+
744
+
745
+ ``` {r construct_combined_data_groups_lower, echo=TRUE}
746
+ stock_groups_lower_tbl <- list(
747
+ ALL = product_groups_lower_all_tbl,
748
+ LRG = product_groups_lower_bigcomp_tbl
749
+ ) %>%
750
+ bind_rows(.id = "type") %>%
751
+ mutate(
752
+ group_label = sprintf("%s_%02d", type, product_group_id)
753
+ ) %>%
754
+ group_by(group_label) %>%
755
+ mutate(
756
+ group_size = n()
757
+ ) %>%
758
+ ungroup() %>%
759
+ select(group_label, group_size, stock_code)
760
+
761
+ stock_groups_lower_tbl %>% glimpse()
762
+ ```
763
+
764
+
765
+ Now that we have our groups we add on the description fields so that
766
+ interpretation of the different groupings is easier.
767
+
768
+ ``` {r display_lower_stock_groups, echo=TRUE}
769
+ stock_groups_lower_tbl %>%
770
+ filter(group_size > 1, group_size != max(group_size)) %>%
771
+ mutate(stock_code_upr = stock_code %>% str_trim() %>% str_to_upper()) %>%
772
+ left_join(stock_code_lookups_tbl, by = "stock_code_upr") %>%
773
+ datatable()
774
+ ```
775
+
776
+
619
777
# R Environment
620
778
621
779
``` {r show_session_info, echo=TRUE, message=TRUE}
0 commit comments