-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfunctions.R
More file actions
137 lines (107 loc) · 4.14 KB
/
functions.R
File metadata and controls
137 lines (107 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
library(readr)
library(ggraph)
library(dplyr)
library(tidygraph)
library(networkD3)
library(ggthemes)
library(RColorBrewer)
library(scales)
library(lubridate)
#library(randomcoloR)
library(recommenderlab)
library(igraph)
make_graph <- function(tangled) {
tangled <- tangled %>% mutate(note = if_else(is.na(note), "",note))
# attempt to roll up the payments
money_types <- c("payment", "loan", "investment", "fine", "contribution")
tangled <- tangled %>% filter(e_type %in% money_types) %>%
mutate(amt = as.numeric(note)) %>%
group_by(from, to, e_type) %>%
summarize(date = last(date),
sum = sum(amt),
note = if_else(is.na(sum), last(note), format(sum, scientific = F))
) %>% bind_rows(
tangled %>% filter(!(e_type %in% money_types))
)
# make a new type that summarizes the original types
display_types <- tibble(
e_type = c("payment", "association", "investment", "loan", "fine", "verdict", "indictment", "contribution" ),
d_type = c("money", "contact", "money", "money", "money", "verdict", "indictment", "money")
)
tangled <- tangled %>%
full_join(display_types, by = c("e_type" = "e_type"))
graph <- as_tbl_graph(tangled) %>% mutate(group = as.character(group_spinglass()))
# the below few line will find the pagerank for all nodes, and use the
# max pagerank as the group label
g<-graph %>%
mutate(centrality = centrality_pagerank()) %>% activate(nodes) %>%
group_by(group) %>% mutate(g_max = max(centrality))
max_cent_df <- g %>% activate(nodes) %>%
as_tibble() %>%
group_by(group) %>%
summarize(g_max = max(centrality))
# the last summarize there handles ties
max_cent <- g %>% activate(nodes) %>% as_tibble()%>%
filter(centrality %in% max_cent_df$g_max) %>%
rename(group_label = name) %>% ungroup() %>%
group_by(group) %>%
arrange(g_max, desc(centrality), group_label) %>%
summarize(
group_label = first(group_label),
centrality = first(centrality),
g_max = first(g_max)
)
graph <- g %>% activate(nodes) %>%
inner_join(max_cent, by = c("group" = "group",
"g_max" = "centrality")) %>%
select(-g_max.y)
graph
}
weight_graph <- function(graph, in_group, out_group) {
# for FR layouts, let's set an edge weight: in group = 2, out of group = 1
get_group <- function(node, graph) {
graph %>% activate(nodes) %>% as_tibble() %>% filter(row_number() == node) %>% pull(group) %>% as.numeric()
}
weights <- graph %>% activate(edges) %>% as_tibble() %>% rowwise() %>%
mutate(the_group = if_else(get_group(to,graph) == get_group(from,graph),get_group(from,graph),NULL)) %>%
group_by(the_group) %>% mutate(n=n()) %>% ungroup() %>%
mutate(max_n = max(n), weight = if_else(!is.na(the_group), in_group, out_group)) %>% pull(weight)
graph <- graph %>% activate(edges) %>% mutate(weight = weights)
graph
}
get_palette <- function(graph) {
# now handle some aesthetics
n_group <- graph %>% activate(nodes) %>% pull(group) %>% n_distinct()
base_pal <- c("#8AAFDD", "#ECD1FB", "#F48CDC", "#F06B8A", "#529756", "#BB3DD1",
"#D7A4B4", "#E8E1AE", "#9E922A", "#D299F2", "#FEECF2", "#AEF9E1",
"#41BA30", "#5A6AC5", "#6DEAA3", "#D3F79C", "#AFB7B1", "#FEA185",
"#F5FC72", "#86E4FB", few_pal()(8))
#distinctColorPalette(n_group, runTsne = TRUE)
base_pal
}
get_node_recommendations <- function(graph, node_name) {
set.seed(1492)
the_node <- graph %>%
activate(nodes) %>%
as_tibble %>%
mutate(row_num = row_number()) %>%
filter(name == node_name) %>%
pull(row_num)
# just chaining everything together...
topReqs <- graph %>% as_adj() %>%
as("realRatingMatrix") %>%
recommenderlab::normalize() %>%
recommenderlab::binarize(minRating=-2) %>%
assign("b_adj", ., envir = .GlobalEnv) %>%
Recommender(method = "ALS") %>%
predict( b_adj[the_node,], n=50) %>%
bestN(n=30)
topN_tib <- tibble(
nodes = topReqs %>%
as("list") %>%
unlist,
ratings = topReqs %>%
getRatings() %>%
unlist
)
}