diff --git a/R/doc_group.R b/R/doc_group.R new file mode 100644 index 0000000..42c38bb --- /dev/null +++ b/R/doc_group.R @@ -0,0 +1,40 @@ + +#' @title Construct network of documents based on keyword co-occurrence +#' +#' @description Create a \code{tbl_graph} (a class provided by \pkg{tidygraph}) from the tidy table with document ID and keyword. +#' Each entry (row) should contain only one document and keyword in the tidy format. This function also +#' groups the documents with a community detection algorithm. +#' @param dt A data.frame containing at least two columns with document ID and keyword. +#' @param id Quoted characters specifying the column name of document ID. Default uses "id". +#' @param keyword Quoted characters specifying the column name of keyword. Default uses "keyword". +#' @param com_detect_fun Community detection function, provided by \pkg{tidygraph} (wrappers around clustering +#' functions provided by \pkg{igraph}), see \code{\link[tidygraph]{group_graph}} to find other optional algorithms. +#' Default uses \code{\link[tidygraph]{group_fast_greedy}}. +#' @return A tbl_graph, representing the document relation network based on +#' keyword co-occurrence. +#' @details As we could classify keywords using document ID, we could also +#' classify documents with keywords. In the output network, the nodes are documents +#' and an edge means that the two documents share keywords with each other. 
+#' @examples +#' library(akc) +#' bibli_data_table %>% +#' keyword_clean(id = "id",keyword = "keyword") %>% +#' doc_group(id = "id",keyword = "keyword") -> grouped_doc +#' +#' grouped_doc + + +#' @export +doc_group = function(dt,id = "id",keyword = "keyword", + com_detect_fun = group_fast_greedy){ + dt %>% + as_tibble() %>% + transmute(id = .data[[id]],keyword = .data[[keyword]]) %>% + pairwise_count(id,keyword,upper = FALSE) %>% + graph_from_data_frame(directed = FALSE) %>% + as_tbl_graph() %>% + mutate(group = com_detect_fun()) %>% + rename(id = name) +} + + diff --git a/R/keyword_cloud.R b/R/keyword_cloud.R index a8dd63a..d638800 100644 --- a/R/keyword_cloud.R +++ b/R/keyword_cloud.R @@ -4,6 +4,8 @@ #' @description This function should be used to plot the object exported by #' \code{\link[akc]{keyword_group}}. It could draw a robust word cloud of keywords. #' @param tibble_graph A \code{tbl_graph} output by \code{\link[akc]{keyword_group}}. +#' @param group_no If one wants to visualize a specific group, give the group number. +#' Default uses \code{NULL}, which returns all the groups. #' @param top How many top keywords (by frequency) should be plot? Default uses 50. #' @param max_size Size of largest keyword.Default uses 20. 
#' @details In the output graph, the size of keywords is proportional to the keyword @@ -25,9 +27,13 @@ #' #' grouped_keyword %>% #' keyword_cloud() +#' +#' grouped_keyword %>% +#' keyword_cloud(group_no = 1) -keyword_cloud = function(tibble_graph,top = 50,max_size = 20){ - tibble_graph %>% +keyword_cloud = function(tibble_graph,group_no = NULL,top = 50,max_size = 20){ + if(is.null(group_no)) + tibble_graph %>% as_tibble() %>% top_n(top,freq) %>% mutate(group = as.factor(group)) %>% @@ -36,6 +42,16 @@ keyword_cloud = function(tibble_graph,top = 50,max_size = 20){ scale_size_area(max_size = max_size) + scale_x_discrete(breaks = NULL,name = "") + theme_minimal() + else + tibble_graph %>% + as_tibble() %>% + filter(group == group_no) %>% + top_n(top,freq) %>% + ggplot(aes(label = name,size = freq)) + + geom_text_wordcloud_area() + + scale_size_area(max_size = max_size) + + scale_x_discrete(breaks = NULL,name = "") + + theme_minimal() } diff --git a/R/keyword_network.R b/R/keyword_network.R index efe59d3..9642b75 100644 --- a/R/keyword_network.R +++ b/R/keyword_network.R @@ -28,6 +28,12 @@ #' keyword_group(id = "id",keyword = "keyword") %>% #' keyword_network() #' +#' # use color with `scale_fill_` +#' bibli_data_table %>% +#' keyword_clean(id = "id",keyword = "keyword") %>% +#' keyword_group(id = "id",keyword = "keyword") %>% +#' keyword_network() + ggplot2::scale_fill_viridis_d() +#' #' # without facet #' bibli_data_table %>% #' keyword_clean(id = "id",keyword = "keyword") %>%