Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
hope-data-science authored Feb 13, 2020
1 parent 92c14c9 commit f9f6625
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 2 deletions.
40 changes: 40 additions & 0 deletions R/doc_group.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

#' @title Construct network of documents based on keyword co-occurrence
#'
#' @description Create a \code{tbl_graph} (a class provided by \pkg{tidygraph}) from a tidy table with document ID and keyword.
#' Each entry (row) should contain only one document and one keyword, in the tidy format. This function
#' also groups the documents.
#' @param dt A data.frame containing at least two columns with document ID and keyword.
#' @param id Quoted characters specifying the column name of document ID. Default uses "id".
#' @param keyword Quoted characters specifying the column name of keyword. Default uses "keyword".
#' @param com_detect_fun Community detection function, provided by \pkg{tidygraph} (wrappers around clustering
#' functions provided by \pkg{igraph}). See \code{\link[tidygraph]{group_graph}} to find other optional algorithms.
#' Default uses \code{\link[tidygraph]{group_fast_greedy}}.
#' @return A tbl_graph, representing the document relation network based on
#' keyword co-occurrence.
#' @details As we could classify keywords using document ID, we could also
#' classify documents with keywords. In the output network, the nodes are documents
#' and an edge means that the two connected documents share keywords with each other.
#' @examples
#' library(akc)
#' bibli_data_table %>%
#' keyword_clean(id = "id",keyword = "keyword") %>%
#' doc_group(id = "id",keyword = "keyword") -> grouped_doc
#'
#' grouped_doc


#' @export
doc_group <- function(dt, id = "id", keyword = "keyword",
                      com_detect_fun = group_fast_greedy) {
  # Reduce the input to exactly two columns with fixed names, so the
  # remaining steps do not depend on the caller's column names.
  doc_keyword <- transmute(
    as_tibble(dt),
    id = .data[[id]],
    keyword = .data[[keyword]]
  )

  # Count keyword co-occurrence for pairs of documents; `upper = FALSE`
  # is forwarded unchanged to pairwise_count().
  doc_pairs <- pairwise_count(doc_keyword, id, keyword, upper = FALSE)

  # Turn the pair table into an undirected graph and wrap it as a tbl_graph.
  doc_graph <- as_tbl_graph(graph_from_data_frame(doc_pairs, directed = FALSE))

  # Label every node with its community, then expose the node label
  # column `name` under the name `id`.
  doc_graph %>%
    mutate(group = com_detect_fun()) %>%
    rename(id = name)
}


20 changes: 18 additions & 2 deletions R/keyword_cloud.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#' @description This function should be used to plot the object exported by
#' \code{\link[akc]{keyword_group}}. It could draw a robust word cloud of keywords.
#' @param tibble_graph A \code{tbl_graph} output by \code{\link[akc]{keyword_group}}.
#' @param group_no If one wants to visualize a specific group, give the group number.
#' Default uses \code{NULL}, which returns all the groups.
#' @param top How many top keywords (by frequency) should be plotted? Default uses 50.
#' @param max_size Size of largest keyword. Default uses 20.
#' @details In the output graph, the size of keywords is proportional to the keyword
Expand All @@ -25,9 +27,13 @@
#'
#' grouped_keyword %>%
#' keyword_cloud()
#'
#' grouped_keyword %>%
#' keyword_cloud(group_no = 1)

keyword_cloud = function(tibble_graph,top = 50,max_size = 20){
tibble_graph %>%
keyword_cloud = function(tibble_graph,group_no = NULL,top = 50,max_size = 20){
if(is.null(group_no))
tibble_graph %>%
as_tibble() %>%
top_n(top,freq) %>%
mutate(group = as.factor(group)) %>%
Expand All @@ -36,6 +42,16 @@ keyword_cloud = function(tibble_graph,top = 50,max_size = 20){
scale_size_area(max_size = max_size) +
scale_x_discrete(breaks = NULL,name = "") +
theme_minimal()
else
tibble_graph %>%
as_tibble() %>%
filter(group == group_no) %>%
top_n(top,freq) %>%
ggplot(aes(label = name,size = freq)) +
geom_text_wordcloud_area() +
scale_size_area(max_size = max_size) +
scale_x_discrete(breaks = NULL,name = "") +
theme_minimal()
}


6 changes: 6 additions & 0 deletions R/keyword_network.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@
#' keyword_group(id = "id",keyword = "keyword") %>%
#' keyword_network()
#'
#' # use color with `scale_fill_`
#' bibli_data_table %>%
#' keyword_clean(id = "id",keyword = "keyword") %>%
#' keyword_group(id = "id",keyword = "keyword") %>%
#' keyword_network() + ggplot2::scale_fill_viridis_d()
#'
#' # without facet
#' bibli_data_table %>%
#' keyword_clean(id = "id",keyword = "keyword") %>%
Expand Down

0 comments on commit f9f6625

Please sign in to comment.