-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_KEGG_pathway_gene_names_table.R
55 lines (45 loc) · 1.67 KB
/
get_KEGG_pathway_gene_names_table.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# GPT4o made this!
# Well a great part of it...
# Build a tab-separated table (Entrez Gene ID, gene name, description) for
# all genes of one KEGG pathway and write it to "<keggEntry>_gene_table.tsv".

# Install KEGGREST (through BiocManager) only when it is not already available
if (!requireNamespace("KEGGREST", quietly = TRUE)) {
  # Install BiocManager itself only when missing, so an existing installation
  # is not reinstalled every time KEGGREST has to be fetched
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("KEGGREST")
}

# Load the KEGGREST package
library(KEGGREST)

# Get the pathway information for mmu00190 (oxidative phosphorylation in mouse)
keggEntry <- "mmu00190"
pathway <- keggGet(keggEntry)

# Extract the list of genes from the pathway information.
# as.character() turns a missing GENE field (NULL) into character(0), so the
# code below yields an empty table instead of failing.
genes <- as.character(pathway[[1]]$GENE)
if (length(genes) == 0) {
  warning("No GENE entries found for ", keggEntry, call. = FALSE)
}

# The GENE vector alternates between an Entrez Gene ID (odd positions) and
# the matching "name; description" string (even positions).
id_positions <- seq(1, by = 2, length.out = ceiling(length(genes) / 2))
entrez_ids <- genes[id_positions]
# An unpaired trailing ID indexes past the end and therefore gets NA here
gene_descriptions <- genes[id_positions + 1]

# Locate the FIRST "; " in each description (-1 when absent, NA for NA input).
# Splitting only at the first separator keeps descriptions that themselves
# contain "; " intact instead of truncating them.
sep_pos <- as.integer(regexpr("; ", gene_descriptions, fixed = TRUE))
has_name <- !is.na(sep_pos) & sep_pos > 0

# It is assumed that if there is no "; ", then there is no gene name:
# the name stays NA and the whole string is used as the description.
gene_names <- rep(NA_character_, length(gene_descriptions))
gene_names[has_name] <- substr(
  gene_descriptions[has_name],
  1,
  sep_pos[has_name] - 1
)

descriptions <- gene_descriptions
descriptions[has_name] <- substr(
  gene_descriptions[has_name],
  sep_pos[has_name] + 2, # skip the two-character "; " separator
  nchar(gene_descriptions[has_name])
)

# Create a data frame with the collected information
gene_table <- data.frame(
  EntrezGeneID = entrez_ids,
  GeneName = gene_names,
  Description = descriptions,
  stringsAsFactors = FALSE
)

# Write the resulting data frame to a tab-separated file named after the entry
write.table(
  gene_table,
  file = paste0(keggEntry, "_gene_table.tsv"),
  row.names = FALSE,
  quote = FALSE,
  sep = "\t"
)