Skip to content

Commit

Permalink
Merge pull request OpenKnowledgeMaps#440 from OpenKnowledgeMaps/subject-orig-consistency
Browse files (browse the repository at this point in the history)

implement subject_orig field where missing

Former-commit-id: 4efd56f
  • Loading branch information
chreman authored Jun 18, 2020
2 parents 5fc6756 + 7849318 commit e66aaa2
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
1 change: 1 addition & 0 deletions server/preprocessing/other-scripts/openaire.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ get_return_values <- function(all_artifacts){
}

preprocess_data <- function(all_artifacts){
all_artifacts$subject_orig <- all_artifacts$subject
all_artifacts$subject <- unlist(lapply(all_artifacts$subject, function(x) {gsub("\\[[A-Za-z \\.-]+\\]", "", x)})) # removes [ INFO.INFO-MA ] Computer Science [cs]/Multiagent Systems [cs.MA]
all_artifacts$subject <- unlist(lapply(all_artifacts$subject, function(x) {gsub(" ?/", ";", x)}))
all_artifacts$subject <- unlist(lapply(all_artifacts$subject, function(x) {gsub("\\:.*\\:\\:", "", x)})) # keeps only last part after :: ":Enginyeria de la telecomunicació::Processament del senyal::Reconeixement de formes [Àrees temàtiques de la UPC]"
Expand Down
3 changes: 2 additions & 1 deletion server/workers/gsheets/src/search_gsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ def create_input_data(df):
metadata["year"] = df["Publication Date"]
metadata["url"] = df.ID
metadata["readers"] = 0
metadata["subject"] = df.Keywords
metadata["subject_orig"] = df.Keywords
metadata["subject"] = metadata["subject_orig"]
metadata["oa_state"] = df.Access
metadata["link"] = df["Link to PDF"].map(lambda x: x.replace("N/A", "") if isinstance(x, str) else "")
metadata["relevance"] = df.index
Expand Down

0 comments on commit e66aaa2

Please sign in to comment.