-
-
Notifications
You must be signed in to change notification settings - Fork 53
118 lines (103 loc) · 4.9 KB
/
paper_ranking.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Monthly workflow: runs the paper-ranking script, commits the refreshed
# predictions file, and posts the top-ranked papers to a tracking issue
# (creating the issue the first time, commenting on it afterwards).
name: Run Paper Ranking Script and Update Issue

on:
  schedule:
    - cron: '0 0 1 * *'  # runs on the first day of every month
  workflow_dispatch:

# contents: write -> push the updated predictions file
# issues: write   -> create / comment on the tracking issue
permissions:
  contents: write
  issues: write

jobs:
  paper-ranking:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          # TODO update to using uv
          python -m pip install --upgrade pip
          # Quoted so the shell never treats the extras brackets as a glob.
          pip install '.[paper-ranking]'

      # Exports START_DATE / END_DATE for all later steps.
      # NOTE: the window is a fixed 30 days back from the run date, not a
      # calendar month.
      - name: Set Date Variables
        id: set-date-variables
        run: |
          end_date=$(date +'%Y-%m-%d')
          start_date=$(date -d "$end_date - 30 days" +'%Y-%m-%d')
          echo "START_DATE=$start_date" >> "$GITHUB_ENV"
          echo "END_DATE=$end_date" >> "$GITHUB_ENV"

      - name: Set PYTHONPATH
        run: |
          echo "PYTHONPATH=$PWD/src" >> "$GITHUB_ENV"

      - name: Run Paper Ranking Script
        id: run-ranking-script
        run: |
          echo "PYTHONPATH=$PYTHONPATH" # Verify PYTHONPATH
          # TODO update to using python -m
          # Dates are read from the environment (quoted) rather than being
          # template-expanded into the script text.
          python src/bioregistry/analysis/paper_ranking.py --start-date "$START_DATE" --end-date "$END_DATE"

      - name: Configure Git
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

      - name: Commit and Push Changes
        run: |
          git add exports/analyses/paper_ranking/predictions.tsv
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the whole run; only commit and push when the file changed.
          if git diff --cached --quiet; then
            echo "No changes to predictions.tsv; skipping commit and push."
          else
            git commit -m "Update predictions file with papers between $START_DATE and $END_DATE"
            git push
          fi

      # HEAD is the new commit when one was made, otherwise the checked-out
      # commit; either way it pins the permalink used in the issue comment.
      - name: Find Commit Hash
        id: get-commit-hash
        run: |
          echo "COMMIT_HASH=$(git rev-parse HEAD)" >> "$GITHUB_ENV"

      # Returns the number of the open, labelled tracking issue, or null.
      - name: Find Existing Issue
        id: find-issue
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const { data: issues } = await github.rest.issues.listForRepo({
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              labels: 'paper-ranking-results'
            });
            const issue = issues.find(issue => issue.title === 'Potentially relevant papers ranked for curation');
            return issue ? issue.number : null;

      # Posts the first 20 data rows of the predictions file as a Markdown
      # table: as a comment on the existing issue, or as the body of a new one.
      - name: Create or Update Issue with Comment
        id: create-or-update-issue
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            // JSON-encoded result of the previous step: a number or null.
            const issueNumber = ${{ steps.find-issue.outputs.result }};
            const startDate = process.env.START_DATE;
            const endDate = process.env.END_DATE;
            const commitHash = process.env.COMMIT_HASH;
            const rankingFileLink = `https://github.com/${{ github.repository }}/blob/${commitHash}/exports/analyses/paper_ranking/predictions.tsv`;
            const content = fs.readFileSync(`exports/analyses/paper_ranking/predictions.tsv`, 'utf8');
            // A literal pipe inside a cell would split the Markdown table row.
            const escapeCell = text => (text ?? '').replace(/\|/g, '\\|');
            // Skip the header row, drop blank lines (e.g. the trailing newline
            // of a short file), then keep the first 20 entries.
            const lines = content
              .split(/\r?\n/)
              .slice(1)
              .filter(line => line.trim() !== '')
              .slice(0, 20);
            const rows = lines.map(line => {
              const [pubmed, title] = line.split('\t');
              const link = `https://bioregistry.io/pubmed:${pubmed}`;
              return `| [${pubmed}](${link}) | ${escapeCell(title)} |`;
            });
            const tableHeader = '| PubMed ID | Title |\n| --- | --- |\n';
            const commentBody = `This issue contains monthly updates to an automatically ranked list of PubMed papers as candidates for curation in the Bioregistry. Papers may be relevant in at least three ways: \n(1) as a new prefix for a resource that can be added to the Bioregistry,\n(2) as a provider for an existing prefix, or\n(3) as a new publication for an existing prefix already in the Bioregistry.\n\nThese curations can happen in separate issues and pull requests. The full list of ranked papers can be found [here](${rankingFileLink}). If you review any of these papers for relevance, you should edit the curated papers file [here](https://github.com/${{ github.repository }}/blob/main/src/bioregistry/data/curated_papers.tsv); these curations are taken into account when retraining the ranking model.\n\n**New entries for ${startDate} to ${endDate}:**\n\n${tableHeader}${rows.join('\n')}`;
            if (issueNumber) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: issueNumber,
                body: commentBody,
              });
              core.setOutput('issue-number', issueNumber);
            } else {
              const response = await github.rest.issues.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title: 'Potentially relevant papers ranked for curation',
                body: `${commentBody}`,
                labels: ['paper-ranking-results'],
              });
              core.setOutput('issue-number', response.data.number);
            }