Skip to content

Commit

Permalink
Unnecessary imports removed; requirements.txt added
Browse files (browse the repository at this point in the history)
  • Loading branch information
jigsaw2212 committed Jun 10, 2021
1 parent f94c7fb commit e9513e2
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,17 @@
warnings.simplefilter("ignore", ResourceWarning)

import argparse
import io
import json
import os
from os.path import basename
import re
import six
import sys
import pprint
import spacy
import rouge
import numpy as np
import rouge.rouge_score as rouge_score
import warnings
from collections import Counter
from tqdm import tqdm
from matplotlib import pyplot as plt
from nltk.stem.snowball import SnowballStemmer
from matching.games import HospitalResident
from nltk.translate.meteor_score import meteor_score
from bert_score import BERTScorer
from transformers import AutoTokenizer
from nltk.tokenize import word_tokenize, sent_tokenize

from sentence_transformers import SentenceTransformer, util

Expand All @@ -41,7 +30,6 @@
# https://huggingface.co/sentence-transformers/paraphrase-distilroberta-base-v1
model_bi_encoder_paraphrase = SentenceTransformer('paraphrase-distilroberta-base-v1')


model_bi_encoder_paraphrase.max_seq_length = 512

pp = pprint.PrettyPrinter(indent=2)
Expand All @@ -51,7 +39,6 @@
warnings.filterwarnings("ignore", category=ResourceWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)


# Breaks the chapter text down into shorter paragraphs to which the ground-truth summary sentences can be aligned
def merge_text_paragraphs(paragraphs, min_sent=3, max_sent=12):
spacy_nlp = spacy.load("en_core_web_lg")
Expand Down Expand Up @@ -154,7 +141,7 @@ def compute_similarities_bi_encoder(paragraphs, summaries):
paragraphs_embeddings_paraphrase = model_bi_encoder_paraphrase.encode(paragraphs, convert_to_tensor=True)
summaries_embeddings_paraphrase = model_bi_encoder_paraphrase.encode(summaries, convert_to_tensor=True)

similarity_matrix_bi_encoder_paraphrase = util.pytorch_cos_sim(summaries_embeddings_paraphrase, paragraphs_embeddings_paraphrase).numpy()
similarity_matrix_bi_encoder_paraphrase = util.pytorch_cos_sim(summaries_embeddings_paraphrase, paragraphs_embeddings_paraphrase).cpu().numpy()

return similarity_matrix_bi_encoder_paraphrase

Expand Down
10 changes: 10 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
sentence_transformers==1.2.0
matplotlib==3.4.2
numpy==1.19.5
spacy==3.0.3
matching==1.4
nltk==3.6.2
tqdm==4.49.0
beautifulsoup4==4.9.3
Unidecode==1.2.0
word2number==1.1

0 comments on commit e9513e2

Please sign in to comment.