#!/bin/bash
#
# Collects lines that mention entity labels (PERSON, ORG, GPE, ...) from a
# list of text files and appends the de-duplicated matches to one output
# file per (input file, label) pair.
#
# Usage: run from the directory that contains `bookArray`, a text file with
# one input file name per line. Prints the elapsed wall-clock seconds when
# finished.

# Verbose terminal output: echo each script line as it is read (-v) and each
# command as it is executed (-x).
set -xv

start=$(date +%s)

#######################################
# Greps every file listed in `bookArray` for each entity label and appends
# the sorted, unique matching lines to "<file>_<LABEL>_output".
# Globals:   none
# Arguments: none
# Inputs:    ./bookArray - one input file name per line; blank lines are
#            skipped. A line is taken as a single file name, so names that
#            contain spaces work.
# Outputs:   appends to "<file>_<LABEL>_output" next to each input file;
#            diagnostics go to stderr.
# Returns:   1 if `bookArray` cannot be read, otherwise the status of the
#            last pipeline that ran.
#######################################
entitySearch() {
  local -r book_file="bookArray"
  # Labels are matched as plain substrings anywhere in a line.
  # NOTE(review): these look like the entity types emitted by the stanza NER
  # sketch at the end of this file - confirm against its real output format.
  local -a entity_types=(
    PERSON NORP FAC ORG GPE LOC PRODUCT EVENT WORK_OF_ART
    LAW LANGUAGE DATE TIME PERCENT MONEY QUANTITY ORDINAL CARDINAL
  )
  local -a book_list=()
  local book myBook entity

  if [[ ! -r "$book_file" ]]; then
    printf 'entitySearch: cannot read book list: %s\n' "$book_file" >&2
    return 1
  fi

  # Create the books array from the input file. `-r` keeps backslashes
  # literal; the `|| [[ -n ... ]]` clause keeps a final line that has no
  # trailing newline.
  while read -r book || [[ -n "$book" ]]; do
    [[ -n "$book" ]] || continue
    book_list+=("$book")
  done < "$book_file"

  # Outer loop through books, inner loop through entity labels.
  for myBook in "${book_list[@]}"; do
    # Intended flow (from the original notes): send "$myBook" to
    # nameStanza.py / stanzaNER.py as the file name to process, then send
    # the output back to bash to run grep etc. on and write to file.
    # NOTE(review): the flattened original is ambiguous about whether
    # stanzaNER.py was actually invoked at this point; it is NOT invoked
    # here and the greps read "$myBook" directly - confirm.
    for entity in "${entity_types[@]}"; do
      # `>>` appends, so re-running the script adds the same lines again;
      # kept as in the original.
      grep -- "$entity" "$myBook" | sort | uniq >> "${myBook}_${entity}_output"
    done
  done
}

entitySearch

end=$(date +%s)
echo "Execution time was $((end - start)) seconds."
exit 0

# stanzaNER.py (sketch, not yet wired into the loop above):
#
# import stanza
# nlp = stanza.Pipeline(lang='en', processors='tokenize,ner')
# with open(myBook, 'r') as f:
#     doc = nlp(f.read())
# print(*[f'entity: {ent.text}\ttype: {ent.type}' for ent in doc.ents], sep='\n')
#
# OR - but need to figure out how to create a file for the output...
#
# with open("output/file_output", "w") as character_names:
#     for sent in ([f'entity: {ent.text}\ttype: {ent.type}' for sent in doc.sentences for ent in sent.ents]):
#         character_names.write(sent)
#         character_names.write("\n")