diff --git a/Test.pdf b/Test.pdf
new file mode 100644
index 0000000..c0103fb
Binary files /dev/null and b/Test.pdf differ
diff --git a/getSummary.py b/getSummary.py
new file mode 100644
index 0000000..6161b72
--- /dev/null
+++ b/getSummary.py
@@ -0,0 +1,71 @@
+"""Consume PDF paths from the 'sendPDF' queue and publish a summary on 'sendTxt'."""
+import os
+import sys
+
+import pika
+import PyPDF2
+from PyPDF2.errors import PdfReadError
+from transformers import pipeline
+
+# Built once at import time and shared by every message the consumer handles.
+summarizer = pipeline("summarization", model="t5-base", tokenizer="t5-base", framework="tf")
+
+# Zero-based index of the one page that is summarised (the 7th page of the PDF).
+PAGE_INDEX = 6
+
+
+def extract_page_text(pdf_path, page_index=PAGE_INDEX):
+    """Return the text of one page of the PDF at *pdf_path*.
+
+    Raises OSError if the file cannot be opened, PdfReadError if it is not a
+    readable PDF, and IndexError if the document has no page *page_index*.
+    """
+    with open(pdf_path, 'rb') as file:
+        pdf = PyPDF2.PdfReader(file)
+        # Extract inside the `with`: the reader still needs the open file.
+        return pdf.pages[page_index].extract_text() or ""
+
+
+def main():
+    """Connect to RabbitMQ on localhost and summarise every PDF path received."""
+    connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
+    channel = connection.channel()
+
+    channel.queue_declare(queue='sendPDF')
+    # Declared once up front rather than on every message handled.
+    channel.queue_declare(queue='sendTxt')
+
+    def callback(ch, method, properties, body):
+        print(f" [x] Received {body}")
+
+        pdf_path = body.decode()
+        try:
+            text = extract_page_text(pdf_path)
+        except (OSError, PdfReadError, IndexError) as err:
+            # One bad request must not take the whole consumer down.
+            print(f" [!] Could not read page {PAGE_INDEX + 1} of {pdf_path!r}: {err}")
+            return
+        if not text.strip():
+            print(f" [!] Page {PAGE_INDEX + 1} of {pdf_path!r} has no extractable text")
+            return
+
+        summary = summarizer(text, max_length=100, min_length=30, do_sample=False)
+        summary_text = summary[0]['summary_text']
+        ch.basic_publish(exchange='', routing_key='sendTxt', body=summary_text)
+        print(" [x] Sent 'A message getText'")
+
+    channel.basic_consume(queue='sendPDF', on_message_callback=callback, auto_ack=True)
+
+    print(' [*] Waiting for messages. To exit press CTRL+C')
+    channel.start_consuming()
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        print('Interrupted')
+        try:
+            sys.exit(0)
+        except SystemExit:
+            os._exit(0)
diff --git a/getText.py b/getText.py
new file mode 100644
index 0000000..88e6be5
--- /dev/null
+++ b/getText.py
@@ -0,0 +1,19 @@
+"""Prompt for a PDF file name and publish it on the 'sendPDF' queue."""
+import glob
+import pika
+
+# Surrounding whitespace would make the consumer's open() fail on a valid name.
+file = input("Enter your file name: ").strip()
+if not file:
+    raise SystemExit("No file name entered; nothing sent.")
+
+connection = pika.BlockingConnection(
+    pika.ConnectionParameters(host='localhost'))
+try:
+    channel = connection.channel()
+    channel.queue_declare(queue='sendPDF')
+    channel.basic_publish(exchange='', routing_key='sendPDF', body=file)
+    print(" [x] Sent 'A message getText'")
+finally:
+    # Always release the connection, even if declaring or publishing fails.
+    connection.close()
diff --git a/output.py b/output.py
new file mode 100644
index 0000000..146add6
--- /dev/null
+++ b/output.py
@@ -0,0 +1,37 @@
+"""Print every message that arrives on the 'sendTxt' queue."""
+import os
+import sys
+
+import pika
+# NOTE(review): PyPDF2 and pipeline are unused in this file; kept so the import set is unchanged.
+import PyPDF2
+from transformers import pipeline
+
+
+def main():
+    """Connect to RabbitMQ on localhost and print messages until interrupted."""
+    connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
+    channel = connection.channel()
+
+    channel.queue_declare(queue='sendTxt')
+
+    def callback(ch, method, properties, body):
+        print(f" [x] Received {body}")
+        # body arrives as bytes; decode so the text prints as text, not b'...'.
+        print(body.decode("utf-8", errors="replace"))
+
+    channel.basic_consume(queue='sendTxt', on_message_callback=callback, auto_ack=True)
+
+    print(' [*] Waiting for messages. To exit press CTRL+C')
+    channel.start_consuming()
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        print('Interrupted')
+        try:
+            sys.exit(0)
+        except SystemExit:
+            os._exit(0)
diff --git a/tempCodeRunnerFile.py b/tempCodeRunnerFile.py
new file mode 100644
index 0000000..b02a3ca
--- /dev/null
+++ b/tempCodeRunnerFile.py
@@ -0,0 +1 @@
+sendPDF
\ No newline at end of file