# Compose stack for the myworld Django project: web app, PostgreSQL,
# RabbitMQ (Celery broker) and a worker container built from the same image.
version: "3"
services:
  web_service:
    build:
      context: ./
      dockerfile: ./dockerfiles/Dockerfile
    image: workshop1_web
    container_name: workshop_web_container
    stdin_open: true # docker attach container_id
    tty: true
    environment:
      # Read by myworld/settings.py to assemble CELERY_BROKER_URL.
      - RABBITMQ_DEFAULT_USER=myuser
      - RABBITMQ_DEFAULT_PASS=mypassword
      - BROKER_HOST=service-rabbitmq
      - RABBITMQ_DEFAULT_VHOST=extractor
      - BROKER_PORT=5672
    ports:
      - "8000:8000"
    volumes:
      - .:/root/workspace/site

  psql-db:
    image: 'postgres:14'
    container_name: psql-db
    environment:
      - PGPASSWORD=123456
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=123456
      # settings.py and members/apps.py connect to "member_db"; create it when
      # the data volume is first initialised (ignored on an existing volume).
      - POSTGRES_DB=member_db
    ports:
      - '5446:5432'
    volumes:
      - db:/var/lib/postgresql/data

  service-rabbitmq:
    container_name: "service_rabbitmq"
    image: rabbitmq:3.8-management-alpine
    environment:
      - RABBITMQ_DEFAULT_USER=myuser
      - RABBITMQ_DEFAULT_PASS=mypassword
      - RABBITMQ_DEFAULT_VHOST=extractor
      - BROKER_HOST=service-rabbitmq
    ports:
      # AMQP listens on 5672 inside the container (see BROKER_PORT above);
      # the previous 5673:5673 mapping published a port nothing listens on.
      - '5673:5672'
      - '15676:15672'
  worker:
    build:
      context: ./
      dockerfile: ./dockerfiles/Dockerfile
    image: workshop1_web
    container_name: worker
    stdin_open: true # docker attach container_id
    tty: true
    environment:
      - RABBITMQ_DEFAULT_USER=myuser
      - RABBITMQ_DEFAULT_PASS=mypassword
      - BROKER_HOST=service-rabbitmq
      - RABBITMQ_DEFAULT_VHOST=extractor
      - BROKER_PORT=5672
    ports:
      - "4356:8000"
    volumes:
      - .:/root/workspace/site

volumes:
  db:
    driver: local
def main():
    """Select the project settings module and hand the CLI arguments to Django.

    Raises:
        ImportError: when Django cannot be imported; the underlying error is
            chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld.settings')
    try:
        from django.core import management
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    management.execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()
class DjStudentAdmin(admin.ModelAdmin):
    """Admin change list for Students, filterable by branch."""

    list_display = ("first_name", "last_name", "address", "roll_number", "mobile", "branch")
    list_filter = ("branch",)


class DjBlogAdmin(admin.ModelAdmin):
    """Admin change list for scraped Blog rows, filterable by author."""

    list_display = ("title", "release_date", "blog_time", "author", "created_date")
    list_filter = ("author",)


class DjJob(admin.ModelAdmin):
    """Admin for Job rows with per-row "RUN" and "stats" links."""

    def run(self, obj):
        """Return an HTML link to the URL named 'scraping' for this job.

        NOTE(review): assumes the 'scraping' route captures the job id, as the
        view signature ``python_blog_scraping(request, job_id)`` implies.
        """
        # ``args`` must be a real tuple. The previous ``(str(obj.pk))`` was a
        # plain string, which reverse() unpacks character by character.
        url = reverse('scraping', args=(obj.pk,))
        # Pass the URL as a format_html argument so it is escaped and used;
        # the old template had no placeholder and discarded it.
        return format_html('<a href="{}">RUN</a>', url)

    def view_stats(self, obj):
        """Return an HTML link to the JobStats change list filtered by this job's pk."""
        path = "../jobstats/?q={}".format(obj.pk)
        return format_html('<a href="{}">stats</a>', path)

    # Column headers. ``allow_tags`` was dropped here: format_html already
    # returns safe markup, so the flag had no effect.
    run.short_description = 'Run'
    view_stats.short_description = 'Stats'

    list_display = ("job_name", "start_date", "end_date", "no_of_blogs", "start_no", "created_date", "run", "view_stats")
    list_filter = ("job_name", "start_date")
    readonly_fields = ("created_date",)
db_name = 'member_db'
db_user = 'postgres'
db_pass = '123456'
db_host = 'psql-db'
db_port = '5432'

# NOTE(review): this connects as soon as the module is imported, and settings.py
# lists 'members.apps.MembersConfig', so Django imports this module at startup.
# Kept as-is because the functions below rely on the module-level ``conn``.
conn = psycopg2.connect(dbname=db_name, user=db_user, password=db_pass, host=db_host, port=db_port)


def add_row_to_blog(title, author, date, time):
    """Insert one scraped post into ``members_blog``.

    Args:
        title: Post title text.
        author: Author name text.
        date: Date text; cast to DATE by the database.
        time: Time text; cast to TIME by the database.
    """
    sql = """INSERT INTO members_blog (title, release_date, blog_time, author, created_date) VALUES (%s, %s::DATE, %s::TIME, %s, NOW())"""

    # Drop U+202F (narrow no-break space) before the value reaches ::TIME.
    cleaned_time = time.replace('\u202f', "")
    with conn:
        with conn.cursor() as curs:
            curs.execute(sql, (title, date, cleaned_time, author))


def truncate_table():
    """Remove every row from ``members_blog`` (TRUNCATE ... CASCADE)."""
    print("Truncating contents all the tables")
    with conn:
        with conn.cursor() as curs:
            curs.execute("TRUNCATE members_blog CASCADE;")


def start_extraction(start_date=None, end_date=None, no_of_articles=None, start_id=None):
    """Scrape https://blog.python.org/ and repopulate ``members_blog``.

    The table is truncated first, then one row is inserted per matching post.

    Args:
        start_date: Optional date string; posts dated before it are skipped.
        end_date: Optional date string; posts dated after it are skipped.
        no_of_articles: Optional maximum number of posts to store (int or
            numeric string). Added because BlogView already passes it.
        start_id: Optional 1-based position of the first post to consider
            (int or numeric string). Added because BlogView already passes it.

    Raises:
        requests.RequestException: if the page cannot be fetched or the server
            answers with an error status. This is raised before the table is
            truncated, so a failed fetch no longer wipes existing rows.
    """
    print("Extraction started")
    url = "https://blog.python.org/"

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page_soup = BeautifulSoup(response.text, 'html.parser')

    if start_date:
        start_date = parse(start_date)
    if end_date:
        end_date = parse(end_date)

    # Request values arrive as strings; normalise them once, outside the loop.
    first_position = int(start_id) if start_id else None
    max_stored = int(no_of_articles) if no_of_articles else None

    blogs = page_soup.select('div.date-outer')
    truncate_table()
    stored = 0
    for position, blog in enumerate(blogs, start=1):
        if first_position and position < first_position:
            continue
        if max_stored is not None and stored >= max_stored:
            break

        date = blog.select('.date-header span')[0].get_text()
        converted_date = parse(date)

        if start_date and converted_date < start_date:
            continue
        if end_date and converted_date > end_date:
            continue

        post = blog.select('.post')[0]

        # Fall back to the first node of the body when the post has no title.
        title_bar = post.select('.post-title')
        if title_bar:
            title = title_bar[0].text
        else:
            title = post.select('.post-body')[0].contents[0].text

        # Author and publication time are both in the post footer.
        post_footer = post.select('.post-footer')[0]
        author = post_footer.select('.post-author span')[0].text
        time = post_footer.select('abbr')[0].text

        add_row_to_blog(title, author, date, time)
        stored += 1

        print("\nTitle:", title.strip('\n'))
        print("Date:", date, )
        print("Time:", time)
        print("Author:", author)

    print(
        "\n---------------------------------------------------------------------------------------------------------------\n")


if __name__ == "__main__":
    start_extraction()


class MembersConfig(AppConfig):
    """App configuration for the ``members`` application."""

    default_auto_field = 'django.db.models.BigAutoField'
    name = 'members'
Homework/myworld/members/migrations/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.py b/Workshop4- Homework/myworld/members/migrations/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.py new file mode 100644 index 0000000..a5a46ad --- /dev/null +++ b/Workshop4- Homework/myworld/members/migrations/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.py @@ -0,0 +1,78 @@ +# Generated by Django 4.2.5 on 2023-09-26 07:44 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Blog', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('title', models.CharField(max_length=500)), + ('release_date', models.DateTimeField(verbose_name='Realse Date')), + ('blog_time', models.CharField(max_length=50)), + ('author', models.CharField(max_length=200)), + ('created_date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Created Date')), + ], + ), + migrations.CreateModel( + name='Job', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('job_name', models.CharField(max_length=500)), + ('start_date', models.DateTimeField(null=True, verbose_name='Blog start date')), + ('end_date', models.DateTimeField(null=True, verbose_name='Blog end date')), + ('start_no', models.IntegerField(null=True, verbose_name='No of blogs to skip')), + ('no_of_blogs', models.IntegerField(null=True, verbose_name='No of blogs to extract')), + ('created_date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Job created date')), + ], + ), + migrations.CreateModel( + name='JobLogs', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('log', 
models.TextField(verbose_name='job logs')), + ('function_name', models.TextField(verbose_name='Function name')), + ('date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Log date')), + ], + ), + migrations.CreateModel( + name='JobStats', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('status', models.CharField(max_length=50)), + ('total_blogs', models.IntegerField(null=True, verbose_name='Total blogs found')), + ('no_of_blogs_extracted', models.IntegerField(null=True, verbose_name='No of blogs extracted')), + ('start_date', models.DateTimeField(null=True, verbose_name='Extraction start date')), + ('end_date', models.DateTimeField(null=True, verbose_name='Extraction start date')), + ('job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='members.job')), + ], + ), + migrations.CreateModel( + name='Students', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('first_name', models.CharField(max_length=200)), + ('last_name', models.CharField(max_length=200)), + ('address', models.CharField(max_length=200)), + ('roll_number', models.IntegerField()), + ('mobile', models.CharField(max_length=10)), + ('branch', models.CharField(choices=[('BA', 'BA'), ('B.COM', 'B.COM'), ('MBA', 'MBA'), ('CA', 'CA')], max_length=10, null=True)), + ], + ), + migrations.DeleteModel( + name='Members', + ), + migrations.AddField( + model_name='joblogs', + name='job_stats', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='members.jobstats'), + ), + ] diff --git a/Workshop4- Homework/myworld/members/migrations/__init__.py b/Workshop4- Homework/myworld/members/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Workshop4- Homework/myworld/members/migrations/__pycache__/0001_initial.cpython-310.pyc b/Workshop4- 
Homework/myworld/members/migrations/__pycache__/0001_initial.cpython-310.pyc new file mode 100644 index 0000000..350ce33 Binary files /dev/null and b/Workshop4- Homework/myworld/members/migrations/__pycache__/0001_initial.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/members/migrations/__pycache__/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.cpython-310.pyc b/Workshop4- Homework/myworld/members/migrations/__pycache__/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.cpython-310.pyc new file mode 100644 index 0000000..b8e6d3c Binary files /dev/null and b/Workshop4- Homework/myworld/members/migrations/__pycache__/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/members/migrations/__pycache__/__init__.cpython-310.pyc b/Workshop4- Homework/myworld/members/migrations/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..0c09359 Binary files /dev/null and b/Workshop4- Homework/myworld/members/migrations/__pycache__/__init__.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/members/models.py b/Workshop4- Homework/myworld/members/models.py new file mode 100644 index 0000000..3d5a455 --- /dev/null +++ b/Workshop4- Homework/myworld/members/models.py @@ -0,0 +1,63 @@ +from django.db import models + +BRANCH_CHOICES = ( + ("BA", "BA"), + ("B.COM", "B.COM"), + ("MBA", "MBA"), + ("CA", "CA"), +) + + +# Create your models here. 
class Students(models.Model):
    """A student record; ``branch`` is limited to BRANCH_CHOICES."""

    first_name = models.CharField(max_length=200)
    last_name = models.CharField(max_length=200)
    address = models.CharField(max_length=200)
    roll_number = models.IntegerField()
    mobile = models.CharField(max_length=10)
    branch = models.CharField(max_length=10, choices=BRANCH_CHOICES, null=True)

    def __str__(self):
        """Return "<first_name> <last_name>"."""
        return self.first_name + " " + self.last_name


class Blog(models.Model):
    """One scraped blog post."""

    title = models.CharField(max_length=500)
    # NOTE(review): the label 'Realse Date' looks like a typo for 'Release
    # Date'; left unchanged because the migration records the same text.
    release_date = models.DateTimeField('Realse Date')
    blog_time = models.CharField(max_length=50)
    author = models.CharField(max_length=200)
    created_date = models.DateTimeField('Created Date', auto_now_add=True, null=True)

    def __str__(self):
        """Return the post title."""
        return self.title


class Job(models.Model):
    """Parameters of one scraping job: date window, skip count and limit."""

    job_name = models.CharField(max_length=500)
    start_date = models.DateTimeField('Blog start date', null=True)
    end_date = models.DateTimeField('Blog end date', null=True)
    start_no = models.IntegerField(verbose_name="No of blogs to skip", null=True)
    no_of_blogs = models.IntegerField(verbose_name="No of blogs to extract", null=True)
    created_date = models.DateTimeField('Job created date', auto_now_add=True, null=True)

    def __str__(self):
        """Return the job name."""
        return self.job_name


class JobStats(models.Model):
    """Status and counters recorded for one run of a Job."""

    job = models.ForeignKey(Job, on_delete=models.CASCADE)
    status = models.CharField(max_length=50)
    total_blogs = models.IntegerField(verbose_name="Total blogs found", null=True)
    no_of_blogs_extracted = models.IntegerField(verbose_name='No of blogs extracted', null=True)
    start_date = models.DateTimeField('Extraction start date', null=True)
    # NOTE(review): this label repeats 'Extraction start date'; it probably
    # should read 'Extraction end date'. Left unchanged because the migration
    # records the same text.
    end_date = models.DateTimeField('Extraction start date', null=True)

    def __str__(self):
        """Return the related job's string form.

        ``__str__`` must return ``str``; returning the Job instance itself
        raised TypeError wherever the object was rendered.
        """
        return str(self.job)
def _log_extract_event(job_stats, message):
    """Store one JobLogs row for ``job_stats`` on behalf of ``extract``."""
    # ``JobLogs.date`` is declared with auto_now_add, so it is stamped on
    # insert and does not need to be passed here.
    JobLogs(job_stats=job_stats, log=message, function_name="extract").save()


@app.task(bind=True, name="extract")
def extract(self, job_id):
    """Scrape https://blog.python.org/ for one Job and record the run.

    Creates a JobStats row for the run, stores a Blog row per matching post,
    writes JobLogs entries along the way, and finishes the JobStats row as
    "COMPLETED" or "FAILED".

    Args:
        job_id: Primary key of the Job that supplies the date window
            (``start_date``/``end_date``), the 1-based first position
            (``start_no``) and the maximum number of posts (``no_of_blogs``).

    Raises:
        Job.DoesNotExist: if no Job has the given primary key. Errors during
            scraping are caught and recorded as a FAILED run instead.
    """
    job_obj = Job.objects.get(pk=job_id)
    # Timezone-aware timestamps: settings.py sets USE_TZ = True.
    job_stats_obj = JobStats(
        job=job_obj,
        status="IN PROGRESS",
        start_date=datetime.datetime.now(utc),
        no_of_blogs_extracted=0,
    )
    job_stats_obj.save()
    _log_extract_event(job_stats_obj, "Extraction stated")

    start_date = job_obj.start_date
    end_date = job_obj.end_date
    start_id = job_obj.start_no
    no_of_articles = job_obj.no_of_blogs
    url = "https://blog.python.org/"

    # Bound before the try block so the failure path can always report them.
    article_count = 0
    extracted = 0
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        page_soup = BeautifulSoup(response.text, 'html.parser')

        blogs = page_soup.select('div.date-outer')
        for blog in blogs:
            article_count += 1
            if start_id and article_count < int(start_id):
                continue
            # Keep iterating once the limit is reached so that article_count
            # still ends up as the total number of posts found.
            if no_of_articles and extracted >= int(no_of_articles):
                continue

            date = blog.select('.date-header span')[0].get_text()
            converted_date = parse(date)
            if converted_date.tzinfo is None:
                converted_date = utc.localize(converted_date)

            _log_extract_event(job_stats_obj, f"Extracting {article_count}")
            if start_date and converted_date < start_date:
                continue
            if end_date and converted_date > end_date:
                continue

            post = blog.select('.post')[0]

            # Fall back to the first node of the body when there is no title.
            title_bar = post.select('.post-title')
            if title_bar:
                title = title_bar[0].text
            else:
                title = post.select('.post-body')[0].contents[0].text

            # Author and publication time are both in the post footer.
            post_footer = post.select('.post-footer')[0]
            author = post_footer.select('.post-author span')[0].text
            time = post_footer.select('abbr')[0].text

            # Store the parsed datetime: the raw header text is not a format
            # a DateTimeField accepts.
            Blog(title=title, author=author, release_date=converted_date, blog_time=time).save()

            # Was ``x += x`` starting from 0, which never moved the counter.
            extracted += 1
            job_stats_obj.no_of_blogs_extracted = extracted
            job_stats_obj.save()

            print("\nTitle:", title.strip('\n'))
            print("Date:", date, )
            print("Time:", time)
            print("Author:", author)

        _log_extract_event(job_stats_obj, f"Total {extracted} articles extracted: ")
        final_status = "COMPLETED"
    except Exception as ex:
        # Task boundary: record the failure on the run instead of losing it.
        _log_extract_event(job_stats_obj, str(ex))
        final_status = "FAILED"

    job_stats_obj.end_date = datetime.datetime.now(utc)
    job_stats_obj.total_blogs = article_count
    job_stats_obj.status = final_status
    job_stats_obj.save()
    _log_extract_event(job_stats_obj, "Extraction Done")
@method_decorator(csrf_exempt, name='dispatch')
class StudentView(View):
    """CSRF-exempt JSON endpoint for listing and creating Students."""

    # Form fields that must be present and non-empty on POST.
    _REQUIRED_FIELDS = ('first_name', 'last_name', 'address', 'roll_number', 'mobile')

    def get(self, request, rolno=None, branch=None):
        """Return students matching ``rolno`` or, failing that, ``branch``.

        With neither filter the list is empty. ``filter()`` never raises
        ``DoesNotExist``, so the former except branch was unreachable and has
        been removed.
        """
        if rolno:
            matches = Students.objects.filter(roll_number=rolno)
        elif branch:
            matches = Students.objects.filter(branch=branch)
        else:
            matches = []
        students = [
            {
                "first_name": student.first_name,
                "last_name": student.last_name,
                "address": student.address,
                "roll_number": student.roll_number,
                "mobile": student.mobile,
                "branch": student.branch,
            }
            for student in matches
        ]
        return JsonResponse({'status': 'success', "students": students}, status=200)

    def post(self, request):
        """Create a student from form data.

        Returns HTTP 400 when a required field is missing or empty. This was
        500, but the fault lies with the request, not the server.
        """
        form = request.POST
        if not all(form.get(field) for field in self._REQUIRED_FIELDS):
            return JsonResponse({'status': 'failed', "message": "all fields required"}, status=400)
        Students.objects.create(
            first_name=form.get('first_name'),
            last_name=form.get('last_name'),
            address=form.get('address'),
            roll_number=form.get('roll_number'),
            mobile=form.get('mobile'),
            branch=form.get('branch'),
        )
        return JsonResponse({'status': 'sucess'}, status=200)


@method_decorator(csrf_exempt, name='dispatch')
class BlogView(View):
    """CSRF-exempt endpoint that re-scrapes the blog and returns the stored rows."""

    def post(self, request):
        """Run ``apps.start_extraction`` with the posted filters and list all Blog rows.

        NOTE(review): this passes ``no_of_articles`` and ``start_id``, so
        ``apps.start_extraction`` must accept those keyword arguments.
        """
        start_date = request.POST.get('start_date', None)
        end_date = request.POST.get('end_date', None)
        no_of_articles = request.POST.get('no_of_articles', None)
        start_id = request.POST.get('start_id', None)

        apps.start_extraction(start_date=start_date, end_date=end_date, no_of_articles=no_of_articles, start_id=start_id)

        blogs = [
            {
                "Title": blog.title,
                "Release Date": blog.release_date,
                "Author": blog.author,
                "Blog time": blog.blog_time,
            }
            for blog in Blog.objects.filter()
        ]

        # The response key is "students" even though the payload is blogs;
        # kept because clients may already read it.
        return JsonResponse({'status': 'success', "students": blogs}, status=200)


def python_blog_scrap(request):
    """Run a full, unfiltered extraction synchronously and report success."""
    apps.start_extraction()
    return JsonResponse({'status': 'sucess', "message": "Extracted and populated the table."}, status=200)


def python_blog_scraping(request, job_id):
    """Queue the Celery ``extract`` task for ``job_id`` and return to the Job admin list."""
    extract.delay(job_id)
    return redirect('/admin/members/job/')
+from .celery import app as celery_app + +__all__ = ('celery_app',) diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/__init__.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..407b40a Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/__init__.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/celery.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/celery.cpython-310.pyc new file mode 100644 index 0000000..ff056ff Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/celery.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/settings.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/settings.cpython-310.pyc new file mode 100644 index 0000000..4147a4b Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/settings.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/urls.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/urls.cpython-310.pyc new file mode 100644 index 0000000..03ba69e Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/urls.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/wsgi.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/wsgi.cpython-310.pyc new file mode 100644 index 0000000..eec4d42 Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/wsgi.cpython-310.pyc differ diff --git a/Workshop4- Homework/myworld/myworld/asgi.py b/Workshop4- Homework/myworld/myworld/asgi.py new file mode 100644 index 0000000..36404b3 --- /dev/null +++ b/Workshop4- Homework/myworld/myworld/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for myworld project. + +It exposes the ASGI callable as a module-level variable named ``application``. 
# Make sure Django settings are selected before the Celery app is configured.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld.settings')

app = Celery('myworld')

# Read configuration from Django settings; only keys carrying the ``CELERY_``
# prefix are considered. Passing the settings path as a string means the
# worker does not have to serialize the configuration object to child
# processes.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Pick up task modules from every registered Django app.
app.autodiscover_tasks()


@app.task(bind=True)
def debug_task(self):
    """Print the repr of the bound task's request."""
    print('Request: {!r}'.format(self.request))


# Beat schedule: run the task registered under the name 'extract' every 60.0
# seconds with the single positional argument 1.
# NOTE(review): the entry is named "...ten-seconds" but the interval is 60.0.
_extract_entry = {
    'task': 'extract',
    'schedule': 60.0,
    'args': (1,),
}
app.conf.beat_schedule = {'run-task-ten-seconds': _extract_entry}
# Django settings for the ``myworld`` project.

# Project root: two directory levels above this settings file.
BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): the key is hard-coded and committed; load it from the
# environment before using this project outside of the workshop.
SECRET_KEY = 'django-insecure-p(&-i=zw$r=bqzck3oi3y9%*4ps!a*ierb803y_jcpqd+!z_9@'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

# '*' accepts requests for any Host header; acceptable for local development only.
ALLOWED_HOSTS = ['*']


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    # Project application.
    'members.apps.MembersConfig'
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'myworld.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        # Templates are looked up inside each installed app's ``templates`` directory.
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'myworld.wsgi.application'


# Database
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases

# PostgreSQL connection. HOST is the ``psql-db`` service from
# docker-compose.yml and PORT is the container-internal port (the compose
# file publishes it on the host as 5446).
# NOTE(review): docker-compose.yml sets no POSTGRES_DB, so the 'member_db'
# database presumably has to be created manually -- confirm.
# NOTE(review): the password is hard-coded; it duplicates POSTGRES_PASSWORD
# from docker-compose.yml.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': 'member_db',
        'USER': 'postgres',
        'PASSWORD': '123456',
        'HOST': 'psql-db',
        'PORT': 5432,
    }
}

# Password validation
# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/4.2/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.2/howto/static-files/

STATIC_URL = 'static/'

# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

# Celery configuration. myworld/celery.py loads every setting carrying the
# ``CELERY_`` prefix via ``config_from_object(..., namespace='CELERY')``.
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
# NOTE(review): differs from TIME_ZONE ('UTC') above -- confirm this is intentional.
CELERY_TIMEZONE = 'America/Los_Angeles'
# This configures rabbitmq as the datastore between Django + Celery
# All five variables are read with ``os.environ[...]``, so importing the
# settings raises KeyError when any of them is unset. docker-compose.yml
# defines them for the ``web_service`` and ``worker`` containers.
CELERY_BROKER_URL = 'amqp://{0}:{1}@{2}:{3}/{4}'.format(
    os.environ["RABBITMQ_DEFAULT_USER"], os.environ["RABBITMQ_DEFAULT_PASS"],
    os.environ["BROKER_HOST"], os.environ["BROKER_PORT"],
    os.environ["RABBITMQ_DEFAULT_VHOST"])
# Memory-profiling demo: every function decorated with ``@profile`` is
# measured by memory_profiler when it is called.


@profile
def print_msg():
    """Print the completion message ten times."""
    for _ in range(10):
        print("Program completed")


@profile
def generate():
    """Return a list of 1000 random integers in the inclusive range 0..99."""
    return [random.randint(0, 99) for _ in range(1000)]


@profile
def search_function(data):
    """Print "success" once for every item of *data* that equals a target value.

    Args:
        data: Iterable of integers to scan.
    """
    # Built once instead of on every loop iteration.
    # NOTE(review): generate() only produces 0..99, so none of these targets
    # can match its output -- confirm this is intended (e.g. a worst-case scan
    # for the profiling exercise).
    targets = (100, 200, 300, 400, 500)
    for item in data:
        if item in targets:
            print("success")


def main():
    """Run the demo workload: generate the data, search it, print the messages."""
    data = generate()
    search_function(data)
    print_msg()


# Guarded so that importing this module does not start the workload; this
# matches the entry-point style used by web_scrapper.py.
if __name__ == "__main__":
    main()
def add_row_to_blog(title, author, date, time):
    """Insert one scraped blog entry into the ``members_blog`` table.

    Args:
        title: Post title text.
        author: Author name text.
        date: Release date string; cast to ``DATE`` by the database.
        time: Post time string; cast to ``TIME`` by the database.
    """
    sql = """INSERT INTO members_blog (title, release_date, blog_time, author, created_date) VALUES (%s, %s::DATE, %s::TIME, %s, NOW())"""

    # ``with conn`` commits on success and rolls back if the block raises.
    with conn:
        with conn.cursor() as curs:
            curs.execute(sql, (title, date, time, author))


def truncate_table():
    """Delete every existing row of ``members_blog`` (TRUNCATE ... CASCADE)."""
    with conn:
        with conn.cursor() as curs:
            curs.execute("TRUNCATE members_blog CASCADE;")


def convert_time_format(time_str):
    """Convert a 12-hour clock string such as ``"9:05 PM"`` to ``"HH:MM:SS"``.

    Surrounding whitespace is ignored because the value comes from scraped
    HTML text.

    Raises:
        ValueError: If *time_str* does not match ``%I:%M %p``.
    """
    time_obj = datetime.strptime(time_str.strip(), "%I:%M %p")
    return time_obj.strftime("%H:%M:%S")


def _parse_blog(blog):
    """Return ``(title, author, date, time)`` for one ``div.date-outer`` element."""
    date = blog.select('.date-header span')[0].get_text()
    post = blog.select('.post')[0]

    title_bar = post.select('.post-title')
    if title_bar:
        title = title_bar[0].text
    else:
        # No title element: fall back to the first node of the post body.
        title = post.select('.post-body')[0].contents[0].text

    # The footer carries both the author and the time of the post.
    post_footer = post.select('.post-footer')[0]
    author = post_footer.select('.post-author span')[0].text
    time = convert_time_format(post_footer.select('abbr')[0].text)
    return title, author, date, time


def start_extraction():
    """Scrape https://blog.python.org/ and replace the stored blog entries.

    The page is downloaded and fully parsed before the table is truncated,
    so a network error, an HTTP error status or an unexpected page layout
    leaves the previously stored rows untouched.

    Raises:
        requests.RequestException: If the download fails, times out or
            returns an HTTP error status.
    """
    print("Extraction started")
    url = "https://blog.python.org/"

    # A timeout keeps the job from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page_soup = BeautifulSoup(response.text, 'html.parser')

    # Parse every article up front; nothing has been deleted yet if this fails.
    rows = [_parse_blog(blog) for blog in page_soup.select('div.date-outer')]

    # Replace the old entries with the freshly scraped ones.
    # NOTE: the truncate and each insert still commit separately.
    truncate_table()
    for title, author, date, time in rows:
        add_row_to_blog(title, author, date, time)

        print("\nTitle:", title.strip('\n'))
        print("Date:", date)
        print("Time:", time)
        print("Author:", author)

        print(
            "\n---------------------------------------------------------------------------------------------------------------\n")


if __name__ == "__main__":
    start_extraction()
@app.task
def longtime_add(x, y):
    """Simulate a slow job and return ``x + y``.

    Announces the start, blocks for five seconds, announces completion and
    then returns the sum of the two arguments.
    """
    started, finished = 'long time task begins', 'long time task finished'
    print(started)
    # Stand-in for real long-running work.
    time.sleep(5)
    print(finished)
    return x + y