diff --git a/Workshop4- Homework/myworld/db.sqlite3 b/Workshop4- Homework/myworld/db.sqlite3
new file mode 100644
index 0000000..db2d4f3
Binary files /dev/null and b/Workshop4- Homework/myworld/db.sqlite3 differ
diff --git a/Workshop4- Homework/myworld/docker-compose.yml b/Workshop4- Homework/myworld/docker-compose.yml
new file mode 100644
index 0000000..f2aa4ee
--- /dev/null
+++ b/Workshop4- Homework/myworld/docker-compose.yml
@@ -0,0 +1,70 @@
+version: "3"
+services:
+  web_service:
+    build:
+      context: ./
+      dockerfile: ./dockerfiles/Dockerfile
+    image: workshop1_web
+    container_name: workshop_web_container
+    stdin_open: true  # docker attach container_id
+    tty: true
+    environment:
+      - RABBITMQ_DEFAULT_USER=myuser
+      - RABBITMQ_DEFAULT_PASS=mypassword
+      - BROKER_HOST=service-rabbitmq
+      - RABBITMQ_DEFAULT_VHOST=extractor
+      - BROKER_PORT=5672
+    ports:
+      - "8000:8000"
+    volumes:
+      - .:/root/workspace/site
+
+  psql-db:
+    image: 'postgres:14'
+    container_name: psql-db
+    environment:
+      - PGPASSWORD=123456
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=123456
+      # Created on first start of an empty volume; Django connects to it.
+      - POSTGRES_DB=member_db
+    ports:
+      - '5446:5432'
+    volumes:
+      - db:/var/lib/postgresql/data
+
+  service-rabbitmq:
+    container_name: "service_rabbitmq"
+    image: rabbitmq:3.8-management-alpine
+    environment:
+      - RABBITMQ_DEFAULT_USER=myuser
+      - RABBITMQ_DEFAULT_PASS=mypassword
+      - RABBITMQ_DEFAULT_VHOST=extractor
+      - BROKER_HOST=service-rabbitmq
+    ports:
+      # AMQP listens on 5672 inside the container; 5673 is the host side.
+      - '5673:5672'
+      - '15676:15672'
+
+  worker:
+    build:
+      context: ./
+      dockerfile: ./dockerfiles/Dockerfile
+    image: workshop1_web
+    container_name: worker
+    stdin_open: true  # docker attach container_id
+    tty: true
+    environment:
+      - RABBITMQ_DEFAULT_USER=myuser
+      - RABBITMQ_DEFAULT_PASS=mypassword
+      - BROKER_HOST=service-rabbitmq
+      - RABBITMQ_DEFAULT_VHOST=extractor
+      - BROKER_PORT=5672
+    ports:
+      - "4356:8000"
+    volumes:
+      - .:/root/workspace/site
+
+volumes:
+  db:
+    driver: local
diff --git a/Workshop4- Homework/myworld/dockerfiles/Dockerfile b/Workshop4- Homework/myworld/dockerfiles/Dockerfile
new file mode 100644
index 0000000..3cfa40a
--- /dev/null
+++ b/Workshop4- Homework/myworld/dockerfiles/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.10.2-alpine3.15
+# Build toolchain and libpq headers, both needed to compile psycopg2.
+# --no-cache fetches a fresh package index itself, so no separate `apk update`.
+RUN apk --no-cache add --virtual build-deps-alpine build-base && \
+    apk --no-cache add --virtual postgresql-deps libpq-dev
+# Python dependencies; --no-cache-dir keeps pip's download cache out of the image.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir Django psycopg2==2.9.3 bs4 html5lib \
+        requests python-dateutil celery pytz
+# NOTE(review): nothing shown here uses .../src; the project lives in .../site.
+RUN mkdir -p /root/workspace/src
+# Copy the project into the image and make it the working directory.
+COPY ./ /root/workspace/site
+WORKDIR /root/workspace/site
diff --git a/Workshop4- Homework/myworld/manage.py b/Workshop4- Homework/myworld/manage.py
new file mode 100755
index 0000000..ffd27b1
--- /dev/null
+++ b/Workshop4- Homework/myworld/manage.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+"""Django's command-line utility for administrative tasks."""
+import os
+import sys
+
+
+def main():
+ """Run administrative tasks."""
+ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld.settings')
+ try:
+ from django.core.management import execute_from_command_line
+ except ImportError as exc:
+ raise ImportError(
+ "Couldn't import Django. Are you sure it's installed and "
+ "available on your PYTHONPATH environment variable? Did you "
+ "forget to activate a virtual environment?"
+ ) from exc
+ execute_from_command_line(sys.argv)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/Workshop4- Homework/myworld/members.zip b/Workshop4- Homework/myworld/members.zip
new file mode 100644
index 0000000..338399d
Binary files /dev/null and b/Workshop4- Homework/myworld/members.zip differ
diff --git a/Workshop4- Homework/myworld/members/__init__.py b/Workshop4- Homework/myworld/members/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Workshop4- Homework/myworld/members/__pycache__/__init__.cpython-310.pyc b/Workshop4- Homework/myworld/members/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..397ecc3
Binary files /dev/null and b/Workshop4- Homework/myworld/members/__pycache__/__init__.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/__pycache__/admin.cpython-310.pyc b/Workshop4- Homework/myworld/members/__pycache__/admin.cpython-310.pyc
new file mode 100644
index 0000000..df3d1cc
Binary files /dev/null and b/Workshop4- Homework/myworld/members/__pycache__/admin.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/__pycache__/apps.cpython-310.pyc b/Workshop4- Homework/myworld/members/__pycache__/apps.cpython-310.pyc
new file mode 100644
index 0000000..19d6611
Binary files /dev/null and b/Workshop4- Homework/myworld/members/__pycache__/apps.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/__pycache__/models.cpython-310.pyc b/Workshop4- Homework/myworld/members/__pycache__/models.cpython-310.pyc
new file mode 100644
index 0000000..d98e785
Binary files /dev/null and b/Workshop4- Homework/myworld/members/__pycache__/models.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/__pycache__/tasks.cpython-310.pyc b/Workshop4- Homework/myworld/members/__pycache__/tasks.cpython-310.pyc
new file mode 100644
index 0000000..2d395bf
Binary files /dev/null and b/Workshop4- Homework/myworld/members/__pycache__/tasks.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/__pycache__/urls.cpython-310.pyc b/Workshop4- Homework/myworld/members/__pycache__/urls.cpython-310.pyc
new file mode 100644
index 0000000..e77dbe5
Binary files /dev/null and b/Workshop4- Homework/myworld/members/__pycache__/urls.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/__pycache__/views.cpython-310.pyc b/Workshop4- Homework/myworld/members/__pycache__/views.cpython-310.pyc
new file mode 100644
index 0000000..d25bb3e
Binary files /dev/null and b/Workshop4- Homework/myworld/members/__pycache__/views.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/admin.py b/Workshop4- Homework/myworld/members/admin.py
new file mode 100644
index 0000000..e66a10e
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/admin.py
@@ -0,0 +1,52 @@
+from django.contrib import admin
+from .models import Students, Blog, Job, JobLogs, JobStats
+from django.urls import reverse
+from django.utils.html import format_html
+
+class DjStudentAdmin(admin.ModelAdmin):
+ list_display = ("first_name", "last_name", "address", "roll_number", "mobile", "branch")
+ list_filter = ("branch",)
+
+class DjBlogAdmin(admin.ModelAdmin):
+ list_display = ("title", "release_date", "blog_time", "author", "created_date")
+ list_filter = ("author",)
+
+
+class DjJob(admin.ModelAdmin):
+ def run(self, obj):
+ return format_html('RUN', reverse('scraping', args=(str(obj.pk))))
+
+ def view_stats(self, obj):
+ path = "../jobstats/?q={}".format(obj.pk)
+ return format_html(f'''stats''')
+
+ run.short_description = 'Run'
+ run.allow_tags = True
+ view_stats.short_description = 'Stats'
+ view_stats.allow_tags = True
+
+ list_display = ("job_name", "start_date", "end_date", "no_of_blogs", "start_no", "created_date", "run", "view_stats")
+ list_filter = ("job_name", "start_date")
+ readonly_fields = ("created_date",)
+
+class DjJobStats(admin.ModelAdmin):
+ def view_logs(self, obj):
+ path = "../joblogs/?q={}".format(obj.pk)
+ return format_html(f'''Logs''')
+
+ view_logs.short_description = 'Stats'
+ view_logs.allow_tags = True
+ list_display = ("job", "status", "view_logs", "total_blogs", "no_of_blogs_extracted", "start_date", "end_date")
+ search_fields = ('job__pk',)
+
+class DjJobLogs(admin.ModelAdmin):
+ list_display = ("date", "log", "function_name")
+ search_fields = ('job_stats__pk',)
+
+
+# Register each model with its ModelAdmin subclass so it appears in the Django admin site.
+admin.site.register(Blog, DjBlogAdmin)
+admin.site.register(Students, DjStudentAdmin)
+admin.site.register(Job, DjJob)
+admin.site.register(JobStats, DjJobStats)
+admin.site.register(JobLogs, DjJobLogs)
diff --git a/Workshop4- Homework/myworld/members/apps.py b/Workshop4- Homework/myworld/members/apps.py
new file mode 100644
index 0000000..21366a2
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/apps.py
@@ -0,0 +1,90 @@
+from django.apps import AppConfig
+import psycopg2
+import requests
+import re
+from bs4 import BeautifulSoup, element
+import datetime
+from dateutil.parser import parse
+
+# Postgres settings (same values as DATABASES in myworld/settings.py), hard-coded here.
+db_name = 'member_db'
+db_user = 'postgres'
+db_pass = '123456'
+db_host = 'psql-db'
+db_port = '5432'
+# The connection is opened when this module is imported and shared by the helpers below.
+conn = psycopg2.connect(dbname=db_name, user=db_user, password=db_pass, host=db_host, port=db_port)
+
+def add_row_to_blog(title, author, date, time):
+ sql = """INSERT INTO members_blog (title, release_date, blog_time, author, created_date) VALUES (%s, %s::DATE, %s::TIME, %s, NOW())"""
+
+ with conn:
+ with conn.cursor() as curs:
+ time=time.replace('\u202f',"")
+ curs.execute(sql, (title, date, time, author))
+
+def truncate_table():
+ print("Truncating contents all the tables")
+ with conn:
+ with conn.cursor() as curs:
+ curs.execute("TRUNCATE members_blog CASCADE;")
+
+def start_extraction(start_date=None, end_date=None):
+ print("Extraction started")
+ url = "https://blog.python.org/"
+
+ data = requests.get(url)
+ page_soup = BeautifulSoup(data.text, 'html.parser')
+
+ if start_date:
+ start_date = parse(start_date)
+ if end_date:
+ end_date = parse(end_date)
+
+ blogs = page_soup.select('div.date-outer')
+ truncate_table()
+ for blog in blogs:
+ date = blog.select('.date-header span')[0].get_text()
+
+ converted_date = parse(date)
+
+ if start_date and converted_date < start_date:
+ continue
+ if end_date and converted_date > end_date:
+ continue
+
+ post = blog.select('.post')[0]
+
+ title = ""
+ title_bar = post.select('.post-title')
+ if len(title_bar) > 0:
+ title = title_bar[0].text
+ else:
+ title = post.select('.post-body')[0].contents[0].text
+
+ # getting the author and blog time
+ post_footer = post.select('.post-footer')[0]
+
+ author = post_footer.select('.post-author span')[0].text
+
+ time = post_footer.select('abbr')[0].text
+
+ add_row_to_blog(title, author, date, time)
+
+ print("\nTitle:", title.strip('\n'))
+ print("Date:", date, )
+ print("Time:", time)
+ print("Author:", author)
+
+ # print("Number of blogs read:", count)
+ print(
+ "\n---------------------------------------------------------------------------------------------------------------\n")
+
+if __name__ == "__main__":
+ start_extraction()
+
+
+class MembersConfig(AppConfig):
+ default_auto_field='django.db.models.BigAutoField'
+ name = 'members'
+
diff --git a/Workshop4- Homework/myworld/members/migrations/0001_initial.py b/Workshop4- Homework/myworld/members/migrations/0001_initial.py
new file mode 100644
index 0000000..0a7cee3
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/migrations/0001_initial.py
@@ -0,0 +1,22 @@
+# Generated by Django 4.2 on 2023-05-23 05:07
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Initial schema of the members app: a single Members table.
+ initial = True
+ # First migration of the app, so it depends on nothing.
+ dependencies = [
+ ]
+ # Members has a BigAutoField id plus firstname and lastname (max 255 characters each).
+ operations = [
+ migrations.CreateModel(
+ name='Members',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('firstname', models.CharField(max_length=255)),
+ ('lastname', models.CharField(max_length=255)),
+ ],
+ ),
+ ]
diff --git a/Workshop4- Homework/myworld/members/migrations/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.py b/Workshop4- Homework/myworld/members/migrations/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.py
new file mode 100644
index 0000000..a5a46ad
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/migrations/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.py
@@ -0,0 +1,78 @@
+# Generated by Django 4.2.5 on 2023-09-26 07:44
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+ # Creates Blog, Job, JobLogs, JobStats and Students, and deletes the Members model.
+ dependencies = [
+ ('members', '0001_initial'),
+ ]
+ # JobLogs.job_stats is added by the final AddField, after JobStats has been created.
+ operations = [
+ migrations.CreateModel(
+ name='Blog',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('title', models.CharField(max_length=500)),
+ ('release_date', models.DateTimeField(verbose_name='Realse Date')),
+ ('blog_time', models.CharField(max_length=50)),
+ ('author', models.CharField(max_length=200)),
+ ('created_date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Created Date')),
+ ],
+ ),
+ migrations.CreateModel(
+ name='Job',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('job_name', models.CharField(max_length=500)),
+ ('start_date', models.DateTimeField(null=True, verbose_name='Blog start date')),
+ ('end_date', models.DateTimeField(null=True, verbose_name='Blog end date')),
+ ('start_no', models.IntegerField(null=True, verbose_name='No of blogs to skip')),
+ ('no_of_blogs', models.IntegerField(null=True, verbose_name='No of blogs to extract')),
+ ('created_date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Job created date')),
+ ],
+ ),
+ migrations.CreateModel(
+ name='JobLogs',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('log', models.TextField(verbose_name='job logs')),
+ ('function_name', models.TextField(verbose_name='Function name')),
+ ('date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Log date')),
+ ],
+ ),
+ migrations.CreateModel(
+ name='JobStats',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('status', models.CharField(max_length=50)),
+ ('total_blogs', models.IntegerField(null=True, verbose_name='Total blogs found')),
+ ('no_of_blogs_extracted', models.IntegerField(null=True, verbose_name='No of blogs extracted')),
+ ('start_date', models.DateTimeField(null=True, verbose_name='Extraction start date')),
+ ('end_date', models.DateTimeField(null=True, verbose_name='Extraction start date')),
+ ('job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='members.job')),
+ ],
+ ),
+ migrations.CreateModel(
+ name='Students',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('first_name', models.CharField(max_length=200)),
+ ('last_name', models.CharField(max_length=200)),
+ ('address', models.CharField(max_length=200)),
+ ('roll_number', models.IntegerField()),
+ ('mobile', models.CharField(max_length=10)),
+ ('branch', models.CharField(choices=[('BA', 'BA'), ('B.COM', 'B.COM'), ('MBA', 'MBA'), ('CA', 'CA')], max_length=10, null=True)),
+ ],
+ ),
+ migrations.DeleteModel(
+ name='Members',
+ ),
+ migrations.AddField(
+ model_name='joblogs',
+ name='job_stats',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='members.jobstats'),
+ ),
+ ]
diff --git a/Workshop4- Homework/myworld/members/migrations/__init__.py b/Workshop4- Homework/myworld/members/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Workshop4- Homework/myworld/members/migrations/__pycache__/0001_initial.cpython-310.pyc b/Workshop4- Homework/myworld/members/migrations/__pycache__/0001_initial.cpython-310.pyc
new file mode 100644
index 0000000..350ce33
Binary files /dev/null and b/Workshop4- Homework/myworld/members/migrations/__pycache__/0001_initial.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/migrations/__pycache__/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.cpython-310.pyc b/Workshop4- Homework/myworld/members/migrations/__pycache__/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.cpython-310.pyc
new file mode 100644
index 0000000..b8e6d3c
Binary files /dev/null and b/Workshop4- Homework/myworld/members/migrations/__pycache__/0002_blog_job_joblogs_jobstats_students_delete_members_and_more.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/migrations/__pycache__/__init__.cpython-310.pyc b/Workshop4- Homework/myworld/members/migrations/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..0c09359
Binary files /dev/null and b/Workshop4- Homework/myworld/members/migrations/__pycache__/__init__.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/members/models.py b/Workshop4- Homework/myworld/members/models.py
new file mode 100644
index 0000000..3d5a455
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/models.py
@@ -0,0 +1,63 @@
+from django.db import models
+
+BRANCH_CHOICES = (  # (stored value, display label) pairs used by Students.branch
+ ("BA", "BA"),
+ ("B.COM", "B.COM"),
+ ("MBA", "MBA"),
+ ("CA", "CA"),
+)
+
+
+# Create your models here.
+class Students(models.Model):
+ first_name = models.CharField(max_length=200)
+ last_name = models.CharField(max_length=200)
+ address = models.CharField(max_length=200)
+ roll_number = models.IntegerField()
+ mobile = models.CharField(max_length=10)
+ branch = models.CharField(max_length=10, choices=BRANCH_CHOICES, null=True)
+
+ def __str__(self):
+ return self.first_name + " " + self.last_name
+
+
+class Blog(models.Model):
+ title = models.CharField(max_length=500)
+ release_date = models.DateTimeField('Realse Date')
+ blog_time = models.CharField(max_length=50)
+ author = models.CharField(max_length=200)
+ created_date = models.DateTimeField('Created Date', auto_now_add=True, null=True)
+
+ def __str__(self):
+ return self.title
+
+
+class Job(models.Model):
+ job_name = models.CharField(max_length=500)
+ start_date = models.DateTimeField('Blog start date', null=True)
+ end_date = models.DateTimeField('Blog end date', null=True)
+ start_no = models.IntegerField(verbose_name="No of blogs to skip", null=True)
+ no_of_blogs = models.IntegerField(verbose_name="No of blogs to extract", null=True)
+ created_date = models.DateTimeField('Job created date', auto_now_add=True, null=True)
+
+ def __str__(self):
+ return self.job_name
+
+
+class JobStats(models.Model):
+ job = models.ForeignKey(Job, on_delete=models.CASCADE)
+ status = models.CharField(max_length=50)
+ total_blogs = models.IntegerField(verbose_name="Total blogs found", null=True)
+ no_of_blogs_extracted = models.IntegerField(verbose_name='No of blogs extracted', null=True)
+ start_date = models.DateTimeField('Extraction start date', null=True)
+ end_date = models.DateTimeField('Extraction start date', null=True)
+
+ def __str__(self):
+ return self.job
+
+
+class JobLogs(models.Model):
+ job_stats = models.ForeignKey(JobStats, on_delete=models.CASCADE)
+ log = models.TextField(verbose_name="job logs")
+ function_name = models.TextField(verbose_name="Function name")
+ date = models.DateTimeField('Log date', null=True, auto_now_add=True)
diff --git a/Workshop4- Homework/myworld/members/tasks.py b/Workshop4- Homework/myworld/members/tasks.py
new file mode 100644
index 0000000..4961583
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/tasks.py
@@ -0,0 +1,83 @@
+import datetime
+from myworld.celery import app
+from .models import Job, Blog, JobStats, JobLogs
+import requests
+from bs4 import BeautifulSoup
+from dateutil.parser import parse
+import pytz
+
+utc=pytz.UTC
+
+@app.task(bind=True, name="extract")
+def extract(self, job_id):
+ job_obj = Job.objects.get(pk=job_id)
+ job_stats_obj = JobStats(job=job_obj, status="IN PROGRESS", start_date=datetime.datetime.now(), no_of_blogs_extracted=0)
+ job_stats_obj.save()
+ JobLogs(job_stats=job_stats_obj, log="Extraction stated", function_name="extract", date=datetime.datetime.now()).save()
+ start_date = job_obj.start_date
+ end_date = job_obj.end_date
+ start_id = job_obj.start_no
+ no_of_articles = job_obj.no_of_blogs
+ url = "https://blog.python.org/"
+ try:
+ data = requests.get(url)
+ page_soup = BeautifulSoup(data.text, 'html.parser')
+
+ blogs = page_soup.select('div.date-outer')
+ article_count = 0
+ counter = 1
+ for blog in blogs:
+ article_count += 1
+ if start_id and article_count < int(start_id):
+ continue
+ if no_of_articles and counter > int(no_of_articles):
+ continue
+ date = blog.select('.date-header span')[0].get_text()
+
+ converted_date = parse(date)
+ JobLogs(job_stats=job_stats_obj, log=f"Extracting {article_count}", function_name="extract", date=datetime.datetime.now()).save()
+ if start_date and utc.localize(converted_date) < start_date:
+ continue
+ if end_date and utc.localize(converted_date) > end_date:
+ continue
+
+ post = blog.select('.post')[0]
+
+ title = ""
+ title_bar = post.select('.post-title')
+ if len(title_bar) > 0:
+ title = title_bar[0].text
+ else:
+ title = post.select('.post-body')[0].contents[0].text
+
+ # getting the author and blog time
+ post_footer = post.select('.post-footer')[0]
+
+ author = post_footer.select('.post-author span')[0].text
+
+ time = post_footer.select('abbr')[0].text
+
+ blog_obj = Blog(title=title, author=author, release_date=date, blog_time=time)
+ blog_obj.save()
+ job_stats_obj.no_of_blogs_extracted += job_stats_obj.no_of_blogs_extracted
+ job_stats_obj.save()
+
+ print("\nTitle:", title.strip('\n'))
+ print("Date:", date, )
+ print("Time:", time)
+ print("Author:", author)
+ counter += 1
+ JobLogs(job_stats=job_stats_obj, log=f"Total {counter} articles extracted: ", function_name="extract", date=datetime.datetime.now()).save()
+ job_stats_obj.end_date = datetime.datetime.now()
+ job_stats_obj.total_blogs = article_count
+ job_stats_obj.status = "COMPLETED"
+ job_stats_obj.save()
+ JobLogs(job_stats=job_stats_obj, log="Extraction Done", function_name="extract", date=datetime.datetime.now()).save()
+ except Exception as ex:
+ JobLogs(job_stats=job_stats_obj, log=str(ex), function_name="extract", date=datetime.datetime.now()).save()
+ job_stats_obj.end_date = datetime.datetime.now()
+ job_stats_obj.total_blogs = article_count
+ job_stats_obj.status = "FAILED"
+ job_stats_obj.save()
+ JobLogs(job_stats=job_stats_obj, log="Extraction Done", function_name="extract", date=datetime.datetime.now()).save()
+
diff --git a/Workshop4- Homework/myworld/members/tests.py b/Workshop4- Homework/myworld/members/tests.py
new file mode 100644
index 0000000..7ce503c
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/tests.py
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.
diff --git a/Workshop4- Homework/myworld/members/urls.py b/Workshop4- Homework/myworld/members/urls.py
new file mode 100644
index 0000000..50c5bd5
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/urls.py
@@ -0,0 +1,11 @@
+from django.urls import path
+from . import views
+
+urlpatterns = [
+ path('rest/student/', views.StudentView.as_view()),
+ path('rest/student/', views.StudentView.as_view()),
+ path('rest/student/', views.StudentView.as_view()),
+ path('start_python_blog_scraping', views.python_blog_scrap, name='triger'),
+ path('blog/', views.BlogView.as_view()),
+ path('python_blog_scraping/', views.python_blog_scraping, name="scraping")
+]
diff --git a/Workshop4- Homework/myworld/members/views.py b/Workshop4- Homework/myworld/members/views.py
new file mode 100644
index 0000000..77b1bcf
--- /dev/null
+++ b/Workshop4- Homework/myworld/members/views.py
@@ -0,0 +1,80 @@
+from django.views import View
+from .models import Students, Blog
+from django.http import JsonResponse
+from django.views.decorators.csrf import csrf_exempt
+from django.utils.decorators import method_decorator
+from . import apps
+from members.tasks import extract
+from django.shortcuts import redirect
+
+
+@method_decorator(csrf_exempt, name='dispatch')
+class StudentView(View):
+
+ def get(self, request, rolno=None, branch=None):
+ student_model_list = []
+ try:
+ if rolno:
+ student_model_list = Students.objects.filter(roll_number=rolno)
+ elif branch:
+ student_model_list = Students.objects.filter(branch=branch)
+ except Students.DoesNotExist:
+ return JsonResponse({'status': 'failed', "students": None}, status=400)
+ students = []
+ for student in student_model_list:
+ data = {
+ "first_name" : student.first_name,
+ "last_name": student.last_name,
+ "address": student.address,
+ "roll_number": student.roll_number,
+ "mobile": student.mobile,
+ "branch": student.branch
+ }
+ students.append(data)
+ return JsonResponse({'status': 'success', "students": students}, status=200)
+ def post(self, request):
+ if not request.POST.get('first_name') or not request.POST.get('last_name') or not request.POST.get('address') or not request.POST.get('roll_number') or not request.POST.get('mobile'):
+ return JsonResponse({'status': 'failed', "message" : "all fields required"}, status=500)
+ Students.objects.create(
+ first_name= request.POST.get('first_name'),
+ last_name= request.POST.get('last_name'),
+ address= request.POST.get('address'),
+ roll_number= request.POST.get('roll_number'),
+ mobile= request.POST.get('mobile'),
+ branch= request.POST.get('branch'))
+ return JsonResponse({'status': 'sucess'}, status=200)
+
+
+@method_decorator(csrf_exempt, name='dispatch')
+
+class BlogView(View):
+ def post(self, request):
+ start_date = request.POST.get('start_date', None)
+ end_date = request.POST.get('end_date', None)
+ no_of_articles = request.POST.get('no_of_articles', None)
+ start_id = request.POST.get('start_id', None)
+
+ apps.start_extraction(start_date=start_date, end_date=end_date, no_of_articles=no_of_articles, start_id=start_id)
+
+ blog_model_list = Blog.objects.filter()
+
+ blogs = []
+ for blog in blog_model_list:
+ data = {
+ "Title": blog.title,
+ "Release Date": blog.release_date,
+ "Author": blog.author,
+ "Blog time": blog.blog_time
+ }
+ blogs.append(data)
+
+ return JsonResponse({'status': 'success', "students": blogs}, status=200)
+
+def python_blog_scrap(request):
+ apps.start_extraction()
+ return JsonResponse({'status': 'sucess', "message" : "Extracted and populated the table."}, status=200)
+
+
+def python_blog_scraping(request, job_id):
+ extract.delay(job_id)
+ return redirect('/admin/members/job/')
diff --git a/Workshop4- Homework/myworld/myworld/__init__.py b/Workshop4- Homework/myworld/myworld/__init__.py
new file mode 100644
index 0000000..15d7c50
--- /dev/null
+++ b/Workshop4- Homework/myworld/myworld/__init__.py
@@ -0,0 +1,5 @@
+# This will make sure the app is always imported when
+# Django starts so that shared_task will use this app.
+from .celery import app as celery_app
+
+__all__ = ('celery_app',)
diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/__init__.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..407b40a
Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/__init__.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/celery.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/celery.cpython-310.pyc
new file mode 100644
index 0000000..ff056ff
Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/celery.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/settings.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/settings.cpython-310.pyc
new file mode 100644
index 0000000..4147a4b
Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/settings.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/urls.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/urls.cpython-310.pyc
new file mode 100644
index 0000000..03ba69e
Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/urls.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/myworld/__pycache__/wsgi.cpython-310.pyc b/Workshop4- Homework/myworld/myworld/__pycache__/wsgi.cpython-310.pyc
new file mode 100644
index 0000000..eec4d42
Binary files /dev/null and b/Workshop4- Homework/myworld/myworld/__pycache__/wsgi.cpython-310.pyc differ
diff --git a/Workshop4- Homework/myworld/myworld/asgi.py b/Workshop4- Homework/myworld/myworld/asgi.py
new file mode 100644
index 0000000..36404b3
--- /dev/null
+++ b/Workshop4- Homework/myworld/myworld/asgi.py
@@ -0,0 +1,16 @@
+"""
+ASGI config for myworld project.
+
+It exposes the ASGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
+"""
+
+import os
+
+from django.core.asgi import get_asgi_application
+
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld.settings')
+
+application = get_asgi_application()
diff --git a/Workshop4- Homework/myworld/myworld/celery.py b/Workshop4- Homework/myworld/myworld/celery.py
new file mode 100644
index 0000000..b4eaa53
--- /dev/null
+++ b/Workshop4- Homework/myworld/myworld/celery.py
@@ -0,0 +1,33 @@
+import os
+from celery import Celery
+
+# Set the default Django settings module for the 'celery' program.
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld.settings')
+
+app = Celery('myworld')
+
+# Using a string here means the worker doesn't have to serialize
+# the configuration object to child processes.
+# - namespace='CELERY' means all celery-related configuration keys
+# should have a `CELERY_` prefix.
+app.config_from_object('django.conf:settings', namespace='CELERY')
+
+# Load task modules from all registered Django apps.
+app.autodiscover_tasks()
+
+
+@app.task(bind=True)
+def debug_task(self):
+ print(f'Request: {self.request!r}')
+
+app.conf.beat_schedule = {
+ #Scheduler Name
+ 'run-task-ten-seconds': {
+ # Task Name (Name Specified in Decorator)
+ 'task': 'extract',
+ # Schedule
+ 'schedule': 60.0,
+ # Function Arguments
+ 'args': (1,)
+ }
+}
diff --git a/Workshop4- Homework/myworld/myworld/settings.py b/Workshop4- Homework/myworld/myworld/settings.py
new file mode 100644
index 0000000..5f09aba
--- /dev/null
+++ b/Workshop4- Homework/myworld/myworld/settings.py
@@ -0,0 +1,126 @@
+import os
+
+from pathlib import Path
+
+# Build paths inside the project like this: BASE_DIR / 'subdir'.
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key used in production secret!
+SECRET_KEY = 'django-insecure-p(&-i=zw$r=bqzck3oi3y9%*4ps!a*ierb803y_jcpqd+!z_9@'
+
+# SECURITY WARNING: don't run with debug turned on in production!
+DEBUG = True
+
+ALLOWED_HOSTS = ['*']
+
+
+# Application definition
+
+INSTALLED_APPS = [
+ 'django.contrib.admin',
+ 'django.contrib.auth',
+ 'django.contrib.contenttypes',
+ 'django.contrib.sessions',
+ 'django.contrib.messages',
+ 'django.contrib.staticfiles',
+ 'members.apps.MembersConfig'
+]
+
+MIDDLEWARE = [
+ 'django.middleware.security.SecurityMiddleware',
+ 'django.contrib.sessions.middleware.SessionMiddleware',
+ 'django.middleware.common.CommonMiddleware',
+ 'django.middleware.csrf.CsrfViewMiddleware',
+ 'django.contrib.auth.middleware.AuthenticationMiddleware',
+ 'django.contrib.messages.middleware.MessageMiddleware',
+ 'django.middleware.clickjacking.XFrameOptionsMiddleware',
+]
+
+ROOT_URLCONF = 'myworld.urls'
+
+TEMPLATES = [
+ {
+ 'BACKEND': 'django.template.backends.django.DjangoTemplates',
+ 'DIRS': [],
+ 'APP_DIRS': True,
+ 'OPTIONS': {
+ 'context_processors': [
+ 'django.template.context_processors.debug',
+ 'django.template.context_processors.request',
+ 'django.contrib.auth.context_processors.auth',
+ 'django.contrib.messages.context_processors.messages',
+ ],
+ },
+ },
+]
+
+WSGI_APPLICATION = 'myworld.wsgi.application'
+
+
+# Database
+# https://docs.djangoproject.com/en/4.2/ref/settings/#databases
+
+DATABASES = {
+ 'default': {
+ 'ENGINE': 'django.db.backends.postgresql',
+ 'NAME': 'member_db',
+ 'USER': 'postgres',
+ 'PASSWORD': '123456',
+ 'HOST': 'psql-db',
+ 'PORT': 5432,
+ }
+}
+
+# Password validation
+# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
+
+AUTH_PASSWORD_VALIDATORS = [
+ {
+ 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
+ },
+ {
+ 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
+ },
+ {
+ 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
+ },
+ {
+ 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
+ },
+]
+
+
+# Internationalization
+# https://docs.djangoproject.com/en/4.2/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_TZ = True
+
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/4.2/howto/static-files/
+
+STATIC_URL = 'static/'
+
+# Default primary key field type
+# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
+
+DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
+
+CELERY_TASK_SERIALIZER = 'json'
+CELERY_RESULT_SERIALIZER = 'json'
+CELERY_TIMEZONE = 'America/Los_Angeles'
+# RabbitMQ is the message broker between Django and Celery; the URL is built from environment variables, and a missing one raises KeyError at import.
+CELERY_BROKER_URL = 'amqp://{0}:{1}@{2}:{3}/{4}'.format(
+ os.environ["RABBITMQ_DEFAULT_USER"], os.environ["RABBITMQ_DEFAULT_PASS"],
+ os.environ["BROKER_HOST"], os.environ["BROKER_PORT"],
+ os.environ["RABBITMQ_DEFAULT_VHOST"])
diff --git a/Workshop4- Homework/myworld/myworld/urls.py b/Workshop4- Homework/myworld/myworld/urls.py
new file mode 100644
index 0000000..921daef
--- /dev/null
+++ b/Workshop4- Homework/myworld/myworld/urls.py
@@ -0,0 +1,23 @@
+"""
+URL configuration for myworld project.
+
+The `urlpatterns` list routes URLs to views. For more information please see:
+ https://docs.djangoproject.com/en/4.2/topics/http/urls/
+Examples:
+Function views
+ 1. Add an import: from my_app import views
+ 2. Add a URL to urlpatterns: path('', views.home, name='home')
+Class-based views
+ 1. Add an import: from other_app.views import Home
+ 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
+Including another URLconf
+ 1. Import the include() function: from django.urls import include, path
+ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
+"""
+from django.contrib import admin
+from django.urls import include, path
+
+urlpatterns = [
+ path('members/', include('members.urls')),  # URLs under members/ are routed by the members.urls URLconf
+ path('admin/', admin.site.urls),  # Django admin site
+]
diff --git a/Workshop4- Homework/myworld/myworld/wsgi.py b/Workshop4- Homework/myworld/myworld/wsgi.py
new file mode 100644
index 0000000..cb3f79d
--- /dev/null
+++ b/Workshop4- Homework/myworld/myworld/wsgi.py
@@ -0,0 +1,16 @@
+"""
+WSGI config for myworld project.
+
+It exposes the WSGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
+"""
+
+import os
+
+from django.core.wsgi import get_wsgi_application
+
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld.settings')  # keeps a value that is already set, otherwise uses myworld.settings
+
+application = get_wsgi_application()  # the module-level WSGI callable described in the docstring above
diff --git a/Workshop4- Homework/myworld/p_file.prof b/Workshop4- Homework/myworld/p_file.prof
new file mode 100644
index 0000000..ed2ed94
Binary files /dev/null and b/Workshop4- Homework/myworld/p_file.prof differ
diff --git a/Workshop4- Homework/myworld/script.py b/Workshop4- Homework/myworld/script.py
new file mode 100644
index 0000000..d2edb6f
--- /dev/null
+++ b/Workshop4- Homework/myworld/script.py
@@ -0,0 +1,27 @@
+import random
+from memory_profiler import profile
+@profile
+def print_msg():
+    """Print "Program completed" ten times; wrapped by memory_profiler's @profile."""
+    for _ in range(10):
+        print("Program completed")
+
+@profile
+def generate():
+    """Return a list of 1000 random integers, each drawn from the inclusive range 0..99."""
+    sample_size = 1000
+    return [random.randint(0, 99) for _ in range(sample_size)]
+
+@profile
+def search_function(data):
+    """Print "success" once for each element of `data` that equals 100, 200, 300, 400 or 500."""
+    targets = [100, 200, 300, 400, 500]
+    for _ in (value for value in data if value in targets):
+        print("success")
+
+def main():  # run the demo: generate the data, search it, then print the completion messages
+    search_function(generate())
+    print_msg()
+
+if __name__ == "__main__":  # run only when executed as a script, so importing this module has no side effects
+    main()
diff --git a/Workshop4- Homework/myworld/web_scrapper.py b/Workshop4- Homework/myworld/web_scrapper.py
new file mode 100644
index 0000000..fa44338
--- /dev/null
+++ b/Workshop4- Homework/myworld/web_scrapper.py
@@ -0,0 +1,86 @@
+import psycopg2
+import requests
+import re
+from bs4 import BeautifulSoup, element
+from datetime import datetime
+
+# The user, password and host below correspond to the psql-db service in myworld/docker-compose.yml.
+db_name = 'member_db'
+db_user = 'postgres'
+db_pass = '123456'
+db_host = 'psql-db'
+db_port = '5432'
+
+# Open the PostgreSQL connection at import time; the functions below all share this `conn`.
+conn = psycopg2.connect(dbname=db_name, user=db_user, password=db_pass, host=db_host, port=db_port)
+
+
+def add_row_to_blog(title, author, date, time):
+    """Insert one row into members_blog; `date`/`time` are cast to DATE/TIME and created_date is NOW()."""
+    insert_stmt = """INSERT INTO members_blog (title, release_date, blog_time, author, created_date) VALUES (%s, %s::DATE, %s::TIME, %s, NOW())"""
+    params = (title, date, time, author)
+    # Parameter order follows the column list of the statement, not the function signature.
+    with conn, conn.cursor() as curs:
+        curs.execute(insert_stmt, params)
+
+
+def truncate_table():
+    """Delete every existing row of members_blog by running TRUNCATE ... CASCADE."""
+    wipe_stmt = "TRUNCATE members_blog CASCADE;"
+    with conn, conn.cursor() as curs:
+        curs.execute(wipe_stmt)
+
+
+def convert_time_format(time_str):
+    """Convert a 12-hour clock string such as "9:05 AM" into 24-hour "HH:MM:SS" text for PostgreSQL."""
+    parsed = datetime.strptime(time_str, "%I:%M %p")
+    return format(parsed, "%H:%M:%S")
+
+
+def start_extraction():
+    """Scrape https://blog.python.org/ and replace the rows of members_blog with the posts found.
+
+    The page is downloaded and every post is parsed before the table is truncated, so a
+    failed download, an HTTP error status or an unexpected page layout raises without
+    deleting the rows that are already stored.
+
+    Raises:
+        requests.RequestException: the request failed, timed out or returned a 4xx/5xx status.
+    """
+    print("Extraction started")
+    url = "https://blog.python.org/"
+
+    # The timeout keeps a stalled connection from blocking forever.
+    data = requests.get(url, timeout=30)
+    data.raise_for_status()
+    page_soup = BeautifulSoup(data.text, 'html.parser')
+
+    rows = []
+    for blog in page_soup.select('div.date-outer'):
+        date = blog.select('.date-header span')[0].get_text()
+        post = blog.select('.post')[0]
+
+        # A post without a .post-title element uses the first node of its body as the title.
+        title_bar = post.select('.post-title')
+        title = title_bar[0].text if title_bar else post.select('.post-body')[0].contents[0].text
+
+        post_footer = post.select('.post-footer')[0]
+        author = post_footer.select('.post-author span')[0].text
+        time = convert_time_format(post_footer.select('abbr')[0].text)
+        rows.append((title, author, date, time))
+
+    # Every run replaces the previous contents; truncate only now that the new rows are ready.
+    truncate_table()
+    for title, author, date, time in rows:
+        add_row_to_blog(title, author, date, time)
+        print("\nTitle:", title.strip('\n'))
+        print("Date:", date)
+        print("Time:", time)
+        print("Author:", author)
+        print(
+            "\n---------------------------------------------------------------------------------------------------------------\n")
+
+
+if __name__ == "__main__":  # run the scrape only when this file is executed as a script
+ start_extraction()
+
diff --git a/Workshop4- Homework/test_celery/__pycache__/celery.cpython-310.pyc b/Workshop4- Homework/test_celery/__pycache__/celery.cpython-310.pyc
new file mode 100644
index 0000000..1c74497
Binary files /dev/null and b/Workshop4- Homework/test_celery/__pycache__/celery.cpython-310.pyc differ
diff --git a/Workshop4- Homework/test_celery/__pycache__/run_tasks.cpython-310.pyc b/Workshop4- Homework/test_celery/__pycache__/run_tasks.cpython-310.pyc
new file mode 100644
index 0000000..0f8c508
Binary files /dev/null and b/Workshop4- Homework/test_celery/__pycache__/run_tasks.cpython-310.pyc differ
diff --git a/Workshop4- Homework/test_celery/__pycache__/tasks.cpython-310.pyc b/Workshop4- Homework/test_celery/__pycache__/tasks.cpython-310.pyc
new file mode 100644
index 0000000..5685795
Binary files /dev/null and b/Workshop4- Homework/test_celery/__pycache__/tasks.cpython-310.pyc differ
diff --git a/Workshop4- Homework/test_celery/celery.py b/Workshop4- Homework/test_celery/celery.py
new file mode 100644
index 0000000..f21799c
--- /dev/null
+++ b/Workshop4- Homework/test_celery/celery.py
@@ -0,0 +1,7 @@
+from __future__ import absolute_import
+from celery import Celery
+
+app = Celery('test_celery',  # name of the Celery application
+ broker='amqp://jimmy:jimmy123@localhost/jimmy_vhost',  # AMQP broker on localhost; NOTE(review): credentials are hard-coded
+ backend='rpc://',  # result backend; presumably returns results over AMQP — confirm against the Celery docs
+ include=['test_celery.tasks'])  # task module(s) to import, here the module defining longtime_add
diff --git a/Workshop4- Homework/test_celery/run_tasks.py b/Workshop4- Homework/test_celery/run_tasks.py
new file mode 100644
index 0000000..24a1442
--- /dev/null
+++ b/Workshop4- Homework/test_celery/run_tasks.py
@@ -0,0 +1,13 @@
+from .tasks import longtime_add
+import time
+
+if __name__ == '__main__':
+    pending = longtime_add.delay(1, 2)
+
+    def report():
+        # Show whether the queued task is done and whatever result is available so far.
+        print('Task finished? ', pending.ready())
+        print('Task result: ', pending.result)
+    report()        # right after queuing the task is normally not finished yet, so ready() is False
+    time.sleep(10)  # sleep 10 seconds to ensure the task has been finished
+    report()        # now the task should be finished and ready() should return True
diff --git a/Workshop4- Homework/test_celery/tasks.py b/Workshop4- Homework/test_celery/tasks.py
new file mode 100644
index 0000000..88f2f1e
--- /dev/null
+++ b/Workshop4- Homework/test_celery/tasks.py
@@ -0,0 +1,11 @@
+from __future__ import absolute_import
+from test_celery.celery import app
+import time
+
+@app.task
+def longtime_add(x, y):
+    """Celery task that prints a start message, sleeps 5 seconds, prints an end message and returns x + y."""
+    print('long time task begins')
+    time.sleep(5)
+    print('long time task finished')
+    return x + y