-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtask4.py
138 lines (98 loc) · 3.02 KB
/
task4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import json
import requests
from bs4 import BeautifulSoup
from pprint import pprint
# Stop after this many movies have been scraped.
MAX_MOVIES = 10


def parse_runtime_minutes(text):
    """Convert a runtime label such as ``"2h 22min"`` into total minutes.

    The part before the first ``"h"`` is read as hours; every decimal digit
    after it is read as the minutes. A label without ``"h"`` (``"45min"``)
    is treated as minutes only, and a label without minutes (``"2h"``) as
    whole hours.

    Raises:
        ValueError: if the text before ``"h"`` is not an integer.

    >>> parse_runtime_minutes("2h 22min")
    142
    """
    hours_part, marker, remainder = text.partition("h")
    if not marker:
        # No hour marker at all: the whole label describes minutes.
        hours_part, remainder = "", text
    hours_part = hours_part.strip()
    hours = int(hours_part) if hours_part else 0
    minute_digits = "".join(ch for ch in remainder if ch in "0123456789")
    minutes = int(minute_digits) if minute_digits else 0
    return hours * 60 + minutes


def scrape_movie(url):
    """Download one movie page and return its details as a dict.

    The returned dict has the keys ``Name``, ``Runtime`` (minutes),
    ``Genre``, ``summary``, ``Director``, ``Image``, ``Country`` (only when
    a "Country" block is present on the page) and ``Languages``, in that
    order.

    Raises:
        AttributeError: if an expected element is missing from the page
            (``find`` returned ``None``).
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    movie = {}

    # Keep only the heading text that precedes the first non-breaking space.
    heading = soup.find('div', class_='title_wrapper').h1.get_text()
    movie['Name'] = heading.partition('\xa0')[0]

    sub_text = soup.find('div', class_='subtext')
    runtime_label = sub_text.find('time').text.strip()
    movie['Runtime'] = parse_runtime_minutes(runtime_label)

    # The last link in the subtext row is not kept as a genre
    # (presumably the release-date link -- confirm against the page).
    link_texts = [link.text for link in sub_text.find_all('a')]
    movie['Genre'] = link_texts[:-1]

    plot = soup.find('div', class_='plot_summary')
    movie['summary'] = plot.find('div', class_='summary_text').text.strip()

    directors = []
    for credit in plot.find_all('div', class_='credit_summary_item'):
        if "Director" in credit.text:
            directors.extend(link.text for link in credit.find_all('a'))
    movie['Director'] = directors

    poster_link = soup.find('div', class_='poster').find('a')
    movie['Image'] = poster_link.find("img")['src']

    text_blocks = soup.find_all('div', class_='txt-block')
    for block in text_blocks:
        if "Country" in block.text:
            # Only the first link of the block is used; a later matching
            # block overwrites an earlier one.
            movie['Country'] = block.find("a").text

    languages = []
    for block in text_blocks:
        if "Language" in block.text:
            languages.extend(link.text for link in block.find_all("a"))
    movie['Languages'] = languages

    return movie


def main():
    """Scrape up to MAX_MOVIES entries of deepak.json into data_of_task4.json.

    Each entry of ``deepak.json`` must provide a ``'Url'`` key. The collected
    list is written as indented JSON and also printed.
    """
    with open('deepak.json', 'r') as source:
        entries = json.load(source)

    details_of_movie = []
    for entry in entries:
        details_of_movie.append(scrape_movie(entry['Url']))
        if len(details_of_movie) == MAX_MOVIES:
            break

    # The context manager guarantees the JSON is flushed and the file closed.
    with open('data_of_task4.json', 'w') as target:
        json.dump(details_of_movie, target, indent=4)
    print(details_of_movie)
    # The former trailing loop that copied the never-populated Movie_details
    # list into a set literal always raised, so it has been removed.


if __name__ == "__main__":
    main()